Merge branch 'search'
authorStephen Burrows <stephen.r.burrows@gmail.com>
Thu, 3 Mar 2011 22:14:02 +0000 (17:14 -0500)
committerStephen Burrows <stephen.r.burrows@gmail.com>
Thu, 3 Mar 2011 22:14:02 +0000 (17:14 -0500)
contrib/sobol/__init__.py [new file with mode: 0644]
contrib/sobol/admin.py [new file with mode: 0644]
contrib/sobol/forms.py [new file with mode: 0644]
contrib/sobol/models.py [new file with mode: 0644]
contrib/sobol/search.py [new file with mode: 0644]
contrib/sobol/templates/admin/sobol/search/grappelli_results.html [new file with mode: 0644]
contrib/sobol/templates/admin/sobol/search/results.html [new file with mode: 0644]
contrib/sobol/templates/search/googlesearch.html [new file with mode: 0644]
contrib/sobol/utils.py [new file with mode: 0644]
models/fields/__init__.py

diff --git a/contrib/sobol/__init__.py b/contrib/sobol/__init__.py
new file mode 100644 (file)
index 0000000..90eaf18
--- /dev/null
@@ -0,0 +1 @@
+from philo.contrib.sobol.search import *
\ No newline at end of file
diff --git a/contrib/sobol/admin.py b/contrib/sobol/admin.py
new file mode 100644 (file)
index 0000000..504cde2
--- /dev/null
@@ -0,0 +1,104 @@
+from django.conf import settings
+from django.conf.urls.defaults import patterns, url
+from django.contrib import admin
+from django.core.urlresolvers import reverse
+from django.db.models import Count
+from django.http import HttpResponseRedirect, Http404
+from django.shortcuts import render_to_response
+from django.template import RequestContext
+from django.utils.functional import update_wrapper
+from django.utils.translation import ugettext_lazy as _
+from philo.admin import EntityAdmin
+from philo.contrib.sobol.models import Search, ResultURL, SearchView
+
+
+class ResultURLInline(admin.TabularInline):
+       model = ResultURL
+       readonly_fields = ('url',)
+       can_delete = False
+       extra = 0
+       max_num = 0
+
+
+class SearchAdmin(admin.ModelAdmin):
+       readonly_fields = ('string',)
+       inlines = [ResultURLInline]
+       list_display = ['string', 'unique_urls', 'total_clicks']
+       search_fields = ['string', 'result_urls__url']
+       actions = ['results_action']
+       if 'grappelli' in settings.INSTALLED_APPS:
+               results_template = 'admin/sobol/search/grappelli_results.html'
+       else:
+               results_template = 'admin/sobol/search/results.html'
+       
+       def get_urls(self):
+               urlpatterns = super(SearchAdmin, self).get_urls()
+               
+               def wrap(view):
+                       def wrapper(*args, **kwargs):
+                               return self.admin_site.admin_view(view)(*args, **kwargs)
+                       return update_wrapper(wrapper, view)
+               
+               info = self.model._meta.app_label, self.model._meta.module_name
+               
+               urlpatterns = patterns('',
+                       url(r'^results/$', wrap(self.results_view), name="%s_%s_selected_results" % info),
+                       url(r'^(.+)/results/$', wrap(self.results_view), name="%s_%s_results" % info)
+               ) + urlpatterns
+               return urlpatterns
+       
+       def unique_urls(self, obj):
+               return obj.unique_urls
+       unique_urls.admin_order_field = 'unique_urls'
+       
+       def total_clicks(self, obj):
+               return obj.total_clicks
+       total_clicks.admin_order_field = 'total_clicks'
+       
+       def queryset(self, request):
+               qs = super(SearchAdmin, self).queryset(request)
+               return qs.annotate(total_clicks=Count('result_urls__clicks', distinct=True), unique_urls=Count('result_urls', distinct=True))
+       
+       def results_action(self, request, queryset):
+               info = self.model._meta.app_label, self.model._meta.module_name
+               if len(queryset) == 1:
+                       return HttpResponseRedirect(reverse("admin:%s_%s_results" % info, args=(queryset[0].pk,)))
+               else:
+                       url = reverse("admin:%s_%s_selected_results" % info)
+                       return HttpResponseRedirect("%s?ids=%s" % (url, ','.join([str(item.pk) for item in queryset])))
+       results_action.short_description = "View results for selected %(verbose_name_plural)s"
+       
+       def results_view(self, request, object_id=None, extra_context=None):
+               if object_id is not None:
+                       object_ids = [object_id]
+               else:
+                       object_ids = request.GET.get('ids').split(',')
+                       
+                       if object_ids is None:
+                               raise Http404
+               
+               qs = self.queryset(request).filter(pk__in=object_ids)
+               opts = self.model._meta
+               
+               if len(object_ids) == 1:
+                       title = _(u"Search results for %s" % qs[0])
+               else:
+                       title = _(u"Search results for multiple objects")
+               
+               context = {
+                       'title': title,
+                       'queryset': qs,
+                       'opts': opts,
+                       'root_path': self.admin_site.root_path,
+                       'app_label': opts.app_label
+               }
+               return render_to_response(self.results_template, context, context_instance=RequestContext(request))
+
+
+class SearchViewAdmin(EntityAdmin):
+       raw_id_fields = ('results_page',)
+       related_lookup_fields = {'fk': raw_id_fields}
+
+
+admin.site.register(Search, SearchAdmin)
+admin.site.register(SearchView, SearchViewAdmin)
\ No newline at end of file
diff --git a/contrib/sobol/forms.py b/contrib/sobol/forms.py
new file mode 100644 (file)
index 0000000..e79d9e7
--- /dev/null
@@ -0,0 +1,12 @@
+from django import forms
+from philo.contrib.sobol.utils import SEARCH_ARG_GET_KEY
+
+
+class BaseSearchForm(forms.BaseForm):
+       base_fields = {
+               SEARCH_ARG_GET_KEY: forms.CharField()
+       }
+
+
+class SearchForm(forms.Form, BaseSearchForm):
+       pass
\ No newline at end of file
diff --git a/contrib/sobol/models.py b/contrib/sobol/models.py
new file mode 100644 (file)
index 0000000..b653c09
--- /dev/null
@@ -0,0 +1,224 @@
+from django.conf.urls.defaults import patterns, url
+from django.contrib import messages
+from django.core.exceptions import ValidationError
+from django.db import models
+from django.http import HttpResponseRedirect, Http404, HttpResponse
+from django.utils import simplejson as json
+from django.utils.datastructures import SortedDict
+from philo.contrib.sobol import registry
+from philo.contrib.sobol.forms import SearchForm
+from philo.contrib.sobol.utils import HASH_REDIRECT_GET_KEY, URL_REDIRECT_GET_KEY, SEARCH_ARG_GET_KEY, check_redirect_hash
+from philo.exceptions import ViewCanNotProvideSubpath
+from philo.models import MultiView, Page
+from philo.models.fields import SlugMultipleChoiceField
+from philo.validators import RedirectValidator
+import datetime
+try:
+       import eventlet
+except:
+       eventlet = False
+
+
+class Search(models.Model):
+       string = models.TextField()
+       
+       def __unicode__(self):
+               return self.string
+       
+       def get_weighted_results(self, threshhold=None):
+               "Returns this search's results ordered by decreasing weight."
+               if not hasattr(self, '_weighted_results'):
+                       result_qs = self.result_urls.all()
+                       
+                       if threshhold is not None:
+                               result_qs = result_qs.filter(counts__datetime__gte=threshhold)
+                       
+                       results = [result for result in result_qs]
+                       
+                       results.sort(cmp=lambda x,y: cmp(y.weight, x.weight))
+                       
+                       self._weighted_results = results
+               
+               return self._weighted_results
+       
+       def get_favored_results(self, error=5, threshhold=None):
+               """
+               Calculate the set of most-favored results. A higher error
+               will cause this method to be more reticent about adding new
+               items.
+               
+               The thought is to see whether there are any results which
+               vastly outstrip the other options. As such, evenly-weighted
+               results should be grouped together and either added or
+               excluded as a group.
+               """
+               if not hasattr(self, '_favored_results'):
+                       results = self.get_weighted_results(threshhold)
+                       
+                       grouped_results = SortedDict()
+                       
+                       for result in results:
+                               grouped_results.setdefault(result.weight, []).append(result)
+                       
+                       self._favored_results = []
+                       
+                       for value, subresults in grouped_results.items():
+                               cost = error * sum([(value - result.weight)**2 for result in self._favored_results])
+                               if value > cost:
+                                       self._favored_results += subresults
+                               else:
+                                       break
+               return self._favored_results
+       
+       class Meta:
+               ordering = ['string']
+               verbose_name_plural = 'searches'
+
+
+class ResultURL(models.Model):
+       search = models.ForeignKey(Search, related_name='result_urls')
+       url = models.TextField(validators=[RedirectValidator()])
+       
+       def __unicode__(self):
+               return self.url
+       
+       def get_weight(self, threshhold=None):
+               if not hasattr(self, '_weight'):
+                       clicks = self.clicks.all()
+                       
+                       if threshhold is not None:
+                               clicks = clicks.filter(datetime__gte=threshhold)
+                       
+                       self._weight = sum([click.weight for click in clicks])
+               
+               return self._weight
+       weight = property(get_weight)
+       
+       class Meta:
+               ordering = ['url']
+
+
+class Click(models.Model):
+       result = models.ForeignKey(ResultURL, related_name='clicks')
+       datetime = models.DateTimeField()
+       
+       def __unicode__(self):
+               return self.datetime.strftime('%B %d, %Y %H:%M:%S')
+       
+       def get_weight(self, default=1, weighted=lambda value, days: value/days**2):
+               if not hasattr(self, '_weight'):
+                       days = (datetime.datetime.now() - self.datetime).days
+                       if days < 0:
+                               raise ValueError("Click dates must be in the past.")
+                       default = float(default)
+                       if days == 0:
+                               self._weight = float(default)
+                       else:
+                               self._weight = weighted(default, days)
+               return self._weight
+       weight = property(get_weight)
+       
+       def clean(self):
+               if self.datetime > datetime.datetime.now():
+                       raise ValidationError("Click dates must be in the past.")
+       
+       class Meta:
+               ordering = ['datetime']
+               get_latest_by = 'datetime'
+
+
+class SearchView(MultiView):
+       results_page = models.ForeignKey(Page, related_name='search_results_related')
+       searches = SlugMultipleChoiceField(choices=registry.iterchoices())
+       enable_ajax_api = models.BooleanField("Enable AJAX API", default=True)
+       placeholder_text = models.CharField(max_length=75, default="Search")
+       
+       search_form = SearchForm
+       
+       def __unicode__(self):
+               return u"%s (%s)" % (self.placeholder_text, u", ".join([display for slug, display in registry.iterchoices()]))
+       
+       def get_reverse_params(self, obj):
+               raise ViewCanNotProvideSubpath
+       
+       @property
+       def urlpatterns(self):
+               urlpatterns = patterns('',
+                       url(r'^$', self.results_view, name='results'),
+               )
+               if self.enable_ajax_api:
+                       urlpatterns += patterns('',
+                               url(r'^(?P<slug>[\w-]+)$', self.ajax_api_view, name='ajax_api_view')
+                       )
+               return urlpatterns
+       
+       def get_search_instance(self, slug, search_string):
+               return registry[slug](search_string.lower())
+       
+       def results_view(self, request, extra_context=None):
+               results = None
+               
+               context = self.get_context()
+               context.update(extra_context or {})
+               
+               if SEARCH_ARG_GET_KEY in request.GET:
+                       form = self.search_form(request.GET)
+                       
+                       if form.is_valid():
+                               search_string = request.GET[SEARCH_ARG_GET_KEY].lower()
+                               url = request.GET.get(URL_REDIRECT_GET_KEY)
+                               hash = request.GET.get(HASH_REDIRECT_GET_KEY)
+                               
+                               if url and hash:
+                                       if check_redirect_hash(hash, search_string, url):
+                                               # Create the necessary models
+                                               search = Search.objects.get_or_create(string=search_string)[0]
+                                               result_url = search.result_urls.get_or_create(url=url)[0]
+                                               result_url.clicks.create(datetime=datetime.datetime.now())
+                                               return HttpResponseRedirect(url)
+                                       else:
+                                               messages.add_message(request, messages.INFO, "The link you followed had been tampered with. Here are all the results for your search term instead!")
+                                               # TODO: Should search_string be escaped here?
+                                               return HttpResponseRedirect("%s?%s=%s" % (request.path, SEARCH_ARG_GET_KEY, search_string))
+                               if not self.enable_ajax_api:
+                                       search_instances = []
+                                       if eventlet:
+                                               pool = eventlet.GreenPool()
+                                       for slug in self.searches:
+                                               search_instance = self.get_search_instance(slug, search_string)
+                                               search_instances.append(search_instance)
+                                               if eventlet:
+                                                       pool.spawn_n(self.make_result_cache, search_instance)
+                                               else:
+                                                       self.make_result_cache(search_instance)
+                                       if eventlet:
+                                               pool.waitall()
+                                       context.update({
+                                               'searches': search_instances
+                                       })
+                               else:
+                                       context.update({
+                                               'searches': [{'verbose_name': verbose_name, 'url': self.reverse('ajax_api_view', kwargs={'slug': slug}, node=request.node)} for slug, verbose_name in registry.iterchoices()]
+                                       })
+               else:
+                       form = SearchForm()
+               
+               context.update({
+                       'form': form
+               })
+               return self.results_page.render_to_response(request, extra_context=context)
+       
+       def make_result_cache(self, search_instance):
+               search_instance.results
+       
+       def ajax_api_view(self, request, slug, extra_context=None):
+               search_string = request.GET.get(SEARCH_ARG_GET_KEY)
+               
+               if not request.is_ajax() or not self.enable_ajax_api or slug not in self.searches or search_string is None:
+                       raise Http404
+               
+               search_instance = self.get_search_instance(slug, search_string)
+               response = HttpResponse(json.dumps({
+                       'results': [result.get_context() for result in search_instance.results],
+               }))
+               return response
\ No newline at end of file
diff --git a/contrib/sobol/search.py b/contrib/sobol/search.py
new file mode 100644 (file)
index 0000000..36c2b5d
--- /dev/null
@@ -0,0 +1,382 @@
+#encoding: utf-8
+
+from django.conf import settings
+from django.contrib.sites.models import Site
+from django.core.cache import cache
+from django.db.models.options import get_verbose_name as convert_camelcase
+from django.utils import simplejson as json
+from django.utils.http import urlquote_plus
+from django.utils.safestring import mark_safe
+from django.utils.text import capfirst
+from django.template import loader, Context, Template
+import datetime
+from philo.contrib.sobol.utils import make_tracking_querydict
+
+try:
+       from eventlet.green import urllib2
+except:
+       import urllib2
+
+
+__all__ = (
+       'Result', 'BaseSearch', 'DatabaseSearch', 'URLSearch', 'JSONSearch', 'GoogleSearch', 'registry'
+)
+
+
+SEARCH_CACHE_KEY = 'philo_sobol_search_results'
+DEFAULT_RESULT_TEMPLATE_STRING = "{% if url %}<a href='{{ url }}'>{% endif %}{{ title }}{% if url %}</a>{% endif %}"
+
+# Determines the timeout on the entire result cache.
+MAX_CACHE_TIMEOUT = 60*60*24*7
+
+
+class RegistrationError(Exception):
+       pass
+
+
+class SearchRegistry(object):
+       # Holds a registry of search types by slug.
+       def __init__(self):
+               self._registry = {}
+       
+       def register(self, search, slug=None):
+               slug = slug or search.slug
+               if slug in self._registry:
+                       if self._registry[slug] != search:
+                               raise RegistrationError("A different search is already registered as `%s`")
+               else:
+                       self._registry[slug] = search
+       
+       def unregister(self, search, slug=None):
+               if slug is not None:
+                       if slug in self._registry and self._registry[slug] == search:
+                               del self._registry[slug]
+                       raise RegistrationError("`%s` is not registered as `%s`" % (search, slug))
+               else:
+                       for slug, search in self._registry.items():
+                               if search == search:
+                                       del self._registry[slug]
+       
+       def items(self):
+               return self._registry.items()
+       
+       def iteritems(self):
+               return self._registry.iteritems()
+       
+       def iterchoices(self):
+               for slug, search in self.iteritems():
+                       yield slug, search.verbose_name
+       
+       def __getitem__(self, key):
+               return self._registry[key]
+       
+       def __iter__(self):
+               return self._registry.__iter__()
+
+
+registry = SearchRegistry()
+
+
+class Result(object):
+       """
+       A result is instantiated with a configuration dictionary, a search,
+       and a template name. The configuration dictionary is expected to
+       define a `title` and optionally a `url`. Any other variables may be
+       defined; they will be made available through the result object in
+       the template, if one is defined.
+       """
+       def __init__(self, search, result):
+               self.search = search
+               self.result = result
+       
+       def get_title(self):
+               return self.search.get_result_title(self.result)
+       
+       def get_url(self):
+               return "?%s" % self.search.get_result_querydict(self.result).urlencode()
+       
+       def get_template(self):
+               return self.search.get_result_template(self.result)
+       
+       def get_extra_context(self):
+               return self.search.get_result_extra_context(self.result)
+       
+       def get_context(self):
+               context = self.get_extra_context()
+               context.update({
+                       'title': self.get_title(),
+                       'url': self.get_url()
+               })
+               return context
+       
+       def render(self):
+               t = self.get_template()
+               c = Context(self.get_context())
+               return t.render(c)
+       
+       def __unicode__(self):
+               return self.render()
+
+
+class BaseSearchMetaclass(type):
+       def __new__(cls, name, bases, attrs):
+               if 'verbose_name' not in attrs:
+                       attrs['verbose_name'] = capfirst(convert_camelcase(name))
+               if 'slug' not in attrs:
+                       attrs['slug'] = name.lower()
+               return super(BaseSearchMetaclass, cls).__new__(cls, name, bases, attrs)
+
+
+class BaseSearch(object):
+       """
+       Defines a generic search interface. Accessing self.results will
+       attempt to retrieve cached results and, if that fails, will
+       initiate a new search and store the results in the cache.
+       """
+       __metaclass__ = BaseSearchMetaclass
+       result_limit = 10
+       _cache_timeout = 60*48
+       
+       def __init__(self, search_arg):
+               self.search_arg = search_arg
+       
+       def _get_cached_results(self):
+               """Return the cached results if the results haven't timed out. Otherwise return None."""
+               result_cache = cache.get(SEARCH_CACHE_KEY)
+               if result_cache and self.__class__ in result_cache and self.search_arg.lower() in result_cache[self.__class__]:
+                       cached = result_cache[self.__class__][self.search_arg.lower()]
+                       if cached['timeout'] >= datetime.datetime.now():
+                               return cached['results']
+               return None
+       
+       def _set_cached_results(self, results, timeout):
+               """Sets the results to the cache for <timeout> minutes."""
+               result_cache = cache.get(SEARCH_CACHE_KEY) or {}
+               cached = result_cache.setdefault(self.__class__, {}).setdefault(self.search_arg.lower(), {})
+               cached.update({
+                       'results': results,
+                       'timeout': datetime.datetime.now() + datetime.timedelta(minutes=timeout)
+               })
+               cache.set(SEARCH_CACHE_KEY, result_cache, MAX_CACHE_TIMEOUT)
+       
+       @property
+       def results(self):
+               if not hasattr(self, '_results'):
+                       results = self._get_cached_results()
+                       if results is None:
+                               try:
+                                       # Cache one extra result so we can see if there are
+                                       # more results to be had.
+                                       limit = self.result_limit
+                                       if limit is not None:
+                                               limit += 1
+                                       results = self.get_results(self.result_limit)
+                               except:
+                                       if settings.DEBUG:
+                                               raise
+                                       #  On exceptions, don't set any cache; just return.
+                                       return []
+                       
+                               self._set_cached_results(results, self._cache_timeout)
+                       self._results = results
+               
+               return self._results
+       
+       def get_results(self, limit=None, result_class=Result):
+               """
+               Calls self.search() and parses the return value into Result objects.
+               """
+               results = self.search(limit)
+               return [result_class(self, result) for result in results]
+       
+       def search(self, limit=None):
+               """
+               Returns an iterable of up to <limit> results. The
+               get_result_title, get_result_url, get_result_template, and
+               get_result_extra_context methods will be used to interpret the
+               individual items that this function returns, so the result can
+               be an object with attributes as easily as a dictionary
+               with keys. The only restriction is that the objects be
+               pickleable so that they can be used with django's cache system.
+               """
+               raise NotImplementedError
+       
+       def get_result_title(self, result):
+               raise NotImplementedError
+       
+       def get_result_url(self, result):
+               "Subclasses override this to provide the actual URL for the result."
+               raise NotImplementedError
+       
+       def get_result_querydict(self, result):
+               return make_tracking_querydict(self.search_arg, self.get_result_url(result))
+       
+       def get_result_template(self, result):
+               if hasattr(self, 'result_template'):
+                       return loader.get_template(self.result_template)
+               if not hasattr(self, '_result_template'):
+                       self._result_template = Template(DEFAULT_RESULT_TEMPLATE_STRING)
+               return self._result_template
+       
+       def get_result_extra_context(self, result):
+               return {}
+       
+       def has_more_results(self):
+               """Useful to determine whether to display a `view more results` link."""
+               return len(self.results) > self.result_limit
+       
+       @property
+       def more_results_url(self):
+               """
+               Returns the actual url for more results. This will be encoded
+               into a querystring for tracking purposes.
+               """
+               raise NotImplementedError
+       
+       @property
+       def more_results_querydict(self):
+               return make_tracking_querydict(self.search_arg, self.more_results_url)
+       
+       def __unicode__(self):
+               return ' '.join(self.__class__.verbose_name.rsplit(' ', 1)[:-1]) + ' results'
+
+
+class DatabaseSearch(BaseSearch):
+       model = None
+       
+       def has_more_results(self):
+               return self.get_queryset().count() > self.result_limit
+       
+       def search(self, limit=None):
+               if not hasattr(self, '_qs'):
+                       self._qs = self.get_queryset()
+                       if limit is not None:
+                               self._qs = self._qs[:limit]
+               
+               return self._qs
+       
+       def get_queryset(self):
+               return self.model._default_manager.all()
+
+
+class URLSearch(BaseSearch):
+       """
+       Defines a generic interface for searches that require accessing a
+       certain url to get search results.
+       """
+       search_url = ''
+       query_format_str = "%s"
+
+       @property
+       def url(self):
+               "The URL where the search gets its results."
+               return self.search_url + self.query_format_str % urlquote_plus(self.search_arg)
+
+       @property
+       def more_results_url(self):
+               "The URL where the users would go to get more results."
+               return self.url
+       
+       def parse_response(self, response, limit=None):
+               raise NotImplementedError
+       
+       def search(self, limit=None):
+               return self.parse_response(urllib2.urlopen(self.url), limit=limit)
+
+
+class JSONSearch(URLSearch):
+       """
+       Makes a GET request and parses the results as JSON. The default
+       behavior assumes that the return value is a list of results.
+       """
+       def parse_response(self, response, limit=None):
+               return json.loads(response.read())[:limit]
+
+
+class GoogleSearch(JSONSearch):
+       search_url = "http://ajax.googleapis.com/ajax/services/search/web"
+       query_format_str = "?v=1.0&q=%s"
+       # TODO: Change this template to reflect the app's actual name.
+       result_template = 'search/googlesearch.html'
+       timeout = 60
+       
+       def parse_response(self, response, limit=None):
+               responseData = json.loads(response.read())['responseData']
+               results, cursor = responseData['results'], responseData['cursor']
+               
+               if results:
+                       self._more_results_url = cursor['moreResultsUrl']
+                       self._estimated_result_count = cursor['estimatedResultCount']
+               
+               return results[:limit]
+       
+       @property
+       def url(self):
+               # Google requires that an ajax request have a proper Referer header.
+               return urllib2.Request(
+                       super(GoogleSearch, self).url,
+                       None,
+                       {'Referer': "http://%s" % Site.objects.get_current().domain}
+               )
+       
+       @property
+       def has_more_results(self):
+               if self.results and len(self.results) < self._estimated_result_count:
+                       return True
+               return False
+       
+       @property
+       def more_results_url(self):
+               return self._more_results_url
+       
+       def get_result_title(self, result):
+               return result['titleNoFormatting']
+       
+       def get_result_url(self, result):
+               return result['unescapedUrl']
+       
+       def get_result_extra_context(self, result):
+               return result
+
+
+registry.register(GoogleSearch)
+
+
+try:
+       from BeautifulSoup import BeautifulSoup, SoupStrainer, BeautifulStoneSoup
+except:
+       pass
+else:
+       __all__ += ('ScrapeSearch', 'XMLSearch',)
+       class ScrapeSearch(URLSearch):
+               _strainer_args = []
+               _strainer_kwargs = {}
+               
+               @property
+               def strainer(self):
+                       if not hasattr(self, '_strainer'):
+                               self._strainer = SoupStrainer(*self._strainer_args, **self._strainer_kwargs)
+                       return self._strainer
+               
+               def parse_response(self, response, limit=None):
+                       strainer = self.strainer
+                       soup = BeautifulSoup(response, parseOnlyThese=strainer)
+                       return self.parse_results(soup[:limit])
+               
+               def parse_results(self, results):
+                       """
+                       Provides a hook for parsing the results of straining. This
+                       has no default behavior because the results absolutely
+                       must be parsed to properly extract the information.
+                       For more information, see http://www.crummy.com/software/BeautifulSoup/documentation.html#Improving%20Memory%20Usage%20with%20extract
+                       """
+                       raise NotImplementedError
+       
+       
+       class XMLSearch(ScrapeSearch):
+               _self_closing_tags = []
+               
+               def parse_response(self, response, limit=None):
+                       strainer = self.strainer
+                       soup = BeautifulStoneSoup(page, selfClosingTags=self._self_closing_tags, parseOnlyThese=strainer)
+                       return self.parse_results(soup[:limit])
\ No newline at end of file
diff --git a/contrib/sobol/templates/admin/sobol/search/grappelli_results.html b/contrib/sobol/templates/admin/sobol/search/grappelli_results.html
new file mode 100644 (file)
index 0000000..45135ff
--- /dev/null
@@ -0,0 +1,53 @@
+{% extends "admin/base_site.html" %}
+
+<!-- LOADING -->
+{% load i18n %}
+
+<!-- EXTRASTYLES -->
+{% block extrastyle %}<style type="text/css">.favored{font-weight:bold;}</style>{% endblock %}
+
+<!-- BREADCRUMBS -->
+{% block breadcrumbs %}
+       <div id="breadcrumbs">
+               {% if queryset|length > 1 %}
+               <a href="../../">{% trans "Home" %}</a> &rsaquo;
+               <a href="../">{{ app_label|capfirst }}</a> &rsaquo;
+               <a href="./">{{ opts.verbose_name_plural|capfirst }}</a> &rsaquo;
+               {% trans 'Search results for multiple objects' %}
+               {% else %}
+               <a href="../../../../">{% trans "Home" %}</a> &rsaquo;
+               <a href="../../../">{{ app_label|capfirst }}</a> &rsaquo;
+               <a href="../../">{{ opts.verbose_name_plural|capfirst }}</a> &rsaquo;
+               <a href="../">{{ queryset|first|truncatewords:"18" }}</a> &rsaquo;
+               {% trans 'Results' %}
+               {% endif %}
+       </div>
+{% endblock %}
+
+<!-- CONTENT -->
+{% block content %}
+       <div class="container-grid delete-confirmation">
+               {% for search in queryset %}
+               {% if not forloop.first and not forloop.last %}<h1>{{ search.string }}</h1>{% endif %}
+               <div class="group tabular">
+                       <h2>{% blocktrans %}Results{% endblocktrans %}</h2>{% comment %}For the favored results, add a class?{% endcomment %}
+                       <div class="module table">
+                               <div class="module thead">
+                                       <div class="tr">
+                                               <div class="th">Weight</div>
+                                               <div class="th">URL</div>
+                                       </div>
+                               </div>
+                               <div class="module tbody">
+                                       {% for result in search.get_weighted_results %}
+                                       <div class="tr{% if result in search.get_favored_results %} favored{% endif %}">
+                                               <div class="td">{{ result.weight }}</div>
+                                               <div class="td">{{ result.url }}</div>
+                                       </div>
+                                       {% endfor %}
+                               </div>
+                       </div>
+               </div>
+               {% endfor %}
+       </div>
+{% endblock %}
\ No newline at end of file
diff --git a/contrib/sobol/templates/admin/sobol/search/results.html b/contrib/sobol/templates/admin/sobol/search/results.html
new file mode 100644 (file)
index 0000000..44d4e7c
--- /dev/null
@@ -0,0 +1,47 @@
+{% extends "admin/base_site.html" %}
+{% load i18n %}
+
+{% block extrastyle %}<style type="text/css">.favored{font-weight:bold;}</style>{% endblock %}
+
+{% block breadcrumbs %}
+<div class="breadcrumbs">
+       {% if queryset|length > 1 %}
+       <a href="../../">{% trans "Home" %}</a> &rsaquo;
+       <a href="../">{{ app_label|capfirst }}</a> &rsaquo;
+       <a href="./">{{ opts.verbose_name_plural|capfirst }}</a> &rsaquo;
+       {% trans 'Search results for multiple objects' %}
+       {% else %}
+       <a href="../../../../">{% trans "Home" %}</a> &rsaquo;
+       <a href="../../../">{{ app_label|capfirst }}</a> &rsaquo; 
+       <a href="../../">{{ opts.verbose_name_plural|capfirst }}</a> &rsaquo;
+       <a href="../">{{ queryset|first|truncatewords:"18" }}</a> &rsaquo;
+       {% trans 'Results' %}
+       {% endif %}
+</div>
+{% endblock %}
+
+
+{% block content %}
+               {% for search in queryset %}
+               {% if not forloop.first and not forloop.last %}<h1>{{ search.string }}</h1>{% endif %}
+                       <fieldset class="module">
+                               <h2>{% blocktrans %}Results{% endblocktrans %}</h2>{% comment %}For the favored results, add a class?{% endcomment %}
+                               <table>
+                                       <thead>
+                                               <tr>
+                                                       <th>Weight</th>
+                                                       <th>URL</th>
+                                               </tr>
+                                       </thead>
+                                       <tbody>
+                                               {% for result in search.get_weighted_results %}
+                                               <tr{% if result in search.favored_results %} class="favored"{% endif %}>
+                                                       <td>{{ result.weight }}</td>
+                                                       <td>{{ result.url }}</td>
+                                               </tr>
+                                               {% endfor %}
+                                       </tbody>
+                               </table>
+                       </fieldset>
+               {% endfor %}
+{% endblock %}
\ No newline at end of file
diff --git a/contrib/sobol/templates/search/googlesearch.html b/contrib/sobol/templates/search/googlesearch.html
new file mode 100644 (file)
index 0000000..1b22388
--- /dev/null
@@ -0,0 +1,4 @@
+<article>
+       <h1><a href="{{ url }}">{{ title|safe }}</a></h1>
+       <p>{{ content|safe }}</p>
+</article>
\ No newline at end of file
diff --git a/contrib/sobol/utils.py b/contrib/sobol/utils.py
new file mode 100644 (file)
index 0000000..723a463
--- /dev/null
@@ -0,0 +1,32 @@
+from django.conf import settings
+from django.http import QueryDict
+from django.utils.encoding import smart_str
+from django.utils.hashcompat import sha_constructor
+from django.utils.http import urlquote_plus, urlquote
+
+
+SEARCH_ARG_GET_KEY = 'q'
+URL_REDIRECT_GET_KEY = 'url'
+HASH_REDIRECT_GET_KEY = 's'
+
+
+def make_redirect_hash(search_arg, url):
+       return sha_constructor(smart_str(search_arg + url + settings.SECRET_KEY)).hexdigest()[::2]
+
+
+def check_redirect_hash(hash, search_arg, url):
+       return hash == make_redirect_hash(search_arg, url)
+
+
+def make_tracking_querydict(search_arg, url):
+       """
+       Returns a QueryDict instance containing the information necessary
+       for tracking clicks of this url.
+       
+       NOTE: will this kind of initialization handle quoting correctly?
+       """
+       return QueryDict("%s=%s&%s=%s&%s=%s" % (
+               SEARCH_ARG_GET_KEY, urlquote_plus(search_arg),
+               URL_REDIRECT_GET_KEY, urlquote(url),
+               HASH_REDIRECT_GET_KEY, make_redirect_hash(search_arg, url))
+       )
\ No newline at end of file
index d8ed839..25af832 100644 (file)
@@ -1,6 +1,10 @@
 from django import forms
+from django.core.exceptions import ValidationError
+from django.core.validators import validate_slug
 from django.db import models
 from django.utils import simplejson as json
+from django.utils.text import capfirst
+from django.utils.translation import ugettext_lazy as _
 from philo.forms.fields import JSONFormField
 from philo.validators import TemplateValidator, json_validator
 #from philo.models.fields.entities import *
@@ -55,10 +59,69 @@ class JSONField(models.TextField):
                return super(JSONField, self).formfield(*args, **kwargs)
 
 
+class SlugMultipleChoiceField(models.Field):
+       __metaclass__ = models.SubfieldBase
+       description = _("Comma-separated slug field")
+       
+       def get_internal_type(self):
+               return "TextField"
+       
+       def to_python(self, value):
+               if not value:
+                       return []
+               
+               if isinstance(value, list):
+                       return value
+               
+               return value.split(',')
+       
+       def get_prep_value(self, value):
+               return ','.join(value)
+       
+       def formfield(self, **kwargs):
+               # This is necessary because django hard-codes TypedChoiceField for things with choices.
+               defaults = {
+                       'widget': forms.CheckboxSelectMultiple,
+                       'choices': self.get_choices(include_blank=False),
+                       'label': capfirst(self.verbose_name),
+                       'required': not self.blank,
+                       'help_text': self.help_text
+               }
+               if self.has_default():
+                       if callable(self.default):
+                               defaults['initial'] = self.default
+                               defaults['show_hidden_initial'] = True
+                       else:
+                               defaults['initial'] = self.get_default()
+               
+               for k in kwargs.keys():
+                       if k not in ('coerce', 'empty_value', 'choices', 'required',
+                                                'widget', 'label', 'initial', 'help_text',
+                                                'error_messages', 'show_hidden_initial'):
+                               del kwargs[k]
+               
+               defaults.update(kwargs)
+               form_class = forms.TypedMultipleChoiceField
+               return form_class(**defaults)
+       
+       def validate(self, value, model_instance):
+               invalid_values = []
+               for val in value:
+                       try:
+                               validate_slug(val)
+                       except ValidationError:
+                               invalid_values.append(val)
+               
+               if invalid_values:
+                       # should really make a custom message.
+                       raise ValidationError(self.error_messages['invalid_choice'] % invalid_values)
+
+
 try:
        from south.modelsinspector import add_introspection_rules
 except ImportError:
        pass
 else:
+       add_introspection_rules([], ["^philo\.models\.fields\.SlugMultipleChoiceField"])
        add_introspection_rules([], ["^philo\.models\.fields\.TemplateField"])
        add_introspection_rules([], ["^philo\.models\.fields\.JSONField"])
\ No newline at end of file