--- /dev/null
+from django.conf.urls.defaults import patterns, url
+from django.contrib import messages
+from django.db import models
+from django.http import HttpResponseRedirect, Http404
+from django.utils import simplejson as json
+from philo.contrib.sobol import registry
+from philo.contrib.sobol.forms import SearchForm
+from philo.contrib.sobol.utils import HASH_REDIRECT_GET_KEY, URL_REDIRECT_GET_KEY, SEARCH_ARG_GET_KEY, check_redirect_hash
+from philo.exceptions import ViewCanNotProvideSubpath
+from philo.models import MultiView, Page, SlugMultipleChoiceField
+from philo.validators import RedirectValidator
+import datetime
+try:
+ import eventlet
+except:
+ eventlet = False
+
+
+class Search(models.Model):
+    """
+    Stores one search string. ``ResultURL`` rows point back to a search
+    through the ``result_urls`` related name.
+    """
+    string = models.TextField()
+
+    def __unicode__(self):
+        # The field is called ``string``; this model has no
+        # ``search_string`` attribute.
+        return self.string
+
+    class Meta:
+        ordering = ['string']
+        verbose_name_plural = 'searches'
+
+
+class ResultURL(models.Model):
+    """
+    A URL recorded against a :class:`Search`. Reachable from the search
+    as ``result_urls``; the URL text is checked by ``RedirectValidator``.
+    """
+    search = models.ForeignKey(Search, related_name='result_urls')
+    url = models.TextField(validators=[RedirectValidator()])
+
+    class Meta:
+        ordering = ['url']
+
+    def __unicode__(self):
+        return self.url
+
+
+class Click(models.Model):
+    """
+    A timestamped click on a :class:`ResultURL`. Reachable from the
+    result URL as ``clicks``.
+    """
+    result = models.ForeignKey(ResultURL, related_name='clicks')
+    datetime = models.DateTimeField()
+
+    class Meta:
+        ordering = ['datetime']
+        get_latest_by = 'datetime'
+
+    def __unicode__(self):
+        # Rendered as full month name, day, year and 24-hour time.
+        clicked_at = self.datetime
+        return clicked_at.strftime('%B %d, %Y %H:%M:%S')
+
+
+class SearchView(MultiView):
+    """
+    Multi-view that renders search results on ``results_page``.
+
+    ``searches`` holds the slugs of the registered searches to run. When
+    ``allow_partial_loading`` is set, the results page does not run the
+    searches itself; instead a per-slug AJAX endpoint is exposed.
+    """
+    results_page = models.ForeignKey(Page, related_name='search_results_related')
+    searches = SlugMultipleChoiceField(choices=registry.iterchoices())
+    allow_partial_loading = models.BooleanField(default=True)
+    placeholder_text = models.CharField(max_length=75, default="Search")
+
+    def get_reverse_params(self, obj):
+        """Always raises ViewCanNotProvideSubpath: no object maps to a subpath here."""
+        raise ViewCanNotProvideSubpath
+
+    @property
+    def urlpatterns(self):
+        """URL patterns for the results page and, optionally, the AJAX endpoint."""
+        urlpatterns = patterns('',
+            url(r'^$', self.results_view, name='results'),
+        )
+        if self.allow_partial_loading:
+            # Anchored with ``$`` so that only ``<slug>`` or ``<slug>/``
+            # matches, not any path that merely starts with a slug.
+            urlpatterns += patterns('',
+                url(r'^(?P<slug>[\w-]+)/?$', self.partial_ajax_results_view, name='partial_ajax_results_view')
+            )
+        return urlpatterns
+
+    def results_view(self, request, extra_context=None):
+        """
+        Render the results page, or redirect when a tracked result link
+        was followed.
+
+        If the request carries a redirect URL and hash, the hash is
+        checked: on success the click is recorded and the user is sent
+        to the URL; on failure the user is sent back to the plain
+        results for the same search string.
+        """
+        # Imported locally so this block does not depend on a change to
+        # the module's import list.
+        from django.utils.http import urlquote_plus
+
+        context = self.get_context()
+        context.update(extra_context or {})
+
+        if SEARCH_ARG_GET_KEY in request.GET:
+            form = SearchForm(request.GET)
+
+            if form.is_valid():
+                search_string = request.GET[SEARCH_ARG_GET_KEY].lower()
+                redirect_url = request.GET.get(URL_REDIRECT_GET_KEY)
+                redirect_hash = request.GET.get(HASH_REDIRECT_GET_KEY)
+
+                if redirect_url and redirect_hash:
+                    if check_redirect_hash(redirect_hash, search_string, redirect_url):
+                        # Create the necessary models
+                        search = Search.objects.get_or_create(string=search_string)[0]
+                        result_url = search.result_urls.get_or_create(url=redirect_url)[0]
+                        result_url.clicks.create(datetime=datetime.datetime.now())
+                        return HttpResponseRedirect(redirect_url)
+                    else:
+                        messages.add_message(request, messages.INFO, "The link you followed had been tampered with. Here are all the results for your search term instead!")
+                        # The search string is user input; quote it so
+                        # characters such as ``&`` or ``#`` cannot alter
+                        # the query string of the redirect target.
+                        return HttpResponseRedirect("%s?%s=%s" % (request.path, SEARCH_ARG_GET_KEY, urlquote_plus(search_string)))
+
+                if not self.allow_partial_loading:
+                    search_instances = []
+                    # With eventlet available the searches are run in a
+                    # green pool; otherwise they run one after another.
+                    pool = eventlet.GreenPool() if eventlet else None
+                    for slug in self.searches:
+                        search = registry[slug]
+                        search_instance = search(search_string)
+                        search_instances.append(search_instance)
+                        if pool is not None:
+                            pool.spawn_n(self.make_result_cache, search_instance)
+                        else:
+                            self.make_result_cache(search_instance)
+                    if pool is not None:
+                        pool.waitall()
+                    context.update({
+                        'searches': search_instances
+                    })
+        else:
+            form = SearchForm()
+
+        context.update({
+            'form': form
+        })
+        return self.results_page.render_to_response(request, extra_context=context)
+
+    def make_result_cache(self, search_instance):
+        """Touch ``results`` so the search instance computes and stores them."""
+        search_instance.results
+
+    def partial_ajax_results_view(self, request, slug, extra_context=None):
+        """
+        Return the results of the search registered as ``slug`` as JSON.
+
+        Raises Http404 unless the request is AJAX, partial loading is
+        enabled, ``slug`` is one of this view's searches and a search
+        string was supplied.
+        """
+        # Imported locally so this block does not depend on a change to
+        # the module's import list.
+        from django.http import HttpResponse
+
+        search_string = request.GET.get(SEARCH_ARG_GET_KEY)
+
+        if not request.is_ajax() or not self.allow_partial_loading or slug not in self.searches or search_string is None:
+            raise Http404
+
+        search = registry[slug]
+        search_instance = search(search_string.lower())
+        # NOTE(review): ``results`` holds whatever the search's
+        # ``get_results`` built; confirm those objects are
+        # JSON-serializable before relying on this endpoint.
+        payload = json.dumps({
+            'results': search_instance.results,
+            'template': search_instance.get_ajax_result_template()
+        })
+        # A view must hand back a response object, not a bare string.
+        return HttpResponse(payload, content_type='application/json')
\ No newline at end of file
--- /dev/null
+#encoding: utf-8
+
+from django.conf import settings
+from django.contrib.sites.models import Site
+from django.core.cache import cache
+from django.db.models.options import get_verbose_name as convert_camelcase
+from django.utils import simplejson as json
+from django.utils.http import urlquote_plus
+from django.utils.safestring import mark_safe
+from django.utils.text import capfirst
+from django.template import loader, Context, Template
+import datetime
+from philo.contrib.sobol.utils import make_tracking_querydict
+
+try:
+ from eventlet.green import urllib2
+except:
+ import urllib2
+
+
+__all__ = (
+ 'Result', 'BaseSearch', 'DatabaseSearch', 'URLSearch', 'JSONSearch', 'GoogleSearch', 'registry'
+)
+
+
+# Single cache key under which BaseSearch stores the results of every
+# search class and search string.
+SEARCH_CACHE_KEY = 'philo_sobol_search_results'
+# Template source used when a search defines no result template of its own.
+DEFAULT_RESULT_TEMPLATE_STRING = "{% if url %}<a href='{{ url }}'>{% endif %}{{ title }}{% if url %}</a>{% endif %}"
+
+# Determines the timeout on the entire result cache. Passed as the
+# timeout argument to cache.set(); one week if interpreted as seconds.
+MAX_CACHE_TIMEOUT = 60*60*24*7
+
+
+class RegistrationError(Exception):
+    """Raised by SearchRegistry when a registration or unregistration is invalid."""
+
+
+class SearchRegistry(object):
+    """Holds a registry of search types by slug."""
+    def __init__(self):
+        self._registry = {}
+
+    def register(self, search, slug=None):
+        """
+        Register ``search`` under ``slug`` (default: ``search.slug``).
+
+        Registering the same search twice is a no-op. Raises
+        RegistrationError if a different search already owns the slug.
+        """
+        slug = slug or search.slug
+        if slug in self._registry:
+            if self._registry[slug] != search:
+                raise RegistrationError("A different search is already registered as `%s`" % slug)
+        else:
+            self._registry[slug] = search
+
+    def unregister(self, search, slug=None):
+        """
+        Remove ``search`` from the registry.
+
+        With ``slug``, remove only that entry and raise
+        RegistrationError if ``search`` is not registered under it.
+        Without ``slug``, remove every entry that maps to ``search``.
+        """
+        if slug is not None:
+            if slug in self._registry and self._registry[slug] == search:
+                del self._registry[slug]
+            else:
+                # Only an error when nothing was removed.
+                raise RegistrationError("`%s` is not registered as `%s`" % (search, slug))
+        else:
+            # Iterate over a snapshot, and use loop names that do not
+            # shadow the ``search`` argument being compared against.
+            for registered_slug, registered_search in list(self._registry.items()):
+                if registered_search == search:
+                    del self._registry[registered_slug]
+
+    def items(self):
+        return self._registry.items()
+
+    def iteritems(self):
+        return self._registry.iteritems()
+
+    def iterchoices(self):
+        """Yield ``(slug, verbose_name)`` pairs for use as field choices."""
+        for slug, search in self.iteritems():
+            yield slug, search.verbose_name
+
+    def __getitem__(self, key):
+        return self._registry[key]
+
+
+registry = SearchRegistry()
+
+
+class Result(object):
+    """
+    Pairs one raw search result with the search that produced it.
+
+    The title, URL, template and extra template context are not stored
+    here; each is obtained by asking the search's ``get_result_*`` hook
+    about the raw result.
+    """
+    def __init__(self, search, result):
+        self.search = search
+        self.result = result
+
+    def get_title(self):
+        return self.search.get_result_title(self.result)
+
+    def get_url(self):
+        # NOTE(review): this is the url-encoded querydict with no leading
+        # ``?``; confirm the templates that consume ``url`` expect that.
+        querydict = self.search.get_result_querydict(self.result)
+        return querydict.urlencode()
+
+    def get_template(self):
+        return self.search.get_result_template(self.result)
+
+    def get_extra_context(self):
+        return self.search.get_result_extra_context(self.result)
+
+    def render(self):
+        """Render the result's template with its extra context plus ``title`` and ``url``."""
+        template = self.get_template()
+        context = Context(self.get_extra_context())
+        overrides = {
+            'title': self.get_title(),
+            'url': self.get_url()
+        }
+        context.update(overrides)
+        return template.render(context)
+
+    def __unicode__(self):
+        return self.render()
+
+
+class BaseSearchMetaclass(type):
+    """
+    Fills in ``slug`` and ``verbose_name`` on search classes that do
+    not declare them, deriving both from the class name.
+    """
+    def __new__(cls, name, bases, attrs):
+        # The slug default is simply the lower-cased class name.
+        attrs.setdefault('slug', name.lower())
+        if 'verbose_name' not in attrs:
+            spaced_name = convert_camelcase(name)
+            attrs['verbose_name'] = capfirst(spaced_name)
+        return super(BaseSearchMetaclass, cls).__new__(cls, name, bases, attrs)
+
+
+class BaseSearch(object):
+    """
+    Defines a generic search interface. Accessing self.results will
+    attempt to retrieve cached results and, if that fails, will
+    initiate a new search and store the results in the cache.
+
+    Subclasses implement ``search``, ``get_result_title`` and
+    ``get_result_url`` (and ``more_results_url`` if they offer one).
+    """
+    __metaclass__ = BaseSearchMetaclass
+    # Maximum number of results to show; None means no limit.
+    result_limit = 10
+    # Lifetime of one cached result set, in minutes.
+    _cache_timeout = 60*48
+
+    def __init__(self, search_arg):
+        self.search_arg = search_arg
+
+    def _get_cached_results(self):
+        """Return the cached results if the results haven't timed out. Otherwise return None."""
+        result_cache = cache.get(SEARCH_CACHE_KEY)
+        if result_cache and self.__class__ in result_cache and self.search_arg.lower() in result_cache[self.__class__]:
+            cached = result_cache[self.__class__][self.search_arg.lower()]
+            if cached['timeout'] >= datetime.datetime.now():
+                return cached['results']
+        return None
+
+    def _set_cached_results(self, results, timeout):
+        """Sets the results to the cache for <timeout> minutes."""
+        result_cache = cache.get(SEARCH_CACHE_KEY) or {}
+        cached = result_cache.setdefault(self.__class__, {}).setdefault(self.search_arg.lower(), {})
+        cached.update({
+            'results': results,
+            'timeout': datetime.datetime.now() + datetime.timedelta(minutes=timeout)
+        })
+        cache.set(SEARCH_CACHE_KEY, result_cache, MAX_CACHE_TIMEOUT)
+
+    @property
+    def results(self):
+        """
+        The list of results, from the cache when available.
+
+        Up to ``result_limit + 1`` results are fetched so that
+        ``has_more_results`` can tell whether more exist. If the search
+        raises, the exception propagates when ``settings.DEBUG`` is set;
+        otherwise an empty list is returned and nothing is cached.
+        """
+        if not hasattr(self, '_results'):
+            results = self._get_cached_results()
+            if results is None:
+                # Cache one extra result so we can see if there are
+                # more results to be had.
+                limit = self.result_limit
+                if limit is not None:
+                    limit += 1
+                try:
+                    # Pass the bumped limit, not ``self.result_limit``;
+                    # otherwise the extra result is never fetched.
+                    results = self.get_results(limit)
+                except Exception:
+                    if settings.DEBUG:
+                        raise
+                    # On exceptions, don't set any cache; just return.
+                    return []
+
+                self._set_cached_results(results, self._cache_timeout)
+            self._results = results
+
+        return self._results
+
+    def get_results(self, limit=None, result_class=Result):
+        """
+        Calls self.search() and parses the return value into Result objects.
+        """
+        results = self.search(limit)
+        return [result_class(self, result) for result in results]
+
+    def search(self, limit=None):
+        """
+        Returns an iterable of up to <limit> results. The
+        get_result_title, get_result_url, get_result_template, and
+        get_result_extra_context methods will be used to interpret the
+        individual items that this function returns, so the result can
+        be an object with attributes as easily as a dictionary
+        with keys. The only restriction is that the objects be
+        pickleable so that they can be used with django's cache system.
+        """
+        raise NotImplementedError
+
+    def get_result_title(self, result):
+        "Subclasses override this to provide the title for the result."
+        raise NotImplementedError
+
+    def get_result_url(self, result):
+        "Subclasses override this to provide the actual URL for the result."
+        raise NotImplementedError
+
+    def get_result_querydict(self, result):
+        """Return the tracking querydict for the result's URL and this search's argument."""
+        return make_tracking_querydict(self.search_arg, self.get_result_url(result))
+
+    def get_result_template(self, result):
+        """
+        Return the template used to render a result: the one named by
+        ``result_template`` if the class defines it, otherwise a
+        template built once from DEFAULT_RESULT_TEMPLATE_STRING.
+        """
+        if hasattr(self, 'result_template'):
+            return loader.get_template(self.result_template)
+        if not hasattr(self, '_result_template'):
+            self._result_template = Template(DEFAULT_RESULT_TEMPLATE_STRING)
+        return self._result_template
+
+    def get_ajax_result_template(self, result=None):
+        """
+        Return ``ajax_result_template`` if defined, else the default
+        template string. ``result`` is accepted but not used here; it is
+        optional so the method can be called without one.
+        """
+        return getattr(self, 'ajax_result_template', DEFAULT_RESULT_TEMPLATE_STRING)
+
+    def get_result_extra_context(self, result):
+        return {}
+
+    def has_more_results(self):
+        """Useful to determine whether to display a `view more results` link."""
+        # Without a limit everything was fetched, so nothing more exists.
+        if self.result_limit is None:
+            return False
+        return len(self.results) > self.result_limit
+
+    @property
+    def more_results_url(self):
+        """
+        Returns the actual url for more results. This will be encoded
+        into a querystring for tracking purposes.
+        """
+        raise NotImplementedError
+
+    @property
+    def more_results_querydict(self):
+        return make_tracking_querydict(self.search_arg, self.more_results_url)
+
+    def __unicode__(self):
+        # Drops the last word of the verbose name and appends "results".
+        return ' '.join(self.__class__.verbose_name.rsplit(' ', 1)[:-1]) + ' results'
+
+
+class DatabaseSearch(BaseSearch):
+    """
+    Search whose results come from a queryset on ``model``. Subclasses
+    normally override ``get_queryset`` to filter by the search argument.
+    """
+    model = None
+
+    def has_more_results(self):
+        total = self.get_queryset().count()
+        return total > self.result_limit
+
+    def search(self, limit=None):
+        # The queryset is built (and sliced) once, then reused.
+        if hasattr(self, '_qs'):
+            return self._qs
+
+        queryset = self.get_queryset()
+        if limit is not None:
+            queryset = queryset[:limit]
+        self._qs = queryset
+        return self._qs
+
+    def get_queryset(self):
+        manager = self.model._default_manager
+        return manager.all()
+
+
+class URLSearch(BaseSearch):
+    """
+    Base for searches that fetch their results from a URL. The URL is
+    ``search_url`` followed by ``query_format_str`` filled in with the
+    quoted search argument; subclasses implement ``parse_response``.
+    """
+    search_url = ''
+    query_format_str = "%s"
+
+    @property
+    def url(self):
+        "The URL where the search gets its results."
+        quoted_arg = urlquote_plus(self.search_arg)
+        query = self.query_format_str % quoted_arg
+        return self.search_url + query
+
+    @property
+    def more_results_url(self):
+        "The URL where the users would go to get more results."
+        return self.url
+
+    def parse_response(self, response, limit=None):
+        "Subclasses turn the opened response into an iterable of results."
+        raise NotImplementedError
+
+    def search(self, limit=None):
+        response = urllib2.urlopen(self.url)
+        return self.parse_response(response, limit=limit)
+
+
+class JSONSearch(URLSearch):
+    """
+    URL search whose response body is JSON. By default the decoded
+    document is expected to be a list, which is cut down to ``limit``.
+    """
+    def parse_response(self, response, limit=None):
+        body = response.read()
+        decoded = json.loads(body)
+        return decoded[:limit]
+
+
+class GoogleSearch(JSONSearch):
+    """
+    JSON search against the Google AJAX web search endpoint. Each raw
+    result is the dictionary returned by the service and is also used
+    as the result's extra template context.
+    """
+    search_url = "http://ajax.googleapis.com/ajax/services/search/web"
+    query_format_str = "?v=1.0&q=%s"
+    # TODO: Change this template to reflect the app's actual name.
+    result_template = 'search/googlesearch.html'
+    # NOTE(review): nothing visible reads ``timeout``; the cache lifetime
+    # BaseSearch uses is ``_cache_timeout``. Confirm which was intended.
+    timeout = 60
+    # Class-level defaults: parse_response only sets these when it runs
+    # and finds results, which is not the case when results are served
+    # from the cache or the response is empty.
+    _more_results_url = None
+    _estimated_result_count = 0
+
+    def parse_response(self, response, limit=None):
+        """Decode the response, remember the cursor details, return up to ``limit`` results."""
+        responseData = json.loads(response.read())['responseData']
+        results, cursor = responseData['results'], responseData['cursor']
+
+        if results:
+            self._more_results_url = cursor['moreResultsUrl']
+            # Coerced so the comparison in has_more_results is numeric.
+            # NOTE(review): the service is believed to send this count
+            # as a string -- confirm.
+            self._estimated_result_count = int(cursor['estimatedResultCount'])
+
+        return results[:limit]
+
+    @property
+    def url(self):
+        # Google requires that an ajax request have a proper Referer header.
+        return urllib2.Request(
+            super(GoogleSearch, self).url,
+            None,
+            {'Referer': "http://%s" % Site.objects.get_current().domain}
+        )
+
+    # NOTE(review): this is a property here but a plain method on
+    # BaseSearch; kept as-is so existing attribute-style access works.
+    @property
+    def has_more_results(self):
+        results = self.results
+        return bool(results) and len(results) < self._estimated_result_count
+
+    @property
+    def more_results_url(self):
+        return self._more_results_url
+
+    def get_result_title(self, result):
+        return result['titleNoFormatting']
+
+    def get_result_url(self, result):
+        return result['unescapedUrl']
+
+    def get_result_extra_context(self, result):
+        return result
+
+
+registry.register(GoogleSearch)
+
+
+# ScrapeSearch and XMLSearch are only defined (and exported) when
+# BeautifulSoup is installed.
+try:
+    from BeautifulSoup import BeautifulSoup, SoupStrainer, BeautifulStoneSoup
+except ImportError:
+    pass
+else:
+    __all__ += ('ScrapeSearch', 'XMLSearch',)
+    class ScrapeSearch(URLSearch):
+        """
+        URL search that parses an HTML response with BeautifulSoup,
+        keeping only what the configured SoupStrainer lets through.
+        Subclasses set ``_strainer_args`` / ``_strainer_kwargs`` and
+        implement ``parse_results``.
+        """
+        _strainer_args = []
+        _strainer_kwargs = {}
+
+        @property
+        def strainer(self):
+            # Built once per instance from the class-level arguments.
+            if not hasattr(self, '_strainer'):
+                self._strainer = SoupStrainer(*self._strainer_args, **self._strainer_kwargs)
+            return self._strainer
+
+        def parse_response(self, response, limit=None):
+            strainer = self.strainer
+            soup = BeautifulSoup(response, parseOnlyThese=strainer)
+            # Slice the list of parsed nodes; indexing the soup itself
+            # is attribute lookup, not slicing.
+            return self.parse_results(soup.contents[:limit])
+
+        def parse_results(self, results):
+            """
+            Provides a hook for parsing the results of straining. This
+            has no default behavior because the results absolutely
+            must be parsed to properly extract the information.
+            For more information, see http://www.crummy.com/software/BeautifulSoup/documentation.html#Improving%20Memory%20Usage%20with%20extract
+            """
+            raise NotImplementedError
+
+
+    class XMLSearch(ScrapeSearch):
+        """Same as ScrapeSearch, but parses the response as XML with BeautifulStoneSoup."""
+        _self_closing_tags = []
+
+        def parse_response(self, response, limit=None):
+            strainer = self.strainer
+            # Parse the ``response`` argument; no ``page`` name exists here.
+            soup = BeautifulStoneSoup(response, selfClosingTags=self._self_closing_tags, parseOnlyThese=strainer)
+            return self.parse_results(soup.contents[:limit])
\ No newline at end of file
from django import forms
+from django.core.exceptions import ValidationError
+from django.core.validators import validate_slug
from django.db import models
from django.utils import simplejson as json
+from django.utils.text import capfirst
+from django.utils.translation import ugettext_lazy as _
from philo.forms.fields import JSONFormField
from philo.validators import TemplateValidator, json_validator
return super(JSONField, self).formfield(*args, **kwargs)
+class SlugMultipleChoiceField(models.Field):
+    """
+    Model field holding a list of slugs, stored in a text column as a
+    single comma-separated string and edited with checkboxes.
+    """
+    __metaclass__ = models.SubfieldBase
+    description = _("Comma-separated slug field")
+
+    def get_internal_type(self):
+        return "TextField"
+
+    def to_python(self, value):
+        """Return ``value`` as a list of slugs; empty values become ``[]``."""
+        if not value:
+            return []
+
+        if isinstance(value, list):
+            return value
+
+        # Other sequences of slugs (e.g. tuples) have no ``split``.
+        if isinstance(value, tuple):
+            return list(value)
+
+        return value.split(',')
+
+    def get_prep_value(self, value):
+        """Return the comma-separated string to store for ``value``."""
+        if value is None:
+            return None
+        # An already-joined string must not be joined again, which would
+        # put a comma between each of its characters.
+        if isinstance(value, basestring):
+            return value
+        return ','.join(value)
+
+    def formfield(self, **kwargs):
+        # This is necessary because django hard-codes TypedChoiceField for things with choices.
+        defaults = {
+            'widget': forms.CheckboxSelectMultiple,
+            'choices': self.get_choices(include_blank=False),
+            'label': capfirst(self.verbose_name),
+            'required': not self.blank,
+            'help_text': self.help_text
+        }
+        if self.has_default():
+            if callable(self.default):
+                defaults['initial'] = self.default
+                defaults['show_hidden_initial'] = True
+            else:
+                defaults['initial'] = self.get_default()
+
+        # Keep only the arguments the form field accepts. A filtered
+        # copy is built instead of deleting keys during iteration.
+        accepted = ('coerce', 'empty_value', 'choices', 'required',
+                    'widget', 'label', 'initial', 'help_text',
+                    'error_messages', 'show_hidden_initial')
+        kwargs = dict((k, v) for k, v in kwargs.items() if k in accepted)
+
+        defaults.update(kwargs)
+        form_class = forms.TypedMultipleChoiceField
+        return form_class(**defaults)
+
+    def validate(self, value, model_instance):
+        """Raise ValidationError listing every entry of ``value`` that is not a valid slug."""
+        invalid_values = []
+        for val in value:
+            try:
+                validate_slug(val)
+            except ValidationError:
+                invalid_values.append(val)
+
+        if invalid_values:
+            # should really make a custom message.
+            raise ValidationError(self.error_messages['invalid_choice'] % invalid_values)
+
+
try:
    from south.modelsinspector import add_introspection_rules
except ImportError:
    pass
else:
+    # The rule must name the field class this module actually defines.
+    add_introspection_rules([], ["^philo\.models\.fields\.SlugMultipleChoiceField"])
    add_introspection_rules([], ["^philo\.models\.fields\.TemplateField"])
    add_introspection_rules([], ["^philo\.models\.fields\.JSONField"])
\ No newline at end of file