From: Stephen Burrows Date: Tue, 7 Jun 2011 21:05:02 +0000 (-0400) Subject: Adjusted sobol cache usage to make an entry for every string/search combination or... X-Git-Tag: philo-0.9~5^2~6 X-Git-Url: http://git.ithinksw.org/philo.git/commitdiff_plain/1a3157137358d887b9c93d91e87df9ffce6c2f45?ds=inline;hp=-c Adjusted sobol cache usage to make an entry for every string/search combination or not use the cache at all. This is a more correct way of handling things, and anyone worried about too many cache entries from sobol can just increase the size of their cache. Also, sobol now caches search instances rather than just results - searches like GoogleSearch can now persist non-result state acquired with the results. --- 1a3157137358d887b9c93d91e87df9ffce6c2f45 diff --git a/philo/contrib/sobol/__init__.py b/philo/contrib/sobol/__init__.py index cd75f13..74ca4f1 100644 --- a/philo/contrib/sobol/__init__.py +++ b/philo/contrib/sobol/__init__.py @@ -1,5 +1,14 @@ """ -Sobol implements a generic search interface, which can be used to search databases or websites. No assumptions are made about the search method, and the results are cached using django's caching. +Sobol implements a generic search interface, which can be used to search databases or websites. No assumptions are made about the search method. If SOBOL_USE_CACHE is ``True`` (default), the results will be cached using django's cache framework. Be aware that this may use a large number of cache entries, as a unique entry will be made for each search string for each type of search. + +Settings +-------- + +:setting:`SOBOL_USE_CACHE` + Whether sobol will use django's cache framework. Defaults to ``True``; this may cause a lot of entries in the cache. + +:setting:`SOBOL_USE_EVENTLET` + If :mod:`eventlet` is installed and this setting is ``True``, sobol web searches will use :mod:`eventlet.green.urllib2` instead of the built-in :mod:`urllib2` module. Default: ``False``. """ diff --git a/philo/contrib/sobol/models.py b/philo/contrib/sobol/models.py index 7687c09..43b78b4 100644 --- a/philo/contrib/sobol/models.py +++ b/philo/contrib/sobol/models.py @@ -11,7 +11,7 @@ from django.http import HttpResponseRedirect, Http404, HttpResponse from django.utils import simplejson as json from django.utils.datastructures import SortedDict -from philo.contrib.sobol import registry +from philo.contrib.sobol import registry, get_search_instance from philo.contrib.sobol.forms import SearchForm from philo.contrib.sobol.utils import HASH_REDIRECT_GET_KEY, URL_REDIRECT_GET_KEY, SEARCH_ARG_GET_KEY, check_redirect_hash, RegistryIterator from philo.exceptions import ViewCanNotProvideSubpath @@ -206,10 +206,6 @@ class SearchView(MultiView): ) return urlpatterns - def get_search_instance(self, slug, search_string): - """Returns an instance of the :class:`.BaseSearch` subclass corresponding to ``slug`` in the :class:`.SearchRegistry` and instantiated with ``search_string``.""" - return registry[slug](search_string.lower()) - def results_view(self, request, extra_context=None): """ Renders :attr:`results_page` with a context containing an instance of :attr:`search_form`. If the form was submitted and was valid, then one of two things has happened: @@ -245,7 +241,7 @@ class SearchView(MultiView): search_instances = [] for slug in self.searches: - search_instance = self.get_search_instance(slug, search_string) + search_instance = get_search_instance(slug, search_string) search_instances.append(search_instance) if self.enable_ajax_api: @@ -287,7 +283,7 @@ class SearchView(MultiView): if not request.is_ajax() or not self.enable_ajax_api or slug not in self.searches or search_string is None: raise Http404 - search_instance = self.get_search_instance(slug, search_string) + search_instance = get_search_instance(slug, search_string) return HttpResponse(json.dumps({ 'results': [result.get_context() for result in search_instance.results], diff --git a/philo/contrib/sobol/search.py b/philo/contrib/sobol/search.py index 2dbd4a7..5cc8090 100644 --- a/philo/contrib/sobol/search.py +++ b/philo/contrib/sobol/search.py @@ -1,5 +1,6 @@ #encoding: utf-8 import datetime +from hashlib import sha1 from django.conf import settings from django.contrib.sites.models import Site @@ -24,16 +25,12 @@ else: __all__ = ( - 'Result', 'BaseSearch', 'DatabaseSearch', 'URLSearch', 'JSONSearch', 'GoogleSearch', 'SearchRegistry', 'registry' + 'Result', 'BaseSearch', 'DatabaseSearch', 'URLSearch', 'JSONSearch', 'GoogleSearch', 'SearchRegistry', 'registry', 'get_search_instance' ) -SEARCH_CACHE_KEY = 'philo_sobol_search_results' -DEFAULT_RESULT_TEMPLATE_STRING = "{% if url %}{% endif %}{{ title }}{% if url %}{% endif %}" -DEFAULT_RESULT_TEMPLATE = Template(DEFAULT_RESULT_TEMPLATE_STRING) - -# Determines the timeout on the entire result cache. -MAX_CACHE_TIMEOUT = 60*24*7 +SEARCH_CACHE_SEED = 'philo_sobol_search_results' +USE_CACHE = getattr(settings, 'SOBOL_USE_SEARCH', True) class RegistrationError(Exception): @@ -106,6 +103,23 @@ class SearchRegistry(object): registry = SearchRegistry() +def _make_cache_key(search, search_arg): + return sha1(SEARCH_CACHE_SEED + search.slug + search_arg).hexdigest() + + +def get_search_instance(slug, search_arg): + """Returns a search instance for the given slug, either from the cache or newly-instantiated.""" + search = registry[slug] + search_arg = search_arg.lower() + if USE_CACHE: + key = _make_cache_key(search, search_arg) + cached = cache.get(key) + if cached: + return cached + return search(search_arg) + + + class Result(object): """ :class:`Result` is a helper class that, given a search and a result of that search, is able to correctly render itself with a template defined by the search. Every :class:`Result` will pass a ``title``, a ``url`` (if applicable), and the raw ``result`` returned by the search into the template context when rendering. @@ -189,50 +203,34 @@ class BaseSearch(object): result_limit = 10 #: How long the items for the search should be cached (in minutes). Default: 48 hours. _cache_timeout = 60*48 + #: The path to the template which will be used to render the :class:`Result`\ s for this search. + result_template = "sobol/search/basesearch.html" def __init__(self, search_arg): self.search_arg = search_arg - def _get_cached_results(self): - """Return the cached results if the results haven't timed out. Otherwise return None.""" - result_cache = cache.get(SEARCH_CACHE_KEY) - if result_cache and self.__class__ in result_cache and self.search_arg.lower() in result_cache[self.__class__]: - cached = result_cache[self.__class__][self.search_arg.lower()] - if cached['timeout'] >= datetime.datetime.now(): - return cached['results'] - return None - - def _set_cached_results(self, results, timeout): - """Sets the results to the cache for minutes.""" - result_cache = cache.get(SEARCH_CACHE_KEY) or {} - cached = result_cache.setdefault(self.__class__, {}).setdefault(self.search_arg.lower(), {}) - cached.update({ - 'results': results, - 'timeout': datetime.datetime.now() + datetime.timedelta(minutes=timeout) - }) - cache.set(SEARCH_CACHE_KEY, result_cache, MAX_CACHE_TIMEOUT) - @property def results(self): """Retrieves cached results or initiates a new search via :meth:`get_results` and caches the results.""" if not hasattr(self, '_results'): - results = self._get_cached_results() - if results is None: - try: - # Cache one extra result so we can see if there are - # more results to be had. - limit = self.result_limit - if limit is not None: - limit += 1 - results = self.get_results(limit) - except: - if settings.DEBUG: - raise - # On exceptions, don't set any cache; just return. - return [] + try: + # Cache one extra result so we can see if there are + # more results to be had. + limit = self.result_limit + if limit is not None: + limit += 1 + results = self.get_results(limit) + except: + if settings.DEBUG: + raise + # On exceptions, don't set any cache; just return. + return [] - self._set_cached_results(results, self._cache_timeout) self._results = results + + if USE_CACHE: + key = _make_cache_key(self, self.search_arg) + cache.set(key, self, self._cache_timeout) return self._results @@ -268,11 +266,7 @@ class BaseSearch(object): def get_result_template(self, result): """Returns the template to be used for rendering the ``result``.""" - if hasattr(self, 'result_template'): - return loader.get_template(self.result_template) - if not hasattr(self, '_result_template'): - self._result_template = DEFAULT_RESULT_TEMPLATE - return self._result_template + return loader.get_template(self.result_template) def get_result_extra_context(self, result): """Returns any extra context to be used when rendering the ``result``.""" @@ -347,9 +341,9 @@ class JSONSearch(URLSearch): class GoogleSearch(JSONSearch): """An example implementation of a :class:`JSONSearch`.""" search_url = "http://ajax.googleapis.com/ajax/services/search/web" - result_template = 'search/googlesearch.html' _cache_timeout = 60 verbose_name = "Google search (current site)" + result_template = "sobol/search/googlesearch.html" @property def query_format_str(self): diff --git a/philo/contrib/sobol/templates/sobol/search/basesearch.html b/philo/contrib/sobol/templates/sobol/search/basesearch.html new file mode 100644 index 0000000..9469143 --- /dev/null +++ b/philo/contrib/sobol/templates/sobol/search/basesearch.html @@ -0,0 +1 @@ +{% if url %}{% endif %}{{ title }}{% if url %}{% endif %} \ No newline at end of file diff --git a/philo/contrib/sobol/templates/search/googlesearch.html b/philo/contrib/sobol/templates/sobol/search/googlesearch.html similarity index 100% rename from philo/contrib/sobol/templates/search/googlesearch.html rename to philo/contrib/sobol/templates/sobol/search/googlesearch.html