#encoding: utf-8
+import datetime
+from hashlib import sha1
from django.conf import settings
from django.contrib.sites.models import Site
from django.utils.safestring import mark_safe
from django.utils.text import capfirst
from django.template import loader, Context, Template
-import datetime
-from philo.contrib.sobol.utils import make_tracking_querydict
-try:
- from eventlet.green import urllib2
-except:
+from philo.contrib.sobol.utils import make_tracking_querydict, RegistryIterator
+
+
+if getattr(settings, 'SOBOL_USE_EVENTLET', False):
+ try:
+ from eventlet.green import urllib2
+ except:
+ import urllib2
+else:
import urllib2
# Public API of this module. Kept as a tuple because optional searches are
# appended to it later with ``__all__ += (...)``.
__all__ = (
    'Result', 'BaseSearch', 'DatabaseSearch', 'URLSearch', 'JSONSearch',
    'GoogleSearch', 'SearchRegistry', 'registry', 'get_search_instance',
)


# Constant prefix mixed into every cache key generated for a search.
SEARCH_CACHE_SEED = 'philo_sobol_search_results'

# Whether search instances are read from / written to the cache.
# ``SOBOL_USE_CACHE`` is the name that matches what this flag controls; the
# older ``SOBOL_USE_SEARCH`` key is still honoured so that existing settings
# files keep working. Caching is on by default.
USE_CACHE = getattr(
    settings,
    'SOBOL_USE_CACHE',
    getattr(settings, 'SOBOL_USE_SEARCH', True),
)
class RegistrationError(Exception):
    """Raised if there is a problem registering a search with a :class:`SearchRegistry`."""
class SearchRegistry(object):
    """Holds a registry of search types by slug."""

    def __init__(self):
        # Maps slug -> search class.
        self._registry = {}

    def register(self, search, slug=None):
        """
        Register a search with the registry.

        :param search: The search class to register - generally a subclass of :class:`BaseSearch`
        :param slug: The slug which will be used to register the search class. If ``slug`` is ``None``, the search's default slug will be used.
        :raises: :class:`RegistrationError` if a different search is already registered with ``slug``.

        """
        slug = slug or search.slug
        if slug in self._registry:
            registered = self._registry[slug]
            # Re-registering the same search is a harmless no-op; replacing
            # somebody else's registration is not.
            if registered != search:
                raise RegistrationError(
                    "A different search is already registered as `%s`" % slug
                )
        self._registry[slug] = search

    def unregister(self, search, slug=None):
        """
        Unregister a search from the registry.

        :param search: The search class to unregister - generally a subclass of :class:`BaseSearch`
        :param slug: If provided, the search will only be removed if it was registered with ``slug``. If not provided, the search class will be unregistered no matter what slug it was registered with.
        :raises: :class:`RegistrationError` if a slug is provided but the search registered with that slug is not ``search``.

        """
        if slug is not None:
            if slug in self._registry and self._registry[slug] == search:
                del self._registry[slug]
            else:
                raise RegistrationError(
                    "`%s` is not registered as `%s`" % (search, slug)
                )
        else:
            # Iterate over a snapshot: entries are deleted while looping.
            for registered_slug, registered in list(self._registry.items()):
                if registered == search:
                    del self._registry[registered_slug]

    def items(self):
        """Returns a list of (slug, search) items in the registry."""
        return list(self._registry.items())

    def iteritems(self):
        """Returns an iterator over the (slug, search) pairs in the registry."""
        return RegistryIterator(self._registry, 'iteritems')

    def iterchoices(self):
        """Returns an iterator over (slug, search.verbose_name) pairs for the registry."""
        return RegistryIterator(
            self._registry,
            'iteritems',
            lambda x: (x[0], x[1].verbose_name),
        )

    def __getitem__(self, key):
        """Returns the search registered with ``key``."""
        return self._registry[key]

    def __iter__(self):
        """Returns an iterator over the keys in the registry."""
        return iter(self._registry)
registry = SearchRegistry()
def _make_cache_key(search, search_arg):
    """
    Returns the cache key for ``search`` (a search class or instance) and ``search_arg``.

    The key is the SHA-1 hex digest of :data:`SEARCH_CACHE_SEED`, the search's ``slug`` and the search argument, so it is always a fixed-length ASCII string regardless of what was searched for.

    """
    raw_key = SEARCH_CACHE_SEED + search.slug + search_arg
    # sha1() needs bytes. Encode text explicitly so that non-ASCII search
    # terms are hashed instead of raising; for pure-ASCII input the digest
    # is unchanged.
    if not isinstance(raw_key, bytes):
        raw_key = raw_key.encode('utf-8')
    return sha1(raw_key).hexdigest()
+
+
def get_search_instance(slug, search_arg):
    """
    Returns a search instance for the given slug, either from the cache or newly-instantiated.

    :param slug: The slug under which the search class is registered in :data:`registry`.
    :param search_arg: The string being searched for. It is lower-cased before use, so lookups are case-insensitive.
    :raises: :exc:`KeyError` if nothing is registered under ``slug``.

    """
    search = registry[slug]
    search_arg = search_arg.lower()
    if USE_CACHE:
        key = _make_cache_key(search, search_arg)
        cached = cache.get(key)
        # cache.get() returns None on a miss.
        if cached is not None:
            return cached
    return search(search_arg)
+
+
+
class Result(object):
    """
    :class:`Result` is a helper class that, given a search and a result of that search, is able to correctly render itself with a template defined by the search. Every :class:`Result` will pass a ``title``, a ``url`` (if applicable), and the raw ``result`` returned by the search into the template context when rendering.

    :param search: An instance of a :class:`BaseSearch` subclass or an object that implements the same API.
    :param result: An arbitrary result from the ``search``.

    """
    def __init__(self, search, result):
        self.search = search
        self.result = result

    def get_title(self):
        """Returns the title of the result by calling :meth:`BaseSearch.get_result_title` on the raw result."""
        return self.search.get_result_title(self.result)

    def get_url(self):
        """Returns the url of the result or an empty string by calling :meth:`BaseSearch.get_result_querydict` on the raw result and then encoding the querydict returned."""
        qd = self.search.get_result_querydict(self.result)
        if qd is None:
            return ""
        return "?%s" % qd.urlencode()

    def get_template(self):
        """Returns the template for the result by calling :meth:`BaseSearch.get_result_template` on the raw result."""
        return self.search.get_result_template(self.result)

    def get_extra_context(self):
        """Returns any extra context for the result by calling :meth:`BaseSearch.get_result_extra_context` on the raw result."""
        return self.search.get_result_extra_context(self.result)

    def get_context(self):
        """
        Returns the context dictionary for the result. This is used both in rendering the result and in the AJAX return value for :meth:`.SearchView.ajax_api_view`. The context will contain everything from :meth:`get_extra_context` as well as the following keys:

        title
            The result of calling :meth:`get_title`
        url
            The result of calling :meth:`get_url`
        result
            The raw result which the :class:`Result` was instantiated with.

        """
        # Work on a copy: the search may hand back a shared dict (or the raw
        # result itself), and that object must not pick up our extra keys.
        context = dict(self.get_extra_context())
        context.update({
            'title': self.get_title(),
            'url': self.get_url(),
            'result': self.result,
        })
        return context

    def render(self):
        """Returns the template from :meth:`get_template` rendered with the context from :meth:`get_context`."""
        t = self.get_template()
        c = Context(self.get_context())
        return t.render(c)

    def __unicode__(self):
        """Returns :meth:`render`"""
        return self.render()
class BaseSearchMetaclass(type):
    """
    Fills in ``verbose_name`` and ``slug`` on search classes that do not define them.

    Both defaults are derived from the class name with a trailing ``Search`` removed. The suffix is only removed when something is left afterwards, so neither attribute is ever generated as an empty string.

    """
    def __new__(cls, name, bases, attrs):
        suffix = "Search"
        strip_suffix = name.endswith(suffix) and len(name) > len(suffix)

        if 'verbose_name' not in attrs:
            words = convert_camelcase(name)
            # NOTE(review): assumes convert_camelcase() separates words with
            # single spaces, so the trailing word is the "search" suffix.
            if strip_suffix and ' ' in words:
                words = words.rsplit(' ', 1)[0]
            attrs['verbose_name'] = capfirst(words)

        if 'slug' not in attrs:
            base_name = name[:-len(suffix)] if strip_suffix else name
            attrs['slug'] = base_name.lower()

        return super(BaseSearchMetaclass, cls).__new__(cls, name, bases, attrs)
class BaseSearch(object):
    """
    Defines a generic search api. Accessing :attr:`results` will attempt to retrieve cached results and, if that fails, will initiate a new search and store the results in the cache. Each search has a ``verbose_name`` and a ``slug``. If these are not provided as attributes, they will be automatically generated based on the name of the class.

    :param search_arg: The string which is being searched for.

    """
    __metaclass__ = BaseSearchMetaclass
    #: The number of results to return from the complete list. Default: 5
    result_limit = 5
    #: How long the items for the search should be cached (in minutes). Default: 48 hours.
    _cache_timeout = 60*48
    #: The path to the template which will be used to render the :class:`Result`\ s for this search.
    result_template = "sobol/search/basesearch.html"

    def __init__(self, search_arg):
        self.search_arg = search_arg

    @property
    def results(self):
        """
        Retrieves cached results or initiates a new search via :meth:`get_results` and caches the results.

        Up to ``result_limit + 1`` results are fetched so that :attr:`has_more_results` can tell whether anything was left out. If the search raises, the exception propagates when ``settings.DEBUG`` is on; otherwise an empty list is returned and nothing is cached.

        """
        if not hasattr(self, '_results'):
            try:
                # Cache one extra result so we can see if there are
                # more results to be had.
                limit = self.result_limit
                if limit is not None:
                    limit += 1
                results = self.get_results(limit)
            except Exception:
                if settings.DEBUG:
                    raise
                # On exceptions, don't set any cache; just return.
                return []

            self._results = results

            if USE_CACHE:
                key = _make_cache_key(self, self.search_arg)
                # _cache_timeout is expressed in minutes, while the cache
                # backend expects seconds.
                cache.set(key, self, self._cache_timeout * 60)
        return self._results

    def get_results(self, limit=None, result_class=Result):
        """
        Calls :meth:`search` and parses the return value into :class:`Result` instances.

        :param limit: Passed directly to :meth:`search`.
        :param result_class: The class used to represent the results. This will be instantiated with the :class:`BaseSearch` instance and the raw result from the search.

        """
        results = self.search(limit)
        return [result_class(self, result) for result in results]

    def search(self, limit=None):
        """Returns an iterable of up to ``limit`` results. The :meth:`get_result_title`, :meth:`get_result_url`, :meth:`get_result_template`, and :meth:`get_result_extra_context` methods will be used to interpret the individual items that this function returns, so the result can be an object with attributes as easily as a dictionary with keys. However, keep in mind that the raw results will be stored with django's caching mechanisms and will be converted to JSON."""
        raise NotImplementedError

    def get_result_title(self, result):
        """Returns the title of the ``result``. Must be implemented by subclasses."""
        raise NotImplementedError

    def get_result_url(self, result):
        """Returns the actual URL for the ``result`` or ``None`` if there is no URL. Must be implemented by subclasses."""
        raise NotImplementedError

    def get_result_querydict(self, result):
        """Returns a querydict for tracking selection of the result, or ``None`` if there is no URL for the result."""
        url = self.get_result_url(result)
        if url is None:
            return None
        return make_tracking_querydict(self.search_arg, url)

    def get_result_template(self, result):
        """Returns the template to be used for rendering the ``result``."""
        return loader.get_template(self.result_template)

    def get_result_extra_context(self, result):
        """Returns any extra context to be used when rendering the ``result``."""
        return {}

    @property
    def has_more_results(self):
        """Returns ``True`` if there are more results than :attr:`result_limit` and ``False`` otherwise. With no limit (``result_limit`` is ``None``) every result is already included, so this is always ``False``."""
        if self.result_limit is None:
            return False
        return len(self.results) > self.result_limit

    @property
    def more_results_url(self):
        """Returns the actual url for more results. This should be accessed through :attr:`more_results_querydict` in the template so that the click can be tracked. By default, simply returns ``None``."""
        return None

    @property
    def more_results_querydict(self):
        """Returns a :class:`QueryDict` for tracking whether people click on a 'more results' link, or ``None`` if there is no such link."""
        url = self.more_results_url
        if url:
            return make_tracking_querydict(self.search_arg, url)
        return None

    def __unicode__(self):
        return self.verbose_name
class DatabaseSearch(BaseSearch):
    """Implements :meth:`~BaseSearch.search` and :meth:`get_queryset` methods to handle database queries."""
    #: The model which should be searched by the :class:`DatabaseSearch`.
    model = None

    def search(self, limit=None):
        """
        Returns the queryset from :meth:`get_queryset`, sliced to ``limit`` items if a limit is given.

        The queryset is built on first use and stored on the instance as ``_qs``; later calls return that same queryset.

        """
        if not hasattr(self, '_qs'):
            queryset = self.get_queryset()
            if limit is not None:
                queryset = queryset[:limit]
            self._qs = queryset
        return self._qs

    def get_queryset(self):
        """Returns a :class:`QuerySet` of all instances of :attr:`model`. This method should be overridden by subclasses to specify how the search should actually be implemented for the model."""
        return self.model._default_manager.all()
class URLSearch(BaseSearch):
    """Defines a generic interface for searches that require accessing a certain url to get search results."""
    #: The base URL which will be accessed to get the search results.
    search_url = ''
    #: The url-encoded query string to be used for fetching search results from :attr:`search_url`. Must have one ``%s`` to contain the search argument.
    query_format_str = "%s"

    @property
    def url(self):
        """The URL where the search gets its results. Composed from :attr:`search_url` and :attr:`query_format_str`."""
        return self.search_url + self.query_format_str % urlquote_plus(self.search_arg)

    @property
    def more_results_url(self):
        """The URL where users would go to get more results; by default the same as :attr:`url`."""
        return self.url

    def parse_response(self, response, limit=None):
        """Handles the ``response`` from accessing :attr:`url` (with :func:`urllib2.urlopen`) and returns a list of up to ``limit`` results."""
        raise NotImplementedError

    def search(self, limit=None):
        """Opens :attr:`url` with :func:`urllib2.urlopen` and returns whatever :meth:`parse_response` makes of the response."""
        return self.parse_response(urllib2.urlopen(self.url), limit=limit)
class JSONSearch(URLSearch):
    """Makes a GET request and parses the results as JSON. The default behavior assumes that the response contains a list of results."""

    def parse_response(self, response, limit=None):
        """Decodes the body of ``response`` as JSON and returns its first ``limit`` items (all of them if ``limit`` is ``None``)."""
        return json.loads(response.read())[:limit]
# NOTE(review): everything below is unresolved patch text ("+" = added line,
# "-" = removed line) with its indentation stripped; it has to be resolved
# and re-indented before this class can be imported.
class GoogleSearch(JSONSearch):
+ """An example implementation of a :class:`JSONSearch`."""
search_url = "http://ajax.googleapis.com/ajax/services/search/web"
- # TODO: Change this template to reflect the app's actual name.
- result_template = 'search/googlesearch.html'
# Overrides the BaseSearch default; presumably minutes, as documented on
# BaseSearch._cache_timeout -- TODO confirm.
_cache_timeout = 60
verbose_name = "Google search (current site)"
+ result_template = "sobol/search/googlesearch.html"
+ _more_results_url = None
# NOTE(review): no body for query_format_str is visible here; it has to be
# restored from the original file rather than guessed.
@property
def query_format_str(self):
@property
def default_args(self):
+ """Unquoted default arguments for the :class:`GoogleSearch`."""
# Restricts the query to the current Site's domain with a "site:" term.
return "site:%s" % Site.objects.get_current().domain
# NOTE(review): no body for parse_response is visible here either; nothing
# in this view assigns _more_results_url, so that logic presumably lives in
# the missing body -- verify against the original file.
def parse_response(self, response, limit=None):
def get_result_url(self, result):
# The raw result is indexed by key, so it is expected to be dict-like.
return result['unescapedUrl']
-
- def get_result_extra_context(self, result):
- return result
registry.register(GoogleSearch)
else:
__all__ += ('ScrapeSearch', 'XMLSearch',)
# NOTE(review): everything below is unresolved patch text ("+" = added line,
# "-" = removed line) with its indentation stripped; it has to be resolved
# and re-indented before this class can be imported.
class ScrapeSearch(URLSearch):
- _strainer_args = []
- _strainer_kwargs = {}
+ """A base class for scrape-style searching, available if :mod:`BeautifulSoup` is installed."""
+ #: Arguments to be passed into a :class:`SoupStrainer`.
+ strainer_args = []
+ #: Keyword arguments to be passed into a :class:`SoupStrainer`.
+ strainer_kwargs = {}
@property
def strainer(self):
+ """
+ Caches and returns a :class:`SoupStrainer` initialized with :attr:`strainer_args` and :attr:`strainer_kwargs`. This strainer will be used to parse only certain parts of the document.
+
+ .. seealso:: `BeautifulSoup: Improving Performance by Parsing Only Part of the Document <http://www.crummy.com/software/BeautifulSoup/documentation.html#Improving%20Performance%20by%20Parsing%20Only%20Part%20of%20the%20Document>`_
+
+ """
# Built once per instance and stored on the instance as ``_strainer``.
if not hasattr(self, '_strainer'):
- self._strainer = SoupStrainer(*self._strainer_args, **self._strainer_kwargs)
+ self._strainer = SoupStrainer(*self.strainer_args, **self.strainer_kwargs)
return self._strainer
# NOTE(review): no body for parse_response is visible here; it has to be
# restored from the original file rather than guessed.
def parse_response(self, response, limit=None):
def parse_results(self, results):
"""
- Provides a hook for parsing the results of straining. This
- has no default behavior because the results absolutely
- must be parsed to properly extract the information.
- For more information, see http://www.crummy.com/software/BeautifulSoup/documentation.html#Improving%20Memory%20Usage%20with%20extract
+ Provides a hook for parsing the results of straining. This has no default behavior and must be implemented by subclasses because the results absolutely must be parsed to properly extract the information.
+
+ .. seealso:: `BeautifulSoup: Improving Memory Usage with extract <http://www.crummy.com/software/BeautifulSoup/documentation.html#Improving%20Memory%20Usage%20with%20extract>`_
"""
raise NotImplementedError
class XMLSearch(ScrapeSearch):
    """A base class for searching XML results."""
    #: Self-closing tag names to be used when interpreting the XML document
    #:
    #: .. seealso:: `BeautifulSoup: Parsing XML <http://www.crummy.com/software/BeautifulSoup/documentation.html#Parsing%20XML>`_
    self_closing_tags = []

    def parse_response(self, response, limit=None):
        """Parses ``response`` as XML, restricted to the parts matched by :attr:`strainer`, and passes up to ``limit`` top-level matches to :meth:`parse_results`."""
        strainer = self.strainer
        soup = BeautifulStoneSoup(response, selfClosingTags=self.self_closing_tags, parseOnlyThese=strainer)
        return self.parse_results(soup.findAll(recursive=False, limit=limit))
\ No newline at end of file