Moved sobol's search registry into core as a generic registry utility. Added support...

[philo.git] / philo / contrib / sobol / search.py
diff --git a/philo/contrib/sobol/search.py b/philo/contrib/sobol/search.py

index 6cd577d..b0dca84 100644 (file)
--- a/philo/contrib/sobol/search.py
+++ b/philo/contrib/sobol/search.py
@@ -12,6 +12,7 @@ from django.utils.text import capfirst
  from django.template import loader, Context, Template
  
  from philo.contrib.sobol.utils import make_tracking_querydict
+from philo.utils.registry import Registry
  
  
  if getattr(settings, 'SOBOL_USE_EVENTLET', False):
@@ -36,102 +37,76 @@ DEFAULT_RESULT_TEMPLATE = Template(DEFAULT_RESULT_TEMPLATE_STRING)
  MAX_CACHE_TIMEOUT = 60*24*7
  
  
-class RegistrationError(Exception):
-       pass
-
-
-class SearchRegistry(object):
-       # Holds a registry of search types by slug.
-       def __init__(self):
-               self._registry = {}
-       
-       def register(self, search, slug=None):
-               slug = slug or search.slug
-               if slug in self._registry:
-                       registered = self._registry[slug]
-                       if registered.__module__ != search.__module__:
-                               raise RegistrationError("A different search is already registered as `%s`" % slug)
-               else:
-                       self._registry[slug] = search
-       
-       def unregister(self, search, slug=None):
-               if slug is not None:
-                       if slug in self._registry and self._registry[slug] == search:
-                               del self._registry[slug]
-                       raise RegistrationError("`%s` is not registered as `%s`" % (search, slug))
-               else:
-                       for slug, search in self._registry.items():
-                               if search == search:
-                                       del self._registry[slug]
-       
-       def items(self):
-               return self._registry.items()
-       
-       def iteritems(self):
-               return self._registry.iteritems()
-       
-       def iterchoices(self):
-               for slug, search in self.iteritems():
-                       yield slug, search.verbose_name
-       
-       def __getitem__(self, key):
-               return self._registry[key]
-       
-       def __iter__(self):
-               return self._registry.__iter__()
-
-
-registry = SearchRegistry()
+#: A registry for :class:`BaseSearch` subclasses that should be available in the admin.
+registry = Registry()
  
  
  class Result(object):
         """
-       A result is instantiated with a configuration dictionary, a search,
-       and a template name. The configuration dictionary is expected to
-       define a `title` and optionally a `url`. Any other variables may be
-       defined; they will be made available through the result object in
-       the template, if one is defined.
+       :class:`Result` is a helper class that, given a search and a result of that search, is able to correctly render itself with a template defined by the search. Every :class:`Result` will pass a ``title``, a ``url`` (if applicable), and the raw ``result`` returned by the search into the template context when rendering.
+       
+       :param search: An instance of a :class:`BaseSearch` subclass or an object that implements the same API.
+       :param result: An arbitrary result from the ``search``.
+       
         """
         def __init__(self, search, result):
                 self.search = search
                 self.result = result
         
         def get_title(self):
+               """Returns the title of the result by calling :meth:`BaseSearch.get_result_title` on the raw result."""
                 return self.search.get_result_title(self.result)
         
         def get_url(self):
+               """Returns the url of the result as an encoded query string (prefixed with ``?``), or an empty string if the result has no url. This is done by calling :meth:`BaseSearch.get_result_querydict` on the raw result and then encoding the querydict returned."""
                 qd = self.search.get_result_querydict(self.result)
                 if qd is None:
                         return ""
                 return "?%s" % qd.urlencode()
         
         def get_template(self):
+               """Returns the template for the result by calling :meth:`BaseSearch.get_result_template` on the raw result."""
                 return self.search.get_result_template(self.result)
         
         def get_extra_context(self):
+               """Returns any extra context for the result by calling :meth:`BaseSearch.get_result_extra_context` on the raw result."""
                 return self.search.get_result_extra_context(self.result)
         
         def get_context(self):
+               """
+               Returns the context dictionary for the result. This is used both in rendering the result and in the AJAX return value for :meth:`.SearchView.ajax_api_view`. The context will contain everything from :meth:`get_extra_context` as well as the following keys:
+               
+               title
+                       The result of calling :meth:`get_title`
+               url
+                       The result of calling :meth:`get_url`
+               result
+                       The raw result which the :class:`Result` was instantiated with.
+               
+               """
                 context = self.get_extra_context()
                 context.update({
                         'title': self.get_title(),
-                       'url': self.get_url()
+                       'url': self.get_url(),
+                       'result': self.result
                 })
                 return context
         
         def render(self):
+               """Returns the template from :meth:`get_template` rendered with the context from :meth:`get_context`."""
                 t = self.get_template()
                 c = Context(self.get_context())
                 return t.render(c)
         
         def __unicode__(self):
+               """Returns the result of calling :meth:`render`."""
                 return self.render()
  
  
  class BaseSearchMetaclass(type):
         def __new__(cls, name, bases, attrs):
                 if 'verbose_name' not in attrs:
-                       attrs['verbose_name'] = capfirst(convert_camelcase(name))
+                       attrs['verbose_name'] = capfirst(' '.join(convert_camelcase(name).rsplit(' ', 1)[:-1]))
                 if 'slug' not in attrs:
                         attrs['slug'] = name.lower()
                 return super(BaseSearchMetaclass, cls).__new__(cls, name, bases, attrs)
@@ -139,12 +114,15 @@ class BaseSearchMetaclass(type):
  
  class BaseSearch(object):
         """
-       Defines a generic search interface. Accessing self.results will
-       attempt to retrieve cached results and, if that fails, will
-       initiate a new search and store the results in the cache.
+       Defines a generic search API. Accessing :attr:`results` will attempt to retrieve cached results and, if that fails, will initiate a new search and store the results in the cache. Each search has a ``verbose_name`` and a ``slug``. If these are not provided as attributes, they will be automatically generated based on the name of the class.
+       
+       :param search_arg: The string which is being searched for.
+       
         """
         __metaclass__ = BaseSearchMetaclass
+       #: The number of results to return from the complete list. Default: 10
         result_limit = 10
+       #: How long the items for the search should be cached (in minutes). Default: 48 hours.
         _cache_timeout = 60*48
         
         def __init__(self, search_arg):
@@ -171,6 +149,7 @@ class BaseSearch(object):
         
         @property
         def results(self):
+               """Retrieves cached results or initiates a new search via :meth:`get_results` and caches the results."""
                 if not hasattr(self, '_results'):
                         results = self._get_cached_results()
                         if results is None:
@@ -194,37 +173,36 @@ class BaseSearch(object):
         
         def get_results(self, limit=None, result_class=Result):
                 """
-               Calls self.search() and parses the return value into Result objects.
+               Calls :meth:`search` and parses the return value into :class:`Result` instances.
+               
+               :param limit: Passed directly to :meth:`search`.
+               :param result_class: The class used to represent the results. This will be instantiated with the :class:`BaseSearch` instance and the raw result from the search.
+               
                 """
                 results = self.search(limit)
                 return [result_class(self, result) for result in results]
         
         def search(self, limit=None):
-               """
-               Returns an iterable of up to <limit> results. The
-               get_result_title, get_result_url, get_result_template, and
-               get_result_extra_context methods will be used to interpret the
-               individual items that this function returns, so the result can
-               be an object with attributes as easily as a dictionary
-               with keys. The only restriction is that the objects be
-               pickleable so that they can be used with django's cache system.
-               """
+               """Returns an iterable of up to ``limit`` results. The :meth:`get_result_title`, :meth:`get_result_url`, :meth:`get_result_template`, and :meth:`get_result_extra_context` methods will be used to interpret the individual items that this function returns, so the result can be an object with attributes as easily as a dictionary with keys. However, keep in mind that the raw results will be stored with django's caching mechanisms and will be converted to JSON."""
                 raise NotImplementedError
         
         def get_result_title(self, result):
+               """Returns the title of the ``result``. Must be implemented by subclasses."""
                 raise NotImplementedError
         
         def get_result_url(self, result):
-               "Subclasses override this to provide the actual URL for the result."
+               """Returns the actual URL for the ``result`` or ``None`` if there is no URL. Must be implemented by subclasses."""
                 raise NotImplementedError
         
         def get_result_querydict(self, result):
+               """Returns a querydict for tracking selection of the result, or ``None`` if there is no URL for the result."""
                 url = self.get_result_url(result)
                 if url is None:
                         return None
                 return make_tracking_querydict(self.search_arg, url)
         
         def get_result_template(self, result):
+               """Returns the template to be used for rendering the ``result``."""
                 if hasattr(self, 'result_template'):
                         return loader.get_template(self.result_template)
                 if not hasattr(self, '_result_template'):
@@ -232,29 +210,30 @@ class BaseSearch(object):
                 return self._result_template
         
         def get_result_extra_context(self, result):
+               """Returns any extra context to be used when rendering the ``result``."""
                 return {}
         
         def has_more_results(self):
-               """Useful to determine whether to display a `view more results` link."""
+               """Returns ``True`` if there are more results than :attr:`result_limit` and ``False`` otherwise."""
                 return len(self.results) > self.result_limit
         
         @property
         def more_results_url(self):
-               """
-               Returns the actual url for more results. This will be encoded
-               into a querystring for tracking purposes.
-               """
+               """Returns the actual url for more results. This should be accessed through :attr:`more_results_querydict` in the template so that the click can be tracked."""
                 raise NotImplementedError
         
         @property
         def more_results_querydict(self):
+               """Returns a :class:`QueryDict` for tracking whether people click on a 'more results' link."""
                 return make_tracking_querydict(self.search_arg, self.more_results_url)
         
         def __unicode__(self):
-               return ' '.join(self.__class__.verbose_name.rsplit(' ', 1)[:-1]) + ' results'
+               return self.verbose_name
  
  
  class DatabaseSearch(BaseSearch):
+       """Implements :meth:`~BaseSearch.search` and :meth:`get_queryset` methods to handle database queries."""
+       #: The model which should be searched by the :class:`DatabaseSearch`.
         model = None
         
         def search(self, limit=None):
@@ -266,28 +245,28 @@ class DatabaseSearch(BaseSearch):
                 return self._qs
         
         def get_queryset(self):
+               """Returns a :class:`QuerySet` of all instances of :attr:`model`. This method should be overridden by subclasses to specify how the search should actually be implemented for the model."""
                 return self.model._default_manager.all()
  
  
  class URLSearch(BaseSearch):
-       """
-       Defines a generic interface for searches that require accessing a
-       certain url to get search results.
-       """
+       """Defines a generic interface for searches that require accessing a certain url to get search results."""
+       #: The base URL which will be accessed to get the search results.
         search_url = ''
+       #: The url-encoded query string to be used for fetching search results from :attr:`search_url`. Must have one ``%s`` to contain the search argument.
         query_format_str = "%s"
  
         @property
         def url(self):
-               "The URL where the search gets its results."
+               """The URL where the search gets its results. Composed from :attr:`search_url` and :attr:`query_format_str`."""
                 return self.search_url + self.query_format_str % urlquote_plus(self.search_arg)
  
         @property
         def more_results_url(self):
-               "The URL where the users would go to get more results."
                 return self.url
         
         def parse_response(self, response, limit=None):
+               """Handles the ``response`` from accessing :attr:`url` (with :func:`urllib2.urlopen`) and returns a list of up to ``limit`` results."""
                 raise NotImplementedError
         
         def search(self, limit=None):
@@ -295,17 +274,14 @@ class URLSearch(BaseSearch):
  
  
  class JSONSearch(URLSearch):
-       """
-       Makes a GET request and parses the results as JSON. The default
-       behavior assumes that the return value is a list of results.
-       """
+       """Makes a GET request and parses the results as JSON. The default behavior assumes that the response contains a list of results."""
         def parse_response(self, response, limit=None):
                 return json.loads(response.read())[:limit]
  
  
  class GoogleSearch(JSONSearch):
+       """An example implementation of a :class:`JSONSearch`."""
         search_url = "http://ajax.googleapis.com/ajax/services/search/web"
-       # TODO: Change this template to reflect the app's actual name.
         result_template = 'search/googlesearch.html'
         _cache_timeout = 60
         verbose_name = "Google search (current site)"
@@ -319,6 +295,7 @@ class GoogleSearch(JSONSearch):
         
         @property
         def default_args(self):
+               """Unquoted default arguments for the :class:`GoogleSearch`."""
                 return "site:%s" % Site.objects.get_current().domain
         
         def parse_response(self, response, limit=None):
@@ -355,9 +332,6 @@ class GoogleSearch(JSONSearch):
         
         def get_result_url(self, result):
                 return result['unescapedUrl']
-       
-       def get_result_extra_context(self, result):
-               return result
  
  
  registry.register(GoogleSearch)
@@ -370,13 +344,22 @@ except:
  else:
         __all__ += ('ScrapeSearch', 'XMLSearch',)
         class ScrapeSearch(URLSearch):
-               _strainer_args = []
-               _strainer_kwargs = {}
+               """A base class for scrape-style searching, available if :mod:`BeautifulSoup` is installed."""
+               #: Arguments to be passed into a :class:`SoupStrainer`.
+               strainer_args = []
+               #: Keyword arguments to be passed into a :class:`SoupStrainer`.
+               strainer_kwargs = {}
                 
                 @property
                 def strainer(self):
+                       """
+                       Caches and returns a :class:`SoupStrainer` initialized with :attr:`strainer_args` and :attr:`strainer_kwargs`. This strainer will be used to parse only certain parts of the document.
+                       
+                       .. seealso:: `BeautifulSoup: Improving Performance by Parsing Only Part of the Document <http://www.crummy.com/software/BeautifulSoup/documentation.html#Improving%20Performance%20by%20Parsing%20Only%20Part%20of%20the%20Document>`_
+                       
+                       """
                         if not hasattr(self, '_strainer'):
-                               self._strainer = SoupStrainer(*self._strainer_args, **self._strainer_kwargs)
+                               self._strainer = SoupStrainer(*self.strainer_args, **self.strainer_kwargs)
                         return self._strainer
                 
                 def parse_response(self, response, limit=None):
@@ -386,18 +369,21 @@ else:
                 
                 def parse_results(self, results):
                         """
-                       Provides a hook for parsing the results of straining. This
-                       has no default behavior because the results absolutely
-                       must be parsed to properly extract the information.
-                       For more information, see http://www.crummy.com/software/BeautifulSoup/documentation.html#Improving%20Memory%20Usage%20with%20extract
+                       Provides a hook for parsing the results of straining. This has no default behavior and must be implemented by subclasses because the results absolutely must be parsed to properly extract the information.
+                       
+                       .. seealso:: `BeautifulSoup: Improving Memory Usage with extract <http://www.crummy.com/software/BeautifulSoup/documentation.html#Improving%20Memory%20Usage%20with%20extract>`_
                         """
                         raise NotImplementedError
         
         
         class XMLSearch(ScrapeSearch):
-               _self_closing_tags = []
+               """A base class for searching XML results."""
+               #: Self-closing tag names to be used when interpreting the XML document.
+               #:
+               #: .. seealso:: `BeautifulSoup: Parsing XML <http://www.crummy.com/software/BeautifulSoup/documentation.html#Parsing%20XML>`_
+               self_closing_tags = []
                 
                 def parse_response(self, response, limit=None):
                         strainer = self.strainer
-                       soup = BeautifulStoneSoup(response, selfClosingTags=self._self_closing_tags, parseOnlyThese=strainer)
+                       soup = BeautifulStoneSoup(response, selfClosingTags=self.self_closing_tags, parseOnlyThese=strainer)
                         return self.parse_results(soup.findAll(recursive=False, limit=limit))
 \ No newline at end of file