From 40322363b700deb95d52f5b50190856bfe135f08 Mon Sep 17 00:00:00 2001 From: Stephen Burrows Date: Fri, 20 May 2011 14:47:01 -0400 Subject: [PATCH] Converted, corrected, improved, and added to documentation for sobol's models and search API. --- docs/contrib/sobol.rst | 6 ++ philo/contrib/sobol/models.py | 13 +++- philo/contrib/sobol/search.py | 141 +++++++++++++++++++++++----------- philo/contrib/sobol/utils.py | 9 +-- 4 files changed, 115 insertions(+), 54 deletions(-) diff --git a/docs/contrib/sobol.rst b/docs/contrib/sobol.rst index 5ac62ee..353b547 100644 --- a/docs/contrib/sobol.rst +++ b/docs/contrib/sobol.rst @@ -9,3 +9,9 @@ Models .. automodule:: philo.contrib.sobol.models :members: + +Search API +++++++++++ + +.. automodule:: philo.contrib.sobol.search + :members: diff --git a/philo/contrib/sobol/models.py b/philo/contrib/sobol/models.py index 82fe947..fb31dce 100644 --- a/philo/contrib/sobol/models.py +++ b/philo/contrib/sobol/models.py @@ -172,7 +172,7 @@ else: class SearchView(MultiView): - """Handles a view for the results of a search, and an AJAX API for asynchronous search result loading. This can be particularly useful if some searches are slow.""" + """Handles a view for the results of a search, anonymously tracks the selections made by end users, and provides an AJAX API for asynchronous search result loading. This can be particularly useful if some searches are slow.""" #: :class:`ForeignKey` to a :class:`.Page` which will be used to render the search results. results_page = models.ForeignKey(Page, related_name='search_results_related') #: A class:`.SlugMultipleChoiceField` whose choices are the contents of the :class:`.SearchRegistry` @@ -214,7 +214,7 @@ class SearchView(MultiView): """ Renders :attr:`results_page` with a context containing an instance of :attr:`search_form`. If the form was submitted and was valid, then one of two things has happened: - * A search has been initiated. In this case, a list of search instances will be added to the context as 'searches'. If :attr:`enable_ajax_api` is enabled, each instance will have an "ajax_api_url" attribute containing the url needed to make an AJAX request for the search results. + * A search has been initiated. In this case, a list of search instances will be added to the context as ``searches``. If :attr:`enable_ajax_api` is enabled, each instance will have an ``ajax_api_url`` attribute containing the url needed to make an AJAX request for the search results. * A link has been chosen. In this case, corresponding :class:`Search`, :class:`ResultURL`, and :class:`Click` instances will be created and the user will be redirected to the link's actual url. """ @@ -274,9 +274,12 @@ class SearchView(MultiView): results Contains the results of :meth:`.Result.get_context` for each result. - rendered Contains the results of :meth:`.Result.render` for each result. + hasMoreResults + ``True`` or ``False`` whether the search has more results according to :meth:`BaseSearch.has_more_results` + moreResultsURL + Contains None or a querystring which, once accessed, will note the :class:`Click` and redirect the user to a page containing more results. """ search_string = request.GET.get(SEARCH_ARG_GET_KEY) @@ -288,5 +291,7 @@ class SearchView(MultiView): return HttpResponse(json.dumps({ 'results': [result.get_context() for result in search_instance.results], - 'rendered': [result.render() for result in search_instance.results] + 'rendered': [result.render() for result in search_instance.results], + 'hasMoreResults': search.has_more_results(), + 'moreResultsURL': (u"?%s" % search.more_results_querydict.urlencode()) if search.more_results_querydict else None, }), mimetype="application/json") \ No newline at end of file diff --git a/philo/contrib/sobol/search.py b/philo/contrib/sobol/search.py index 4ab5980..2dbd4a7 100644 --- a/philo/contrib/sobol/search.py +++ b/philo/contrib/sobol/search.py @@ -37,6 +37,7 @@ MAX_CACHE_TIMEOUT = 60*24*7 class RegistrationError(Exception): + """Raised if there is a problem registering a search with a :class:`SearchRegistry`""" pass @@ -47,6 +48,14 @@ class SearchRegistry(object): self._registry = {} def register(self, search, slug=None): + """ + Register a search with the registry. + + :param search: The search class to register - generally a subclass of :class:`BaseSearch` + :param slug: The slug which will be used to register the search class. If ``slug`` is ``None``, the search's default slug will be used. + :raises: :class:`RegistrationError` if a different search is already registered with ``slug``. + + """ slug = slug or search.slug if slug in self._registry: registered = self._registry[slug] @@ -56,6 +65,14 @@ class SearchRegistry(object): self._registry[slug] = search def unregister(self, search, slug=None): + """ + Unregister a search from the registry. + + :param search: The search class to unregister - generally a subclass of :class:`BaseSearch` + :param slug: If provided, the search will only be removed if it was registered with ``slug``. If not provided, the search class will be unregistered no matter what slug it was registered with. + :raises: :class:`RegistrationError` if a slug is provided but the search registered with that slug is not ``search``. + + """ if slug is not None: if slug in self._registry and self._registry[slug] == search: del self._registry[slug] @@ -66,18 +83,23 @@ class SearchRegistry(object): del self._registry[slug] def items(self): + """Returns a list of (slug, search) items in the registry.""" return self._registry.items() def iteritems(self): + """Returns an iterator over the (slug, search) pairs in the registry.""" return RegistryIterator(self._registry, 'iteritems') def iterchoices(self): + """Returns an iterator over (slug, search.verbose_name) pairs for the registry.""" return RegistryIterator(self._registry, 'iteritems', lambda x: (x[0], x[1].verbose_name)) def __getitem__(self, key): + """Returns the search registered with ``key``.""" return self._registry[key] def __iter__(self): + """Returns an iterator over the keys in the registry.""" return self._registry.__iter__() @@ -86,32 +108,47 @@ registry = SearchRegistry() class Result(object): """ - A result is instantiated with a configuration dictionary, a search, - and a template name. The configuration dictionary is expected to - define a `title` and optionally a `url`. Any other variables may be - defined; they will be made available through the result object in - the template, if one is defined. + :class:`Result` is a helper class that, given a search and a result of that search, is able to correctly render itself with a template defined by the search. Every :class:`Result` will pass a ``title``, a ``url`` (if applicable), and the raw ``result`` returned by the search into the template context when rendering. + + :param search: An instance of a :class:`BaseSearch` subclass or an object that implements the same API. + :param result: An arbitrary result from the ``search``. + """ def __init__(self, search, result): self.search = search self.result = result def get_title(self): + """Returns the title of the result by calling :meth:`BaseSearch.get_result_title` on the raw result.""" return self.search.get_result_title(self.result) def get_url(self): + """Returns the url of the result or an empty string by calling :meth:`BaseSearch.get_result_querydict` on the raw result and then encoding the querydict returned.""" qd = self.search.get_result_querydict(self.result) if qd is None: return "" return "?%s" % qd.urlencode() def get_template(self): + """Returns the template for the result by calling :meth:`BaseSearch.get_result_template` on the raw result.""" return self.search.get_result_template(self.result) def get_extra_context(self): + """Returns any extra context for the result by calling :meth:`BaseSearch.get_result_extra_context` on the raw result.""" return self.search.get_result_extra_context(self.result) def get_context(self): + """ + Returns the context dictionary for the result. This is used both in rendering the result and in the AJAX return value for :meth:`.SearchView.ajax_api_view`. The context will contain everything from :meth:`get_extra_context` as well as the following keys: + + title + The result of calling :meth:`get_title` + url + The result of calling :meth:`get_url` + result + The raw result which the :class:`Result` was instantiated with. + + """ context = self.get_extra_context() context.update({ 'title': self.get_title(), @@ -121,11 +158,13 @@ class Result(object): return context def render(self): + """Returns the template from :meth:`get_template` rendered with the context from :meth:`get_context`.""" t = self.get_template() c = Context(self.get_context()) return t.render(c) def __unicode__(self): + """Returns :meth:`render`""" return self.render() @@ -140,12 +179,15 @@ class BaseSearchMetaclass(type): class BaseSearch(object): """ - Defines a generic search interface. Accessing self.results will - attempt to retrieve cached results and, if that fails, will - initiate a new search and store the results in the cache. + Defines a generic search api. Accessing :attr:`results` will attempt to retrieve cached results and, if that fails, will initiate a new search and store the results in the cache. Each search has a ``verbose_name`` and a ``slug``. If these are not provided as attributes, they will be automatically generated based on the name of the class. + + :param search_arg: The string which is being searched for. + """ __metaclass__ = BaseSearchMetaclass + #: The number of results to return from the complete list. Default: 10 result_limit = 10 + #: How long the items for the search should be cached (in minutes). Default: 48 hours. _cache_timeout = 60*48 def __init__(self, search_arg): @@ -172,6 +214,7 @@ class BaseSearch(object): @property def results(self): + """Retrieves cached results or initiates a new search via :meth:`get_results` and caches the results.""" if not hasattr(self, '_results'): results = self._get_cached_results() if results is None: @@ -195,37 +238,36 @@ class BaseSearch(object): def get_results(self, limit=None, result_class=Result): """ - Calls self.search() and parses the return value into Result objects. + Calls :meth:`search` and parses the return value into :class:`Result` instances. + + :param limit: Passed directly to :meth:`search`. + :param result_class: The class used to represent the results. This will be instantiated with the :class:`BaseSearch` instance and the raw result from the search. + """ results = self.search(limit) return [result_class(self, result) for result in results] def search(self, limit=None): - """ - Returns an iterable of up to results. The - get_result_title, get_result_url, get_result_template, and - get_result_extra_context methods will be used to interpret the - individual items that this function returns, so the result can - be an object with attributes as easily as a dictionary - with keys. The only restriction is that the objects be - pickleable so that they can be used with django's cache system. - """ + """Returns an iterable of up to ``limit`` results. The :meth:`get_result_title`, :meth:`get_result_url`, :meth:`get_result_template`, and :meth:`get_result_extra_context` methods will be used to interpret the individual items that this function returns, so the result can be an object with attributes as easily as a dictionary with keys. However, keep in mind that the raw results will be stored with django's caching mechanisms and will be converted to JSON.""" raise NotImplementedError def get_result_title(self, result): + """Returns the title of the ``result``. Must be implemented by subclasses.""" raise NotImplementedError def get_result_url(self, result): - "Subclasses override this to provide the actual URL for the result." + """Returns the actual URL for the ``result`` or ``None`` if there is no URL. Must be implemented by subclasses.""" raise NotImplementedError def get_result_querydict(self, result): + """Returns a querydict for tracking selection of the result, or ``None`` if there is no URL for the result.""" url = self.get_result_url(result) if url is None: return None return make_tracking_querydict(self.search_arg, url) def get_result_template(self, result): + """Returns the template to be used for rendering the ``result``.""" if hasattr(self, 'result_template'): return loader.get_template(self.result_template) if not hasattr(self, '_result_template'): @@ -233,22 +275,21 @@ class BaseSearch(object): return self._result_template def get_result_extra_context(self, result): + """Returns any extra context to be used when rendering the ``result``.""" return {} def has_more_results(self): - """Useful to determine whether to display a `view more results` link.""" + """Returns ``True`` if there are more results than :attr:`result_limit` and ``False`` otherwise.""" return len(self.results) > self.result_limit @property def more_results_url(self): - """ - Returns the actual url for more results. This will be encoded - into a querystring for tracking purposes. - """ + """Returns the actual url for more results. This should be accessed through :attr:`more_results_querydict` in the template so that the click can be tracked.""" raise NotImplementedError @property def more_results_querydict(self): + """Returns a :class:`QueryDict` for tracking whether people click on a 'more results' link.""" return make_tracking_querydict(self.search_arg, self.more_results_url) def __unicode__(self): @@ -256,6 +297,8 @@ class BaseSearch(object): class DatabaseSearch(BaseSearch): + """Implements :meth:`~BaseSearch.search` and :meth:`get_queryset` methods to handle database queries.""" + #: The model which should be searched by the :class:`DatabaseSearch`. model = None def search(self, limit=None): @@ -267,28 +310,28 @@ class DatabaseSearch(BaseSearch): return self._qs def get_queryset(self): + """Returns a :class:`QuerySet` of all instances of :attr:`model`. This method should be overridden by subclasses to specify how the search should actually be implemented for the model.""" return self.model._default_manager.all() class URLSearch(BaseSearch): - """ - Defines a generic interface for searches that require accessing a - certain url to get search results. - """ + """Defines a generic interface for searches that require accessing a certain url to get search results.""" + #: The base URL which will be accessed to get the search results. search_url = '' + #: The url-encoded query string to be used for fetching search results from :attr:`search_url`. Must have one ``%s`` to contain the search argument. query_format_str = "%s" @property def url(self): - "The URL where the search gets its results." + """The URL where the search gets its results. Composed from :attr:`search_url` and :attr:`query_format_str`.""" return self.search_url + self.query_format_str % urlquote_plus(self.search_arg) @property def more_results_url(self): - "The URL where the users would go to get more results." return self.url def parse_response(self, response, limit=None): + """Handles the ``response`` from accessing :attr:`url` (with :func:`urllib2.urlopen`) and returns a list of up to ``limit`` results.""" raise NotImplementedError def search(self, limit=None): @@ -296,17 +339,14 @@ class URLSearch(BaseSearch): class JSONSearch(URLSearch): - """ - Makes a GET request and parses the results as JSON. The default - behavior assumes that the return value is a list of results. - """ + """Makes a GET request and parses the results as JSON. The default behavior assumes that the response contains a list of results.""" def parse_response(self, response, limit=None): return json.loads(response.read())[:limit] class GoogleSearch(JSONSearch): + """An example implementation of a :class:`JSONSearch`.""" search_url = "http://ajax.googleapis.com/ajax/services/search/web" - # TODO: Change this template to reflect the app's actual name. result_template = 'search/googlesearch.html' _cache_timeout = 60 verbose_name = "Google search (current site)" @@ -320,6 +360,7 @@ class GoogleSearch(JSONSearch): @property def default_args(self): + """Unquoted default arguments for the :class:`GoogleSearch`.""" return "site:%s" % Site.objects.get_current().domain def parse_response(self, response, limit=None): @@ -368,13 +409,22 @@ except: else: __all__ += ('ScrapeSearch', 'XMLSearch',) class ScrapeSearch(URLSearch): - _strainer_args = [] - _strainer_kwargs = {} + """A base class for scrape-style searching, available if :mod:`BeautifulSoup` is installed.""" + #: Arguments to be passed into a :class:`SoupStrainer`. + strainer_args = [] + #: Keyword arguments to be passed into a :class:`SoupStrainer`. + strainer_kwargs = {} @property def strainer(self): + """ + Caches and returns a :class:`SoupStrainer` initialized with :attr:`strainer_args` and :attr:`strainer_kwargs`. This strainer will be used to parse only certain parts of the document. + + .. seealso:: `BeautifulSoup: Improving Performance by Parsing Only Part of the Document `_ + + """ if not hasattr(self, '_strainer'): - self._strainer = SoupStrainer(*self._strainer_args, **self._strainer_kwargs) + self._strainer = SoupStrainer(*self.strainer_args, **self.strainer_kwargs) return self._strainer def parse_response(self, response, limit=None): @@ -384,18 +434,21 @@ else: def parse_results(self, results): """ - Provides a hook for parsing the results of straining. This - has no default behavior because the results absolutely - must be parsed to properly extract the information. - For more information, see http://www.crummy.com/software/BeautifulSoup/documentation.html#Improving%20Memory%20Usage%20with%20extract + Provides a hook for parsing the results of straining. This has no default behavior and must be implemented by subclasses because the results absolutely must be parsed to properly extract the information. + + .. seealso:: `BeautifulSoup: Improving Memory Usage with extract `_ """ raise NotImplementedError class XMLSearch(ScrapeSearch): - _self_closing_tags = [] + """A base class for searching XML results.""" + #: Self-closing tag names to be used when interpreting the XML document + #: + #: .. seealso:: `BeautifulSoup: Parsing XML `_ + self_closing_tags = [] def parse_response(self, response, limit=None): strainer = self.strainer - soup = BeautifulStoneSoup(response, selfClosingTags=self._self_closing_tags, parseOnlyThese=strainer) + soup = BeautifulStoneSoup(response, selfClosingTags=self.self_closing_tags, parseOnlyThese=strainer) return self.parse_results(soup.findAll(recursive=False, limit=limit)) \ No newline at end of file diff --git a/philo/contrib/sobol/utils.py b/philo/contrib/sobol/utils.py index 50d2113..6fd5a49 100644 --- a/philo/contrib/sobol/utils.py +++ b/philo/contrib/sobol/utils.py @@ -12,20 +12,17 @@ HASH_REDIRECT_GET_KEY = 's' def make_redirect_hash(search_arg, url): + """Hashes a redirect for a ``search_arg`` and ``url`` to avoid providing a simple URL spoofing service.""" return sha1(smart_str(search_arg + url + settings.SECRET_KEY)).hexdigest()[::2] def check_redirect_hash(hash, search_arg, url): + """Checks whether a hash is valid for a given ``search_arg`` and ``url``.""" return hash == make_redirect_hash(search_arg, url) def make_tracking_querydict(search_arg, url): - """ - Returns a QueryDict instance containing the information necessary - for tracking clicks of this url. - - NOTE: will this kind of initialization handle quoting correctly? - """ + """Returns a :class:`QueryDict` instance containing the information necessary for tracking :class:`.Click`\ s on the ``url``.""" return QueryDict("%s=%s&%s=%s&%s=%s" % ( SEARCH_ARG_GET_KEY, urlquote_plus(search_arg), URL_REDIRECT_GET_KEY, urlquote(url), -- 2.20.1