From 40322363b700deb95d52f5b50190856bfe135f08 Mon Sep 17 00:00:00 2001
From: Stephen Burrows <stephen.r.burrows@gmail.com>
Date: Fri, 20 May 2011 14:47:01 -0400
Subject: [PATCH] Converted, corrected, improved, and added to documentation
 for sobol's models and search API.

---
 docs/contrib/sobol.rst        |   6 ++
 philo/contrib/sobol/models.py |  13 +++-
 philo/contrib/sobol/search.py | 141 +++++++++++++++++++++++-----------
 philo/contrib/sobol/utils.py  |   9 +--
 4 files changed, 115 insertions(+), 54 deletions(-)

diff --git a/docs/contrib/sobol.rst b/docs/contrib/sobol.rst
index 5ac62ee..353b547 100644
--- a/docs/contrib/sobol.rst
+++ b/docs/contrib/sobol.rst
@@ -9,3 +9,9 @@ Models
 
 .. automodule:: philo.contrib.sobol.models
 	:members:
+
+Search API
+++++++++++
+
+.. automodule:: philo.contrib.sobol.search
+	:members:
diff --git a/philo/contrib/sobol/models.py b/philo/contrib/sobol/models.py
index 82fe947..fb31dce 100644
--- a/philo/contrib/sobol/models.py
+++ b/philo/contrib/sobol/models.py
@@ -172,7 +172,7 @@ else:
 
 
 class SearchView(MultiView):
-	"""Handles a view for the results of a search, and an AJAX API for asynchronous search result loading. This can be particularly useful if some searches are slow."""
+	"""Handles a view for the results of a search, anonymously tracks the selections made by end users, and provides an AJAX API for asynchronous search result loading. This can be particularly useful if some searches are slow."""
 	#: :class:`ForeignKey` to a :class:`.Page` which will be used to render the search results.
 	results_page = models.ForeignKey(Page, related_name='search_results_related')
 	#: A class:`.SlugMultipleChoiceField` whose choices are the contents of the :class:`.SearchRegistry`
@@ -214,7 +214,7 @@ class SearchView(MultiView):
 		"""
 		Renders :attr:`results_page` with a context containing an instance of :attr:`search_form`. If the form was submitted and was valid, then one of two things has happened:
 		
-		* A search has been initiated. In this case, a list of search instances will be added to the context as 'searches'. If :attr:`enable_ajax_api` is enabled, each instance will have an "ajax_api_url" attribute containing the url needed to make an AJAX request for the search results.
+		* A search has been initiated. In this case, a list of search instances will be added to the context as ``searches``. If :attr:`enable_ajax_api` is enabled, each instance will have an ``ajax_api_url`` attribute containing the url needed to make an AJAX request for the search results.
 		* A link has been chosen. In this case, corresponding :class:`Search`, :class:`ResultURL`, and :class:`Click` instances will be created and the user will be redirected to the link's actual url.
 		
 		"""
@@ -274,9 +274,12 @@ class SearchView(MultiView):
 		
 		results
 			Contains the results of :meth:`.Result.get_context` for each result.
-		
 		rendered
 			Contains the results of :meth:`.Result.render` for each result.
+		hasMoreResults
+			``True`` or ``False`` whether the search has more results according to :meth:`BaseSearch.has_more_results`
+		moreResultsURL
+			Contains None or a querystring which, once accessed, will note the :class:`Click` and redirect the user to a page containing more results.
 		
 		"""
 		search_string = request.GET.get(SEARCH_ARG_GET_KEY)
@@ -288,5 +291,7 @@ class SearchView(MultiView):
 		
 		return HttpResponse(json.dumps({
 			'results': [result.get_context() for result in search_instance.results],
-			'rendered': [result.render() for result in search_instance.results]
+			'rendered': [result.render() for result in search_instance.results],
+			'hasMoreResults': search.has_more_results(),
+			'moreResultsURL': (u"?%s" % search.more_results_querydict.urlencode()) if search.more_results_querydict else None,
 		}), mimetype="application/json")
\ No newline at end of file
diff --git a/philo/contrib/sobol/search.py b/philo/contrib/sobol/search.py
index 4ab5980..2dbd4a7 100644
--- a/philo/contrib/sobol/search.py
+++ b/philo/contrib/sobol/search.py
@@ -37,6 +37,7 @@ MAX_CACHE_TIMEOUT = 60*24*7
 
 
 class RegistrationError(Exception):
+	"""Raised if there is a problem registering a search with a :class:`SearchRegistry`"""
 	pass
 
 
@@ -47,6 +48,14 @@ class SearchRegistry(object):
 		self._registry = {}
 	
 	def register(self, search, slug=None):
+		"""
+		Register a search with the registry.
+		
+		:param search: The search class to register - generally a subclass of :class:`BaseSearch`
+		:param slug: The slug which will be used to register the search class. If ``slug`` is ``None``, the search's default slug will be used.
+		:raises: :class:`RegistrationError` if a different search is already registered with ``slug``.
+		
+		"""
 		slug = slug or search.slug
 		if slug in self._registry:
 			registered = self._registry[slug]
@@ -56,6 +65,14 @@ class SearchRegistry(object):
 			self._registry[slug] = search
 	
 	def unregister(self, search, slug=None):
+		"""
+		Unregister a search from the registry.
+		
+		:param search: The search class to unregister - generally a subclass of :class:`BaseSearch`
+		:param slug: If provided, the search will only be removed if it was registered with ``slug``. If not provided, the search class will be unregistered no matter what slug it was registered with.
+		:raises: :class:`RegistrationError` if a slug is provided but the search registered with that slug is not ``search``.
+		
+		"""
 		if slug is not None:
 			if slug in self._registry and self._registry[slug] == search:
 				del self._registry[slug]
@@ -66,18 +83,23 @@ class SearchRegistry(object):
 					del self._registry[slug]
 	
 	def items(self):
+		"""Returns a list of (slug, search) items in the registry."""
 		return self._registry.items()
 	
 	def iteritems(self):
+		"""Returns an iterator over the (slug, search) pairs in the registry."""
 		return RegistryIterator(self._registry, 'iteritems')
 	
 	def iterchoices(self):
+		"""Returns an iterator over (slug, search.verbose_name) pairs for the registry."""
 		return RegistryIterator(self._registry, 'iteritems', lambda x: (x[0], x[1].verbose_name))
 	
 	def __getitem__(self, key):
+		"""Returns the search registered with ``key``."""
 		return self._registry[key]
 	
 	def __iter__(self):
+		"""Returns an iterator over the keys in the registry."""
 		return self._registry.__iter__()
 
 
@@ -86,32 +108,47 @@ registry = SearchRegistry()
 
 class Result(object):
 	"""
-	A result is instantiated with a configuration dictionary, a search,
-	and a template name. The configuration dictionary is expected to
-	define a `title` and optionally a `url`. Any other variables may be
-	defined; they will be made available through the result object in
-	the template, if one is defined.
+	:class:`Result` is a helper class that, given a search and a result of that search, is able to correctly render itself with a template defined by the search. Every :class:`Result` will pass a ``title``, a ``url`` (if applicable), and the raw ``result`` returned by the search into the template context when rendering.
+	
+	:param search: An instance of a :class:`BaseSearch` subclass or an object that implements the same API.
+	:param result: An arbitrary result from the ``search``.
+	
 	"""
 	def __init__(self, search, result):
 		self.search = search
 		self.result = result
 	
 	def get_title(self):
+		"""Returns the title of the result by calling :meth:`BaseSearch.get_result_title` on the raw result."""
 		return self.search.get_result_title(self.result)
 	
 	def get_url(self):
+		"""Returns the url of the result or an empty string by calling :meth:`BaseSearch.get_result_querydict` on the raw result and then encoding the querydict returned."""
 		qd = self.search.get_result_querydict(self.result)
 		if qd is None:
 			return ""
 		return "?%s" % qd.urlencode()
 	
 	def get_template(self):
+		"""Returns the template for the result by calling :meth:`BaseSearch.get_result_template` on the raw result."""
 		return self.search.get_result_template(self.result)
 	
 	def get_extra_context(self):
+		"""Returns any extra context for the result by calling :meth:`BaseSearch.get_result_extra_context` on the raw result."""
 		return self.search.get_result_extra_context(self.result)
 	
 	def get_context(self):
+		"""
+		Returns the context dictionary for the result. This is used both in rendering the result and in the AJAX return value for :meth:`.SearchView.ajax_api_view`. The context will contain everything from :meth:`get_extra_context` as well as the following keys:
+		
+		title
+			The result of calling :meth:`get_title`
+		url
+			The result of calling :meth:`get_url`
+		result
+			The raw result which the :class:`Result` was instantiated with.
+		
+		"""
 		context = self.get_extra_context()
 		context.update({
 			'title': self.get_title(),
@@ -121,11 +158,13 @@ class Result(object):
 		return context
 	
 	def render(self):
+		"""Returns the template from :meth:`get_template` rendered with the context from :meth:`get_context`."""
 		t = self.get_template()
 		c = Context(self.get_context())
 		return t.render(c)
 	
 	def __unicode__(self):
+		"""Returns :meth:`render`"""
 		return self.render()
 
 
@@ -140,12 +179,15 @@ class BaseSearchMetaclass(type):
 
 class BaseSearch(object):
 	"""
-	Defines a generic search interface. Accessing self.results will
-	attempt to retrieve cached results and, if that fails, will
-	initiate a new search and store the results in the cache.
+	Defines a generic search api. Accessing :attr:`results` will attempt to retrieve cached results and, if that fails, will initiate a new search and store the results in the cache. Each search has a ``verbose_name`` and a ``slug``. If these are not provided as attributes, they will be automatically generated based on the name of the class.
+	
+	:param search_arg: The string which is being searched for.
+	
 	"""
 	__metaclass__ = BaseSearchMetaclass
+	#: The number of results to return from the complete list. Default: 10
 	result_limit = 10
+	#: How long the items for the search should be cached (in minutes). Default: 48 hours.
 	_cache_timeout = 60*48
 	
 	def __init__(self, search_arg):
@@ -172,6 +214,7 @@ class BaseSearch(object):
 	
 	@property
 	def results(self):
+		"""Retrieves cached results or initiates a new search via :meth:`get_results` and caches the results."""
 		if not hasattr(self, '_results'):
 			results = self._get_cached_results()
 			if results is None:
@@ -195,37 +238,36 @@ class BaseSearch(object):
 	
 	def get_results(self, limit=None, result_class=Result):
 		"""
-		Calls self.search() and parses the return value into Result objects.
+		Calls :meth:`search` and parses the return value into :class:`Result` instances.
+		
+		:param limit: Passed directly to :meth:`search`.
+		:param result_class: The class used to represent the results. This will be instantiated with the :class:`BaseSearch` instance and the raw result from the search.
+		
 		"""
 		results = self.search(limit)
 		return [result_class(self, result) for result in results]
 	
 	def search(self, limit=None):
-		"""
-		Returns an iterable of up to <limit> results. The
-		get_result_title, get_result_url, get_result_template, and
-		get_result_extra_context methods will be used to interpret the
-		individual items that this function returns, so the result can
-		be an object with attributes as easily as a dictionary
- 		with keys. The only restriction is that the objects be
-		pickleable so that they can be used with django's cache system.
-		"""
+		"""Returns an iterable of up to ``limit`` results. The :meth:`get_result_title`, :meth:`get_result_url`, :meth:`get_result_template`, and :meth:`get_result_extra_context` methods will be used to interpret the individual items that this function returns, so the result can be an object with attributes as easily as a dictionary with keys. However, keep in mind that the raw results will be stored with django's caching mechanisms and will be converted to JSON."""
 		raise NotImplementedError
 	
 	def get_result_title(self, result):
+		"""Returns the title of the ``result``. Must be implemented by subclasses."""
 		raise NotImplementedError
 	
 	def get_result_url(self, result):
-		"Subclasses override this to provide the actual URL for the result."
+		"""Returns the actual URL for the ``result`` or ``None`` if there is no URL. Must be implemented by subclasses."""
 		raise NotImplementedError
 	
 	def get_result_querydict(self, result):
+		"""Returns a querydict for tracking selection of the result, or ``None`` if there is no URL for the result."""
 		url = self.get_result_url(result)
 		if url is None:
 			return None
 		return make_tracking_querydict(self.search_arg, url)
 	
 	def get_result_template(self, result):
+		"""Returns the template to be used for rendering the ``result``."""
 		if hasattr(self, 'result_template'):
 			return loader.get_template(self.result_template)
 		if not hasattr(self, '_result_template'):
@@ -233,22 +275,21 @@ class BaseSearch(object):
 		return self._result_template
 	
 	def get_result_extra_context(self, result):
+		"""Returns any extra context to be used when rendering the ``result``."""
 		return {}
 	
 	def has_more_results(self):
-		"""Useful to determine whether to display a `view more results` link."""
+		"""Returns ``True`` if there are more results than :attr:`result_limit` and ``False`` otherwise."""
 		return len(self.results) > self.result_limit
 	
 	@property
 	def more_results_url(self):
-		"""
-		Returns the actual url for more results. This will be encoded
-		into a querystring for tracking purposes.
-		"""
+		"""Returns the actual url for more results. This should be accessed through :attr:`more_results_querydict` in the template so that the click can be tracked."""
 		raise NotImplementedError
 	
 	@property
 	def more_results_querydict(self):
+		"""Returns a :class:`QueryDict` for tracking whether people click on a 'more results' link."""
 		return make_tracking_querydict(self.search_arg, self.more_results_url)
 	
 	def __unicode__(self):
@@ -256,6 +297,8 @@ class BaseSearch(object):
 
 
 class DatabaseSearch(BaseSearch):
+	"""Implements :meth:`~BaseSearch.search` and :meth:`get_queryset` methods to handle database queries."""
+	#: The model which should be searched by the :class:`DatabaseSearch`.
 	model = None
 	
 	def search(self, limit=None):
@@ -267,28 +310,28 @@ class DatabaseSearch(BaseSearch):
 		return self._qs
 	
 	def get_queryset(self):
+		"""Returns a :class:`QuerySet` of all instances of :attr:`model`. This method should be overridden by subclasses to specify how the search should actually be implemented for the model."""
 		return self.model._default_manager.all()
 
 
 class URLSearch(BaseSearch):
-	"""
-	Defines a generic interface for searches that require accessing a
-	certain url to get search results.
-	"""
+	"""Defines a generic interface for searches that require accessing a certain url to get search results."""
+	#: The base URL which will be accessed to get the search results.
 	search_url = ''
+	#: The url-encoded query string to be used for fetching search results from :attr:`search_url`. Must have one ``%s`` to contain the search argument.
 	query_format_str = "%s"
 
 	@property
 	def url(self):
-		"The URL where the search gets its results."
+		"""The URL where the search gets its results. Composed from :attr:`search_url` and :attr:`query_format_str`."""
 		return self.search_url + self.query_format_str % urlquote_plus(self.search_arg)
 
 	@property
 	def more_results_url(self):
-		"The URL where the users would go to get more results."
 		return self.url
 	
 	def parse_response(self, response, limit=None):
+		"""Handles the ``response`` from accessing :attr:`url` (with :func:`urllib2.urlopen`) and returns a list of up to ``limit`` results."""
 		raise NotImplementedError
 	
 	def search(self, limit=None):
@@ -296,17 +339,14 @@ class URLSearch(BaseSearch):
 
 
 class JSONSearch(URLSearch):
-	"""
-	Makes a GET request and parses the results as JSON. The default
-	behavior assumes that the return value is a list of results.
-	"""
+	"""Makes a GET request and parses the results as JSON. The default behavior assumes that the response contains a list of results."""
 	def parse_response(self, response, limit=None):
 		return json.loads(response.read())[:limit]
 
 
 class GoogleSearch(JSONSearch):
+	"""An example implementation of a :class:`JSONSearch`."""
 	search_url = "http://ajax.googleapis.com/ajax/services/search/web"
-	# TODO: Change this template to reflect the app's actual name.
 	result_template = 'search/googlesearch.html'
 	_cache_timeout = 60
 	verbose_name = "Google search (current site)"
@@ -320,6 +360,7 @@ class GoogleSearch(JSONSearch):
 	
 	@property
 	def default_args(self):
+		"""Unquoted default arguments for the :class:`GoogleSearch`."""
 		return "site:%s" % Site.objects.get_current().domain
 	
 	def parse_response(self, response, limit=None):
@@ -368,13 +409,22 @@ except:
 else:
 	__all__ += ('ScrapeSearch', 'XMLSearch',)
 	class ScrapeSearch(URLSearch):
-		_strainer_args = []
-		_strainer_kwargs = {}
+		"""A base class for scrape-style searching, available if :mod:`BeautifulSoup` is installed."""
+		#: Arguments to be passed into a :class:`SoupStrainer`.
+		strainer_args = []
+		#: Keyword arguments to be passed into a :class:`SoupStrainer`.
+		strainer_kwargs = {}
 		
 		@property
 		def strainer(self):
+			"""
+			Caches and returns a :class:`SoupStrainer` initialized with :attr:`strainer_args` and :attr:`strainer_kwargs`. This strainer will be used to parse only certain parts of the document.
+			
+			.. seealso:: `BeautifulSoup: Improving Performance by Parsing Only Part of the Document <http://www.crummy.com/software/BeautifulSoup/documentation.html#Improving%20Performance%20by%20Parsing%20Only%20Part%20of%20the%20Document>`_
+			
+			"""
 			if not hasattr(self, '_strainer'):
-				self._strainer = SoupStrainer(*self._strainer_args, **self._strainer_kwargs)
+				self._strainer = SoupStrainer(*self.strainer_args, **self.strainer_kwargs)
 			return self._strainer
 		
 		def parse_response(self, response, limit=None):
@@ -384,18 +434,21 @@ else:
 		
 		def parse_results(self, results):
 			"""
-			Provides a hook for parsing the results of straining. This
-			has no default behavior because the results absolutely
-			must be parsed to properly extract the information.
-			For more information, see http://www.crummy.com/software/BeautifulSoup/documentation.html#Improving%20Memory%20Usage%20with%20extract
+			Provides a hook for parsing the results of straining. This has no default behavior and must be implemented by subclasses because the results absolutely must be parsed to properly extract the information.
+			
+			.. seealso:: `BeautifulSoup: Improving Memory Usage with extract <http://www.crummy.com/software/BeautifulSoup/documentation.html#Improving%20Memory%20Usage%20with%20extract>`_
 			"""
 			raise NotImplementedError
 	
 	
 	class XMLSearch(ScrapeSearch):
-		_self_closing_tags = []
+		"""A base class for searching XML results."""
+		#: Self-closing tag names to be used when interpreting the XML document
+		#:
+		#: .. seealso:: `BeautifulSoup: Parsing XML <http://www.crummy.com/software/BeautifulSoup/documentation.html#Parsing%20XML>`_
+		self_closing_tags = []
 		
 		def parse_response(self, response, limit=None):
 			strainer = self.strainer
-			soup = BeautifulStoneSoup(response, selfClosingTags=self._self_closing_tags, parseOnlyThese=strainer)
+			soup = BeautifulStoneSoup(response, selfClosingTags=self.self_closing_tags, parseOnlyThese=strainer)
 			return self.parse_results(soup.findAll(recursive=False, limit=limit))
\ No newline at end of file
diff --git a/philo/contrib/sobol/utils.py b/philo/contrib/sobol/utils.py
index 50d2113..6fd5a49 100644
--- a/philo/contrib/sobol/utils.py
+++ b/philo/contrib/sobol/utils.py
@@ -12,20 +12,17 @@ HASH_REDIRECT_GET_KEY = 's'
 
 
 def make_redirect_hash(search_arg, url):
+	"""Hashes a redirect for a ``search_arg`` and ``url`` to avoid providing a simple URL spoofing service."""
 	return sha1(smart_str(search_arg + url + settings.SECRET_KEY)).hexdigest()[::2]
 
 
 def check_redirect_hash(hash, search_arg, url):
+	"""Checks whether a hash is valid for a given ``search_arg`` and ``url``."""
 	return hash == make_redirect_hash(search_arg, url)
 
 
 def make_tracking_querydict(search_arg, url):
-	"""
-	Returns a QueryDict instance containing the information necessary
-	for tracking clicks of this url.
-	
-	NOTE: will this kind of initialization handle quoting correctly?
-	"""
+	"""Returns a :class:`QueryDict` instance containing the information necessary for tracking :class:`.Click`\ s on the ``url``."""
 	return QueryDict("%s=%s&%s=%s&%s=%s" % (
 		SEARCH_ARG_GET_KEY, urlquote_plus(search_arg),
  		URL_REDIRECT_GET_KEY, urlquote(url),
-- 
2.20.1