X-Git-Url: http://git.ithinksw.org/philo.git/blobdiff_plain/700615d15881697c10a77ca65c84a84a5dd1e4d6..4a170a70ed8171fc66d9d139df5f7be5208d838c:/philo/contrib/sobol/search.py

diff --git a/philo/contrib/sobol/search.py b/philo/contrib/sobol/search.py
index 4ab5980..a79030a 100644
--- a/philo/contrib/sobol/search.py
+++ b/philo/contrib/sobol/search.py
@@ -1,5 +1,6 @@
 #encoding: utf-8
 import datetime
+from hashlib import sha1
 
 from django.conf import settings
 from django.contrib.sites.models import Site
@@ -9,9 +10,10 @@ from django.utils import simplejson as json
 from django.utils.http import urlquote_plus
 from django.utils.safestring import mark_safe
 from django.utils.text import capfirst
-from django.template import loader, Context, Template
+from django.template import loader, Context, Template, TemplateDoesNotExist
 
-from philo.contrib.sobol.utils import make_tracking_querydict, RegistryIterator
+from philo.contrib.sobol.utils import make_tracking_querydict
+from philo.utils.registry import Registry
 
 
 if getattr(settings, 'SOBOL_USE_EVENTLET', False):
@@ -24,108 +26,97 @@ else:
 
 
 __all__ = (
-	'Result', 'BaseSearch', 'DatabaseSearch', 'URLSearch', 'JSONSearch', 'GoogleSearch', 'SearchRegistry', 'registry'
+	'Result', 'BaseSearch', 'DatabaseSearch', 'URLSearch', 'JSONSearch', 'GoogleSearch', 'registry', 'get_search_instance'
 )
 
 
-SEARCH_CACHE_KEY = 'philo_sobol_search_results'
-DEFAULT_RESULT_TEMPLATE_STRING = "{% if url %}<a href='{{ url }}'>{% endif %}{{ title }}{% if url %}</a>{% endif %}"
-DEFAULT_RESULT_TEMPLATE = Template(DEFAULT_RESULT_TEMPLATE_STRING)
+SEARCH_CACHE_SEED = 'philo_sobol_search_results'
+USE_CACHE = getattr(settings, 'SOBOL_USE_CACHE', True)
 
-# Determines the timeout on the entire result cache.
-MAX_CACHE_TIMEOUT = 60*24*7
 
-
-class RegistrationError(Exception):
-	pass
+#: A registry for :class:`BaseSearch` subclasses that should be available in the admin.
+registry = Registry()
 
 
-class SearchRegistry(object):
-	"""Holds a registry of search types by slug."""
-	
-	def __init__(self):
-		self._registry = {}
-	
-	def register(self, search, slug=None):
-		slug = slug or search.slug
-		if slug in self._registry:
-			registered = self._registry[slug]
-			if registered.__module__ != search.__module__:
-				raise RegistrationError("A different search is already registered as `%s`" % slug)
-		else:
-			self._registry[slug] = search
-	
-	def unregister(self, search, slug=None):
-		if slug is not None:
-			if slug in self._registry and self._registry[slug] == search:
-				del self._registry[slug]
-			raise RegistrationError("`%s` is not registered as `%s`" % (search, slug))
-		else:
-			for slug, search in self._registry.items():
-				if search == search:
-					del self._registry[slug]
-	
-	def items(self):
-		return self._registry.items()
-	
-	def iteritems(self):
-		return RegistryIterator(self._registry, 'iteritems')
-	
-	def iterchoices(self):
-		return RegistryIterator(self._registry, 'iteritems', lambda x: (x[0], x[1].verbose_name))
-	
-	def __getitem__(self, key):
-		return self._registry[key]
-	
-	def __iter__(self):
-		return self._registry.__iter__()
+def _make_cache_key(search, search_arg):
+	return sha1(SEARCH_CACHE_SEED + search.slug + urlquote_plus(search_arg)).hexdigest()  # percent-encode first: sha1() rejects non-ASCII unicode on Python 2
 
 
-registry = SearchRegistry()
+def get_search_instance(slug, search_arg):
+	"""Returns a search instance for the given slug, either from the cache or newly-instantiated."""
+	search = registry[slug]
+	search_arg = search_arg.lower()
+	if USE_CACHE:
+		key = _make_cache_key(search, search_arg)
+		cached = cache.get(key)
+		if cached:
+			return cached
+	instance = search(search_arg)
+	instance.slug = slug
+	return instance
 
 
 class Result(object):
 	"""
-	A result is instantiated with a configuration dictionary, a search,
-	and a template name. The configuration dictionary is expected to
-	define a `title` and optionally a `url`. Any other variables may be
-	defined; they will be made available through the result object in
-	the template, if one is defined.
+	:class:`Result` is a helper class that, given a search and a result of that search, is able to correctly render itself with a template defined by the search. Every :class:`Result` will pass a ``title``, a ``url`` and an ``actual_url`` (if applicable), and the ``content`` of the result into the template context when rendering; the raw ``result`` itself is not part of the context.
+	
+	:param search: An instance of a :class:`BaseSearch` subclass or an object that implements the same API.
+	:param result: An arbitrary result from the ``search``.
+	
 	"""
 	def __init__(self, search, result):
 		self.search = search
 		self.result = result
 	
 	def get_title(self):
+		"""Returns the title of the result by calling :meth:`BaseSearch.get_result_title` on the raw result."""
 		return self.search.get_result_title(self.result)
 	
 	def get_url(self):
-		qd = self.search.get_result_querydict(self.result)
-		if qd is None:
-			return ""
-		return "?%s" % qd.urlencode()
+		"""Returns the url of the result or ``None`` by calling :meth:`BaseSearch.get_result_url` on the raw result. This url will contain a querystring which, if used, will track a :class:`.Click` for the actual url."""
+		return self.search.get_result_url(self.result)
+	
+	def get_actual_url(self):
+		"""Returns the actual url of the result by calling :meth:`BaseSearch.get_actual_result_url` on the raw result."""
+		return self.search.get_actual_result_url(self.result)
+	
+	def get_content(self):
+		"""Returns the content of the result by calling :meth:`BaseSearch.get_result_content` on the raw result."""
+		return self.search.get_result_content(self.result)
 	
 	def get_template(self):
+		"""Returns the template which will be used to render the :class:`Result` by calling :meth:`BaseSearch.get_result_template` on the raw result."""
 		return self.search.get_result_template(self.result)
 	
-	def get_extra_context(self):
-		return self.search.get_result_extra_context(self.result)
-	
 	def get_context(self):
-		context = self.get_extra_context()
-		context.update({
-			'title': self.get_title(),
-			'url': self.get_url(),
-			'result': self.result
-		})
-		return context
+		"""
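+		Returns the context dictionary for the result. This is used both in rendering the result and in the AJAX return value for :meth:`.SearchView.ajax_api_view`. The dictionary is built once and then cached on the instance. In addition to ``actual_url`` (the result of calling :meth:`get_actual_url`), the context will contain the following keys: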
+		
+		title
+			The result of calling :meth:`get_title`
+		url
+			The result of calling :meth:`get_url`
+		content
+			The result of calling :meth:`get_content`
+		
+		"""
+		if not hasattr(self, '_context'):
+			self._context = {
+				'title': self.get_title(),
+				'url': self.get_url(),
+				'actual_url': self.get_actual_url(),
+				'content': self.get_content()
+			}
+		return self._context
 	
 	def render(self):
+		"""Returns the template from :meth:`get_template` rendered with the context from :meth:`get_context`."""
 		t = self.get_template()
 		c = Context(self.get_context())
 		return t.render(c)
 	
 	def __unicode__(self):
+		"""Returns :meth:`render`"""
 		return self.render()
 
 
@@ -134,128 +125,149 @@ class BaseSearchMetaclass(type):
 		if 'verbose_name' not in attrs:
 			attrs['verbose_name'] = capfirst(' '.join(convert_camelcase(name).rsplit(' ', 1)[:-1]))
 		if 'slug' not in attrs:
-			attrs['slug'] = name.lower()
+			attrs['slug'] = name[:-6].lower() if name.endswith("Search") else name.lower()
 		return super(BaseSearchMetaclass, cls).__new__(cls, name, bases, attrs)
 
 
 class BaseSearch(object):
 	"""
-	Defines a generic search interface. Accessing self.results will
-	attempt to retrieve cached results and, if that fails, will
-	initiate a new search and store the results in the cache.
+	Defines a generic search api. Accessing :attr:`results` will attempt to retrieve cached results and, if that fails, will initiate a new search and store the results in the cache. Each search has a ``verbose_name`` and a ``slug``. If these are not provided as attributes, they will be automatically generated based on the name of the class.
+	
+	:param search_arg: The string which is being searched for.
+	
 	"""
 	__metaclass__ = BaseSearchMetaclass
-	result_limit = 10
+	#: The number of results to return from the complete list. Default: 5
+	result_limit = 5
+	#: How long the items for the search should be cached (in minutes). Default: 48 hours.
 	_cache_timeout = 60*48
+	#: The path to the template which will be used to render the :class:`Result`\ s for this search. If this is ``None``, then the framework will try ``sobol/search/<slug>/result.html`` and ``sobol/search/result.html``.
+	result_template = None
+	#: The path to the template which will be used to generate the title of the :class:`Result`\ s for this search. If this is ``None``, then the framework will try ``sobol/search/<slug>/title.html`` and ``sobol/search/title.html``.
+	title_template = None
+	#: The path to the template which will be used to generate the content of the :class:`Result`\ s for this search. If this is ``None``, then the framework will try ``sobol/search/<slug>/content.html`` and ``sobol/search/content.html``.
+	content_template = None
 	
 	def __init__(self, search_arg):
 		self.search_arg = search_arg
 	
-	def _get_cached_results(self):
-		"""Return the cached results if the results haven't timed out. Otherwise return None."""
-		result_cache = cache.get(SEARCH_CACHE_KEY)
-		if result_cache and self.__class__ in result_cache and self.search_arg.lower() in result_cache[self.__class__]:
-			cached = result_cache[self.__class__][self.search_arg.lower()]
-			if cached['timeout'] >= datetime.datetime.now():
-				return cached['results']
-		return None
-	
-	def _set_cached_results(self, results, timeout):
-		"""Sets the results to the cache for <timeout> minutes."""
-		result_cache = cache.get(SEARCH_CACHE_KEY) or {}
-		cached = result_cache.setdefault(self.__class__, {}).setdefault(self.search_arg.lower(), {})
-		cached.update({
-			'results': results,
-			'timeout': datetime.datetime.now() + datetime.timedelta(minutes=timeout)
-		})
-		cache.set(SEARCH_CACHE_KEY, result_cache, MAX_CACHE_TIMEOUT)
-	
 	@property
 	def results(self):
+		"""Retrieves cached results or initiates a new search via :meth:`get_results` and caches the results."""
 		if not hasattr(self, '_results'):
-			results = self._get_cached_results()
-			if results is None:
-				try:
-					# Cache one extra result so we can see if there are
-					# more results to be had.
-					limit = self.result_limit
-					if limit is not None:
-						limit += 1
-					results = self.get_results(limit)
-				except:
-					if settings.DEBUG:
-						raise
-					#  On exceptions, don't set any cache; just return.
-					return []
+			try:
+				# Cache one extra result so we can see if there are
+				# more results to be had.
+				limit = self.result_limit
+				if limit is not None:
+					limit += 1
+				results = self.get_results(limit)
+			except:
+				if settings.DEBUG:
+					raise
+				#  On exceptions, don't set any cache; just return.
+				return []
 			
-				self._set_cached_results(results, self._cache_timeout)
 			self._results = results
+			
+			if USE_CACHE:
+				for result in results:
+					result.get_context()  # build each context now so it is stored with the cached instance
+				key = _make_cache_key(self, self.search_arg)
+				cache.set(key, self, self._cache_timeout * 60)  # _cache_timeout is in minutes; Django cache timeouts are in seconds
 		
 		return self._results
 	
 	def get_results(self, limit=None, result_class=Result):
 		"""
-		Calls self.search() and parses the return value into Result objects.
+		Calls :meth:`search` and parses the return value into :class:`Result` instances.
+		
+		:param limit: Passed directly to :meth:`search`.
+		:param result_class: The class used to represent the results. This will be instantiated with the :class:`BaseSearch` instance and the raw result from the search.
+		
 		"""
 		results = self.search(limit)
 		return [result_class(self, result) for result in results]
 	
 	def search(self, limit=None):
-		"""
-		Returns an iterable of up to <limit> results. The
-		get_result_title, get_result_url, get_result_template, and
-		get_result_extra_context methods will be used to interpret the
-		individual items that this function returns, so the result can
-		be an object with attributes as easily as a dictionary
- 		with keys. The only restriction is that the objects be
-		pickleable so that they can be used with django's cache system.
-		"""
+		"""Returns an iterable of up to ``limit`` results. The :meth:`get_result_title`, :meth:`get_actual_result_url`, :meth:`get_result_content`, and :meth:`get_result_template` methods will be used to interpret the individual items that this function returns, so the result can be an object with attributes as easily as a dictionary with keys. However, keep in mind that the raw results will be stored with django's caching mechanisms and will be converted to JSON."""
 		raise NotImplementedError
 	
-	def get_result_title(self, result):
-		raise NotImplementedError
-	
-	def get_result_url(self, result):
-		"Subclasses override this to provide the actual URL for the result."
+	def get_actual_result_url(self, result):
+		"""Returns the actual URL for the ``result`` or ``None`` if there is no URL. Must be implemented by subclasses."""
 		raise NotImplementedError
 	
 	def get_result_querydict(self, result):
-		url = self.get_result_url(result)
+		"""Returns a querydict for tracking selection of the result, or ``None`` if there is no URL for the result."""
+		url = self.get_actual_result_url(result)
 		if url is None:
 			return None
 		return make_tracking_querydict(self.search_arg, url)
 	
+	def get_result_url(self, result):
+		"""Returns ``None`` or a url which, when accessed, will register a :class:`.Click` for that url."""
+		qd = self.get_result_querydict(result)
+		if qd is None:
+			return None
+		return "?%s" % qd.urlencode()
+	
+	def get_result_title(self, result):
+		"""Returns the title of the ``result``. By default, renders ``sobol/search/<slug>/title.html`` or ``sobol/search/title.html`` with the result in the context. This can be overridden by setting :attr:`title_template` or simply overriding :meth:`get_result_title`. If no template can be found, this will raise :exc:`TemplateDoesNotExist`."""
+		return loader.render_to_string(self.title_template or [
+			'sobol/search/%s/title.html' % self.slug,
+			'sobol/search/title.html'
+		], {'result': result})
+	
+	def get_result_content(self, result):
+		"""Returns the content for the ``result``. By default, renders ``sobol/search/<slug>/content.html`` or ``sobol/search/content.html`` with the result in the context. This can be overridden by setting :attr:`content_template` or simply overriding :meth:`get_result_content`. If no template is found, this will return an empty string."""
+		try:
+			return loader.render_to_string(self.content_template or [
+				'sobol/search/%s/content.html' % self.slug,
+				'sobol/search/content.html'
+			], {'result': result})
+		except TemplateDoesNotExist:
+			return ""
+	
 	def get_result_template(self, result):
-		if hasattr(self, 'result_template'):
+		"""Returns the template to be used for rendering the ``result``. For a search with slug ``google``, this would first try ``sobol/search/google/result.html``, then fall back on ``sobol/search/result.html``. Subclasses can override this by setting :attr:`result_template` to the path of another template."""
+		if self.result_template:
 			return loader.get_template(self.result_template)
-		if not hasattr(self, '_result_template'):
-			self._result_template = DEFAULT_RESULT_TEMPLATE
-		return self._result_template
-	
-	def get_result_extra_context(self, result):
-		return {}
+		return loader.select_template([
+			'sobol/search/%s/result.html' % self.slug,
+			'sobol/search/result.html'
+		])
 	
+	@property
 	def has_more_results(self):
-		"""Useful to determine whether to display a `view more results` link."""
-		return len(self.results) > self.result_limit
+		"""Returns ``True`` if there are more results than :attr:`result_limit` and ``False`` otherwise. A search whose :attr:`result_limit` is ``None`` (unlimited) never has more results."""
+		return self.result_limit is not None and len(self.results) > self.result_limit
 	
-	@property
-	def more_results_url(self):
-		"""
-		Returns the actual url for more results. This will be encoded
-		into a querystring for tracking purposes.
-		"""
-		raise NotImplementedError
+	def get_actual_more_results_url(self):
+		"""Returns the actual url for more results. By default, simply returns ``None``."""
+		return None
+	
+	def get_more_results_querydict(self):
+		"""Returns a :class:`QueryDict` for tracking whether people click on a 'more results' link."""
+		url = self.get_actual_more_results_url()
+		if url:
+			return make_tracking_querydict(self.search_arg, url)
+		return None
 	
 	@property
-	def more_results_querydict(self):
-		return make_tracking_querydict(self.search_arg, self.more_results_url)
+	def more_results_url(self):
+		"""Returns a URL which consists of a querystring which, when accessed, will log a :class:`.Click` for the actual URL."""
+		qd = self.get_more_results_querydict()
+		if qd is None:
+			return None
+		return "?%s" % qd.urlencode()
 	
 	def __unicode__(self):
 		return self.verbose_name
 
 
 class DatabaseSearch(BaseSearch):
+	"""Implements :meth:`~BaseSearch.search` and :meth:`get_queryset` methods to handle database queries."""
+	#: The model which should be searched by the :class:`DatabaseSearch`.
 	model = None
 	
 	def search(self, limit=None):
@@ -267,28 +279,27 @@ class DatabaseSearch(BaseSearch):
 		return self._qs
 	
 	def get_queryset(self):
+		"""Returns a :class:`QuerySet` of all instances of :attr:`model`. This method should be overridden by subclasses to specify how the search should actually be implemented for the model."""
 		return self.model._default_manager.all()
 
 
 class URLSearch(BaseSearch):
-	"""
-	Defines a generic interface for searches that require accessing a
-	certain url to get search results.
-	"""
+	"""Defines a generic interface for searches that require accessing a certain url to get search results."""
+	#: The base URL which will be accessed to get the search results.
 	search_url = ''
+	#: The url-encoded query string to be used for fetching search results from :attr:`search_url`. Must have one ``%s`` to contain the search argument.
 	query_format_str = "%s"
 
 	@property
 	def url(self):
-		"The URL where the search gets its results."
+		"""The URL where the search gets its results. Composed from :attr:`search_url` and :attr:`query_format_str`."""
 		return self.search_url + self.query_format_str % urlquote_plus(self.search_arg)
-
-	@property
-	def more_results_url(self):
-		"The URL where the users would go to get more results."
+	
+	def get_actual_more_results_url(self):
 		return self.url
 	
 	def parse_response(self, response, limit=None):
+		"""Handles the ``response`` from accessing :attr:`url` (with :func:`urllib2.urlopen`) and returns a list of up to ``limit`` results."""
 		raise NotImplementedError
 	
 	def search(self, limit=None):
@@ -296,20 +307,17 @@ class URLSearch(BaseSearch):
 
 
 class JSONSearch(URLSearch):
-	"""
-	Makes a GET request and parses the results as JSON. The default
-	behavior assumes that the return value is a list of results.
-	"""
+	"""Makes a GET request and parses the results as JSON. The default behavior assumes that the response contains a list of results."""
 	def parse_response(self, response, limit=None):
 		return json.loads(response.read())[:limit]
 
 
 class GoogleSearch(JSONSearch):
+	"""An example implementation of a :class:`JSONSearch`."""
 	search_url = "http://ajax.googleapis.com/ajax/services/search/web"
-	# TODO: Change this template to reflect the app's actual name.
-	result_template = 'search/googlesearch.html'
 	_cache_timeout = 60
 	verbose_name = "Google search (current site)"
+	_more_results_url = None
 	
 	@property
 	def query_format_str(self):
@@ -320,6 +328,7 @@ class GoogleSearch(JSONSearch):
 	
 	@property
 	def default_args(self):
+		"""Unquoted default arguments for the :class:`GoogleSearch`."""
 		return "site:%s" % Site.objects.get_current().domain
 	
 	def parse_response(self, response, limit=None):
@@ -347,15 +356,17 @@ class GoogleSearch(JSONSearch):
 			return True
 		return False
 	
-	@property
-	def more_results_url(self):
+	def get_actual_more_results_url(self):
 		return self._more_results_url
 	
+	def get_actual_result_url(self, result):
+		return result['unescapedUrl']
+	
 	def get_result_title(self, result):
-		return result['titleNoFormatting']
+		return mark_safe(result['titleNoFormatting'])
 	
-	def get_result_url(self, result):
-		return result['unescapedUrl']
+	def get_result_content(self, result):
+		return mark_safe(result['content'])
 
 
 registry.register(GoogleSearch)
@@ -368,13 +379,22 @@ except:
 else:
 	__all__ += ('ScrapeSearch', 'XMLSearch',)
 	class ScrapeSearch(URLSearch):
-		_strainer_args = []
-		_strainer_kwargs = {}
+		"""A base class for scrape-style searching, available if :mod:`BeautifulSoup` is installed."""
+		#: Arguments to be passed into a :class:`SoupStrainer`.
+		strainer_args = []
+		#: Keyword arguments to be passed into a :class:`SoupStrainer`.
+		strainer_kwargs = {}
 		
 		@property
 		def strainer(self):
+			"""
+			Caches and returns a :class:`SoupStrainer` initialized with :attr:`strainer_args` and :attr:`strainer_kwargs`. This strainer will be used to parse only certain parts of the document.
+			
+			.. seealso:: `BeautifulSoup: Improving Performance by Parsing Only Part of the Document <http://www.crummy.com/software/BeautifulSoup/documentation.html#Improving%20Performance%20by%20Parsing%20Only%20Part%20of%20the%20Document>`_
+			
+			"""
 			if not hasattr(self, '_strainer'):
-				self._strainer = SoupStrainer(*self._strainer_args, **self._strainer_kwargs)
+				self._strainer = SoupStrainer(*self.strainer_args, **self.strainer_kwargs)
 			return self._strainer
 		
 		def parse_response(self, response, limit=None):
@@ -384,18 +404,21 @@ else:
 		
 		def parse_results(self, results):
 			"""
-			Provides a hook for parsing the results of straining. This
-			has no default behavior because the results absolutely
-			must be parsed to properly extract the information.
-			For more information, see http://www.crummy.com/software/BeautifulSoup/documentation.html#Improving%20Memory%20Usage%20with%20extract
+			Provides a hook for parsing the results of straining. This has no default behavior and must be implemented by subclasses because the results absolutely must be parsed to properly extract the information.
+			
+			.. seealso:: `BeautifulSoup: Improving Memory Usage with extract <http://www.crummy.com/software/BeautifulSoup/documentation.html#Improving%20Memory%20Usage%20with%20extract>`_
 			"""
 			raise NotImplementedError
 	
 	
 	class XMLSearch(ScrapeSearch):
-		_self_closing_tags = []
+		"""A base class for searching XML results."""
+		#: Self-closing tag names to be used when interpreting the XML document
+		#:
+		#: .. seealso:: `BeautifulSoup: Parsing XML <http://www.crummy.com/software/BeautifulSoup/documentation.html#Parsing%20XML>`_
+		self_closing_tags = []
 		
 		def parse_response(self, response, limit=None):
 			strainer = self.strainer
-			soup = BeautifulStoneSoup(response, selfClosingTags=self._self_closing_tags, parseOnlyThese=strainer)
+			soup = BeautifulStoneSoup(response, selfClosingTags=self.self_closing_tags, parseOnlyThese=strainer)
 			return self.parse_results(soup.findAll(recursive=False, limit=limit))
\ No newline at end of file