Clarified sobol Search framework - now automatically finds result templates at "sobol...
[philo.git] / philo / contrib / sobol / models.py
1 import datetime
2 import itertools
3
4 from django.conf import settings
5 from django.conf.urls.defaults import patterns, url
6 from django.contrib import messages
7 from django.core.exceptions import ValidationError
8 from django.core.validators import URLValidator
9 from django.db import models
10 from django.http import HttpResponseRedirect, Http404, HttpResponse
11 from django.utils import simplejson as json
12 from django.utils.datastructures import SortedDict
13
14 from philo.contrib.sobol import registry, get_search_instance
15 from philo.contrib.sobol.forms import SearchForm
16 from philo.contrib.sobol.utils import HASH_REDIRECT_GET_KEY, URL_REDIRECT_GET_KEY, SEARCH_ARG_GET_KEY, check_redirect_hash, RegistryIterator
17 from philo.exceptions import ViewCanNotProvideSubpath
18 from philo.models import MultiView, Page
19 from philo.models.fields import SlugMultipleChoiceField
20
# eventlet is an optional dependency. The module-level name stays ``None``
# unless the SOBOL_USE_EVENTLET setting is truthy *and* the package can be
# imported; code in this module tests ``eventlet`` for truthiness before use.
eventlet = None
if getattr(settings, 'SOBOL_USE_EVENTLET', False):
        try:
                import eventlet
        except ImportError:
                # Only a missing package is tolerated here; a bare ``except``
                # would also hide KeyboardInterrupt/SystemExit and real bugs.
                pass
27
28
class Search(models.Model):
        """Represents all attempts to search for a unique string."""
        #: The string which was searched for.
        string = models.TextField()

        def __unicode__(self):
                return self.string

        def get_weighted_results(self, threshhold=None):
                """
                Returns a list of :class:`ResultURL` instances related to the search and ordered by decreasing weight. This will be cached on the instance.

                :param threshhold: The earliest datetime that a :class:`Click` can have been made on a related :class:`ResultURL` in order for that :class:`ResultURL` to be included in the weighted results (or ``None`` to include every related :class:`ResultURL`).
                :returns: A list of :class:`ResultURL` instances, heaviest first.

                .. note:: The list is cached as ``_weighted_results`` the first time it is computed, so ``threshhold`` is ignored on later calls for the same instance.

                """
                if not hasattr(self, '_weighted_results'):
                        result_qs = self.result_urls.all()

                        if threshhold is not None:
                                # The reverse relation from Click to ResultURL is named
                                # ``clicks``. The lookup spans a join, so distinct()
                                # keeps a ResultURL with several matching clicks from
                                # being returned more than once.
                                result_qs = result_qs.filter(clicks__datetime__gte=threshhold).distinct()

                        results = list(result_qs)

                        # Stable sort, heaviest first; equally weighted results keep
                        # their queryset order.
                        results.sort(key=lambda result: result.weight, reverse=True)

                        self._weighted_results = results

                return self._weighted_results

        def get_favored_results(self, error=5, threshhold=None):
                """
                Calculates the set of most-favored results based on their weight. Evenly-weighted results will be grouped together and either added or excluded as a group.

                :param error: An arbitrary number; higher values will cause this method to be more reticent about adding new items to the favored results.
                :param threshhold: Will be passed directly into :meth:`get_weighted_results`
                :returns: A list of :class:`ResultURL` instances. The list is empty when every weighted result would have been favored. The result is cached on the instance as ``_favored_results``.

                """
                if not hasattr(self, '_favored_results'):
                        results = self.get_weighted_results(threshhold)

                        # Insertion-ordered mapping of weight -> results sharing that
                        # weight. ``results`` is already sorted by decreasing weight,
                        # so the groups are visited heaviest first.
                        grouped_results = SortedDict()

                        for result in results:
                                grouped_results.setdefault(result.weight, []).append(result)

                        self._favored_results = []

                        for value, subresults in grouped_results.items():
                                # The cost of admitting a group grows with the squared
                                # distance between its weight and the weight of every
                                # result that is already favored.
                                cost = error * sum([(value - result.weight)**2 for result in self._favored_results])
                                if value > cost:
                                        self._favored_results += subresults
                                else:
                                        break
                        # If nothing was excluded then no result stands out from the rest.
                        if len(self._favored_results) == len(results):
                                self._favored_results = []
                return self._favored_results

        class Meta:
                ordering = ['string']
                verbose_name_plural = 'searches'
89
90
class ResultURL(models.Model):
        """A URL which end users have picked at least once from the results of a :class:`Search`."""
        #: :class:`ForeignKey` to the :class:`Search` this :class:`ResultURL` belongs to.
        search = models.ForeignKey(Search, related_name='result_urls')
        #: The URL that was picked.
        url = models.TextField(validators=[URLValidator()])

        def __unicode__(self):
                return self.url

        def get_weight(self, threshhold=None):
                """
                Returns the weight of the :class:`ResultURL`: the sum of the weights of its related :class:`Click` instances. The value is computed once and then cached on the instance as ``_weight``.

                :param threshhold: When not ``None``, only :class:`Click` instances whose datetime is at or after this value are summed. Only honored by the call that populates the cache.

                """
                # Serve the cached value when one has already been computed.
                if hasattr(self, '_weight'):
                        return self._weight

                click_qs = self.clicks.all()
                if threshhold is not None:
                        click_qs = click_qs.filter(datetime__gte=threshhold)

                total = 0
                for click in click_qs:
                        total += click.weight

                self._weight = total
                return total
        weight = property(get_weight)

        class Meta:
                ordering = ['url']
121
122
class Click(models.Model):
        """Represents a click on a :class:`ResultURL`."""
        #: A :class:`ForeignKey` to the :class:`ResultURL` which the :class:`Click` is related to.
        result = models.ForeignKey(ResultURL, related_name='clicks')
        #: The datetime when the click was registered in the system.
        datetime = models.DateTimeField()

        def __unicode__(self):
                return self.datetime.strftime('%B %d, %Y %H:%M:%S')

        def get_weight(self, default=1, weighted=lambda value, days: value/days**2):
                """
                Calculates and returns the weight of the :class:`Click`. The value is cached on the instance as ``_weight``, so the arguments only matter on the first call.

                :param default: The weight of a click that is less than one full day old. It is converted to a float.
                :param weighted: A callable taking ``(default, days)`` which returns the weight of a click that is ``days`` whole days old. By default the weight falls off with the square of the age in days.
                :raises ValueError: If the click's datetime lies in the future.

                """
                if not hasattr(self, '_weight'):
                        # Whole days elapsed since the click; negative for future dates.
                        days = (datetime.datetime.now() - self.datetime).days
                        if days < 0:
                                raise ValueError("Click dates must be in the past.")
                        default = float(default)
                        if days == 0:
                                # Avoids dividing by zero in the default ``weighted``.
                                self._weight = default
                        else:
                                self._weight = weighted(default, days)
                return self._weight
        weight = property(get_weight)

        def clean(self):
                """
                Validates that the click was not registered in the future.

                :raises ValidationError: If :attr:`datetime` is later than the current time.

                """
                # Model validation calls clean() even when a field failed its own
                # validation, in which case ``datetime`` may still be None. Comparing
                # None with a datetime raises TypeError, so leave the missing value
                # to be reported by field validation.
                if self.datetime is not None and self.datetime > datetime.datetime.now():
                        raise ValidationError("Click dates must be in the past.")

        class Meta:
                ordering = ['datetime']
                get_latest_by = 'datetime'
154
155
class RegistryChoiceField(SlugMultipleChoiceField):
        """A :class:`SlugMultipleChoiceField` whose ``choices`` can be read repeatedly even when they are backed by an iterator."""
        def _get_choices(self):
                """Returns the stored choices in a form that does not exhaust them."""
                stored = self._choices

                # A RegistryIterator provides its own copy().
                if isinstance(stored, RegistryIterator):
                        return stored.copy()

                # Anything that is not a (Python 2) iterator is handed back as-is.
                if not hasattr(stored, 'next'):
                        return stored

                # Split a plain iterator in two: one half is returned to the caller
                # and the other is kept for the next access.
                handed_out, self._choices = itertools.tee(stored)
                return handed_out
        choices = property(_get_choices)
166
167
# Register RegistryChoiceField with South's model introspection when South is
# installed; without South there is nothing to register.
try:
        from south.modelsinspector import add_introspection_rules
except ImportError:
        pass
else:
        # Raw string so the ``\.`` regex escapes reach the regex engine untouched
        # instead of relying on Python passing unknown escapes through.
        add_introspection_rules([], [r"^philo\.contrib\.sobol\.models\.RegistryChoiceField"])
174
175
class SearchView(MultiView):
        """Handles a view for the results of a search, anonymously tracks the selections made by end users, and provides an AJAX API for asynchronous search result loading. This can be particularly useful if some searches are slow."""
        #: :class:`ForeignKey` to a :class:`.Page` which will be used to render the search results.
        results_page = models.ForeignKey(Page, related_name='search_results_related')
        #: A :class:`.SlugMultipleChoiceField` whose choices are the contents of the :class:`.SearchRegistry`
        searches = RegistryChoiceField(choices=registry.iterchoices())
        #: A :class:`BooleanField` which controls whether or not the AJAX API is enabled.
        #:
        #: .. note:: If the AJAX API is enabled, a ``ajax_api_url`` attribute will be added to each search instance containing the url and get parameters for an AJAX request to retrieve results for that search.
        #:
        #: .. note:: Be careful not to access :attr:`search_instance.results <.BaseSearch.results>` if the AJAX API is enabled - otherwise the search will be run immediately rather than on the AJAX request.
        enable_ajax_api = models.BooleanField("Enable AJAX API", default=True)
        #: A :class:`CharField` containing the placeholder text which is intended to be used for the search box for the :class:`SearchView`. It is the template author's responsibility to make use of this information.
        placeholder_text = models.CharField(max_length=75, default="Search")

        #: The form which will be used to validate the input to the search box for this :class:`SearchView`.
        search_form = SearchForm

        def __unicode__(self):
                return u"%s (%s)" % (self.placeholder_text, u", ".join([display for slug, display in registry.iterchoices() if slug in self.searches]))

        def get_reverse_params(self, obj):
                """Always raises :class:`ViewCanNotProvideSubpath`."""
                raise ViewCanNotProvideSubpath

        @property
        def urlpatterns(self):
                """The URL patterns for this view: the results view at the root, plus the per-search AJAX endpoint when :attr:`enable_ajax_api` is set."""
                urlpatterns = patterns('',
                        url(r'^$', self.results_view, name='results'),
                )
                if self.enable_ajax_api:
                        urlpatterns += patterns('',
                                url(r'^(?P<slug>[\w-]+)$', self.ajax_api_view, name='ajax_api_view')
                        )
                return urlpatterns

        def results_view(self, request, extra_context=None):
                """
                Renders :attr:`results_page` with a context containing an instance of :attr:`search_form`. If the form was submitted and was valid, then one of two things has happened:

                * A search has been initiated. In this case, a list of search instances will be added to the context as ``searches``. If :attr:`enable_ajax_api` is enabled, each instance will have an ``ajax_api_url`` attribute containing the url needed to make an AJAX request for the search results.
                * A link has been chosen. In this case, corresponding :class:`Search`, :class:`ResultURL`, and :class:`Click` instances will be created and the user will be redirected to the link's actual url.

                :param request: The current request; the search string, redirect url and redirect hash are read from ``request.GET``.
                :param extra_context: Optional dictionary merged into the rendering context.
                :returns: An :class:`HttpResponseRedirect` when a link was chosen (or its hash did not verify), otherwise the response rendered by :attr:`results_page`.

                """
                # Imported here so that this method does not depend on the
                # module-level import block.
                from django.utils.http import urlencode

                context = self.get_context()
                context.update(extra_context or {})

                if SEARCH_ARG_GET_KEY in request.GET:
                        form = self.search_form(request.GET)

                        if form.is_valid():
                                search_string = request.GET[SEARCH_ARG_GET_KEY].lower()
                                url = request.GET.get(URL_REDIRECT_GET_KEY)
                                redirect_hash = request.GET.get(HASH_REDIRECT_GET_KEY)
                                # "key=value" with the user-supplied search string
                                # percent-encoded, so characters such as ``&`` or ``#``
                                # cannot corrupt the URLs built below.
                                search_query = urlencode({SEARCH_ARG_GET_KEY: search_string})

                                if url and redirect_hash:
                                        if check_redirect_hash(redirect_hash, search_string, url):
                                                # Create the necessary models
                                                search = Search.objects.get_or_create(string=search_string)[0]
                                                result_url = search.result_urls.get_or_create(url=url)[0]
                                                result_url.clicks.create(datetime=datetime.datetime.now())
                                                return HttpResponseRedirect(url)
                                        else:
                                                messages.add_message(request, messages.INFO, "The link you followed had been tampered with. Here are all the results for your search term instead!")
                                                return HttpResponseRedirect("%s?%s" % (request.path, search_query))

                                search_instances = []
                                for slug in self.searches:
                                        if slug in registry:
                                                search_instance = get_search_instance(slug, search_string)
                                                search_instances.append(search_instance)

                                                if self.enable_ajax_api:
                                                        search_instance.ajax_api_url = "%s?%s" % (self.reverse('ajax_api_view', kwargs={'slug': slug}, node=request.node), search_query)

                                if eventlet and not self.enable_ajax_api:
                                        # Touch ``results`` on every instance concurrently
                                        # so that the searches run in parallel before the
                                        # page is rendered.
                                        pool = eventlet.GreenPool()
                                        for instance in search_instances:
                                                pool.spawn_n(lambda x: x.results, instance)
                                        pool.waitall()

                                context.update({
                                        'searches': search_instances,
                                        'favored_results': []
                                })

                                try:
                                        search = Search.objects.get(string=search_string)
                                except Search.DoesNotExist:
                                        pass
                                else:
                                        context['favored_results'] = [r.url for r in search.get_favored_results()]
                else:
                        # Use the configured form class so that subclasses overriding
                        # ``search_form`` get their own form when nothing was submitted.
                        form = self.search_form()

                context.update({
                        'form': form
                })
                return self.results_page.render_to_response(request, extra_context=context)

        def ajax_api_view(self, request, slug, extra_context=None):
                """
                Returns a JSON object containing the following variables:

                search
                        Contains the slug for the search.
                results
                        Contains the results of :meth:`.Result.get_context` for each result.
                rendered
                        Contains the results of :meth:`.Result.render` for each result.
                hasMoreResults
                        ``True`` or ``False`` whether the search has more results according to :meth:`BaseSearch.has_more_results`
                moreResultsURL
                        Contains ``None`` or a querystring which, once accessed, will note the :class:`Click` and redirect the user to a page containing more results.

                :param request: The current request; it must be an AJAX request carrying the search string in ``request.GET``.
                :param slug: The slug of the search to run.
                :param extra_context: Accepted but not used by this view.
                :raises Http404: If the request is not an AJAX request, the AJAX API is disabled, the slug is not registered or not enabled for this view, or no search string was supplied.

                """
                search_string = request.GET.get(SEARCH_ARG_GET_KEY)

                if not request.is_ajax() or not self.enable_ajax_api or slug not in registry or slug not in self.searches or search_string is None:
                        raise Http404

                search_instance = get_search_instance(slug, search_string)

                return HttpResponse(json.dumps({
                        'search': search_instance.slug,
                        'results': [result.get_context() for result in search_instance.results],
                        'rendered': [result.render() for result in search_instance.results],
                        'hasMoreResults': search_instance.has_more_results,
                        'moreResultsURL': search_instance.more_results_url,
                }), mimetype="application/json")