Implemented more robust delayed registry iteration. Modules declaring new searches...
[philo.git] / philo / contrib / sobol / models.py
1 import datetime
2 import itertools
3
4 from django.conf import settings
5 from django.conf.urls.defaults import patterns, url
6 from django.contrib import messages
7 from django.core.exceptions import ValidationError
8 from django.core.validators import URLValidator
9 from django.db import models
10 from django.http import HttpResponseRedirect, Http404, HttpResponse
11 from django.utils import simplejson as json
12 from django.utils.datastructures import SortedDict
13
14 from philo.contrib.sobol import registry
15 from philo.contrib.sobol.forms import SearchForm
16 from philo.contrib.sobol.utils import HASH_REDIRECT_GET_KEY, URL_REDIRECT_GET_KEY, SEARCH_ARG_GET_KEY, check_redirect_hash, RegistryIterator
17 from philo.exceptions import ViewCanNotProvideSubpath
18 from philo.models import MultiView, Page
19 from philo.models.fields import SlugMultipleChoiceField
20
# Optional dependency: eventlet is only imported when the project opts in via
# the SOBOL_USE_EVENTLET setting. ``eventlet`` stays None otherwise, and code
# in this module checks its truthiness before using it.
eventlet = None
if getattr(settings, 'SOBOL_USE_EVENTLET', False):
        try:
                import eventlet
        except ImportError:
                # Best effort: if eventlet is not installed, leave it as None
                # instead of failing. Only ImportError is swallowed so that
                # KeyboardInterrupt/SystemExit are not silently eaten.
                pass
27
28
class Search(models.Model):
        """
        A search string for which clicked result URLs are recorded.

        The recorded URLs are reachable through the ``result_urls`` reverse
        relation.
        """
        string = models.TextField()

        def __unicode__(self):
                return self.string

        def get_weighted_results(self, threshhold=None):
                """
                Returns this search's results ordered by decreasing weight.

                :param threshhold: Optional datetime. When given, only result
                        URLs with at least one click at or after it are included.
                :returns: A list of result URLs, heaviest first.

                The list is cached on the instance after the first call, so a
                different ``threshhold`` passed to a later call has no effect.
                NOTE(review): each result's ``weight`` is computed without the
                threshold; confirm whether that is intended.
                """
                if not hasattr(self, '_weighted_results'):
                        result_qs = self.result_urls.all()

                        if threshhold is not None:
                                # The reverse relation from a result URL to its
                                # clicks is named ``clicks``. Filtering across it
                                # joins one row per matching click, so distinct()
                                # is needed to avoid duplicate results.
                                result_qs = result_qs.filter(clicks__datetime__gte=threshhold).distinct()

                        # Stable descending sort: equally-weighted results keep
                        # their queryset order.
                        self._weighted_results = sorted(result_qs, key=lambda result: result.weight, reverse=True)

                return self._weighted_results

        def get_favored_results(self, error=5, threshhold=None):
                """
                Calculate the set of most-favored results. A higher error
                will cause this method to be more reticent about adding new
                items.

                The thought is to see whether there are any results which
                vastly outstrip the other options. As such, evenly-weighted
                results should be grouped together and either added or
                excluded as a group.

                :param error: Multiplier applied to the cost of adding another
                        weight group.
                :param threshhold: Passed through to
                        :meth:`get_weighted_results`.
                :returns: A list of the favored results; cached on the instance
                        after the first call.
                """
                if not hasattr(self, '_favored_results'):
                        results = self.get_weighted_results(threshhold)

                        # Group results by weight, preserving the descending
                        # order in which the weights were first seen.
                        grouped_results = SortedDict()
                        for result in results:
                                grouped_results.setdefault(result.weight, []).append(result)

                        favored = []
                        for value, subresults in grouped_results.items():
                                # The cost grows with the squared distance between
                                # this group's weight and every result already
                                # accepted.
                                cost = error * sum((value - result.weight) ** 2 for result in favored)
                                if value > cost:
                                        favored += subresults
                                else:
                                        break

                        # Cache only once the list has been fully built.
                        self._favored_results = favored
                return self._favored_results

        class Meta:
                ordering = ['string']
                verbose_name_plural = 'searches'
83
84
class ResultURL(models.Model):
        """
        A URL recorded against a :class:`Search`. Clicks on it are reachable
        through the ``clicks`` reverse relation.
        """
        search = models.ForeignKey(Search, related_name='result_urls')
        url = models.TextField(validators=[URLValidator()])

        def __unicode__(self):
                return self.url

        def get_weight(self, threshhold=None):
                """
                Return the summed weight of this URL's clicks.

                :param threshhold: Optional datetime; when given, only clicks at
                        or after it are counted.

                The total is cached on the instance after the first computation.
                """
                if hasattr(self, '_weight'):
                        return self._weight

                click_qs = self.clicks.all()
                if threshhold is not None:
                        click_qs = click_qs.filter(datetime__gte=threshhold)

                total = sum(click.weight for click in click_qs)
                self._weight = total
                return total
        weight = property(get_weight)

        class Meta:
                ordering = ['url']
106
107
class Click(models.Model):
        """A single click on a result URL, recorded with the time it happened."""
        result = models.ForeignKey(ResultURL, related_name='clicks')
        datetime = models.DateTimeField()

        def __unicode__(self):
                return self.datetime.strftime('%B %d, %Y %H:%M:%S')

        def get_weight(self, default=1, weighted=lambda value, days: value/days**2):
                """
                Return this click's weight, which decays with its age in days.

                :param default: Weight of a click less than one day old; coerced
                        to float.
                :param weighted: Callable ``(value, days)`` used for clicks at
                        least one day old. By default the weight falls off with
                        the square of the age in days.
                :raises ValueError: If the click is dated in the future.

                The weight is cached on the instance after the first call.
                """
                if not hasattr(self, '_weight'):
                        days = (datetime.datetime.now() - self.datetime).days
                        if days < 0:
                                raise ValueError("Click dates must be in the past.")
                        default = float(default)
                        if days == 0:
                                # Avoids dividing by zero in the default weighting.
                                self._weight = default
                        else:
                                self._weight = weighted(default, days)
                return self._weight
        weight = property(get_weight)

        def clean(self):
                """Reject clicks dated in the future."""
                # ``datetime`` may still be None here if field validation failed;
                # comparing None with a datetime would raise TypeError instead
                # of reporting a validation error.
                if self.datetime is not None and self.datetime > datetime.datetime.now():
                        raise ValidationError("Click dates must be in the past.")

        class Meta:
                ordering = ['datetime']
                get_latest_by = 'datetime'
135
136
class RegistryChoiceField(SlugMultipleChoiceField):
        """
        Multiple-choice slug field whose choices may be supplied as a
        one-shot iterator; ``choices`` can be read repeatedly without
        exhausting it.
        """
        def _get_choices(self):
                """Return the choices in a form that is safe to iterate over."""
                current = self._choices

                # Registry iterators know how to duplicate themselves.
                if isinstance(current, RegistryIterator):
                        return current.copy()

                # Anything that is not an iterator can be handed out as-is.
                if not hasattr(current, 'next'):
                        return current

                # Plain iterator: split it, keep one half for later calls and
                # hand out the other.
                fresh, self._choices = itertools.tee(current)
                return fresh
        choices = property(_get_choices)
147
148
# Register RegistryChoiceField with South when South is installed; South is
# an optional dependency, so its absence is not an error.
try:
        from south.modelsinspector import add_introspection_rules
except ImportError:
        pass
else:
        # The pattern must match the dotted path of the field class, which is
        # defined in this module (philo.contrib.sobol.models).
        add_introspection_rules([], [r"^philo\.contrib\.sobol\.models\.RegistryChoiceField"])
155
156
class SearchView(MultiView):
        """
        Multi-view which handles search requests for a configurable set of
        registered searches.

        Results are either attached to the template context of
        ``results_page`` or, when ``enable_ajax_api`` is set, served per search
        through :meth:`ajax_api_view`.
        """
        results_page = models.ForeignKey(Page, related_name='search_results_related')
        searches = RegistryChoiceField(choices=registry.iterchoices())
        enable_ajax_api = models.BooleanField("Enable AJAX API", default=True, help_text="Search results will be available <i>only</i> by AJAX, not as template variables.")
        placeholder_text = models.CharField(max_length=75, default="Search")

        # Form class used to validate incoming search requests; subclasses may
        # override it.
        search_form = SearchForm

        def __unicode__(self):
                return u"%s (%s)" % (self.placeholder_text, u", ".join([display for slug, display in registry.iterchoices() if slug in self.searches]))

        def get_reverse_params(self, obj):
                """This view cannot provide a subpath for any object."""
                raise ViewCanNotProvideSubpath

        @property
        def urlpatterns(self):
                """
                URL patterns for this view: the results page, plus the per-search
                AJAX endpoint when the AJAX API is enabled.
                """
                urlpatterns = patterns('',
                        url(r'^$', self.results_view, name='results'),
                )
                if self.enable_ajax_api:
                        urlpatterns += patterns('',
                                url(r'^(?P<slug>[\w-]+)$', self.ajax_api_view, name='ajax_api_view')
                        )
                return urlpatterns

        def get_search_instance(self, slug, search_string):
                """
                Instantiate the search registered under ``slug`` for the
                lowercased ``search_string``.
                """
                return registry[slug](search_string.lower())

        def _search_url(self, path, search_string):
                """
                Return ``path`` with ``search_string`` appended as a URL-encoded
                query parameter.
                """
                # Imported locally so that this class does not rely on a change
                # to the module-level imports.
                from django.utils.http import urlencode
                return "%s?%s" % (path, urlencode({SEARCH_ARG_GET_KEY: search_string}))

        def results_view(self, request, extra_context=None):
                """
                Render the results page for the search term in ``request.GET``.

                If the request also carries a redirect URL and hash, a valid hash
                records a click and redirects to the URL; an invalid one
                redirects back to the plain results for the same term.

                :param request: The current request.
                :param extra_context: Optional dict merged into the context.
                :returns: An HTTP response.
                """
                context = self.get_context()
                context.update(extra_context or {})

                if SEARCH_ARG_GET_KEY in request.GET:
                        form = self.search_form(request.GET)

                        if form.is_valid():
                                search_string = request.GET[SEARCH_ARG_GET_KEY].lower()
                                url = request.GET.get(URL_REDIRECT_GET_KEY)
                                redirect_hash = request.GET.get(HASH_REDIRECT_GET_KEY)

                                if url and redirect_hash:
                                        if check_redirect_hash(redirect_hash, search_string, url):
                                                # Create the necessary models
                                                search = Search.objects.get_or_create(string=search_string)[0]
                                                result_url = search.result_urls.get_or_create(url=url)[0]
                                                result_url.clicks.create(datetime=datetime.datetime.now())
                                                return HttpResponseRedirect(url)
                                        else:
                                                messages.add_message(request, messages.INFO, "The link you followed had been tampered with. Here are all the results for your search term instead!")
                                                # The search string is user input, so it
                                                # is URL-encoded before being placed in
                                                # the redirect target.
                                                return HttpResponseRedirect(self._search_url(request.path, search_string))

                                if not self.enable_ajax_api:
                                        search_instances = [self.get_search_instance(slug, search_string) for slug in self.searches]
                                        if eventlet:
                                                # Run the searches concurrently and wait
                                                # for all of them before rendering.
                                                pool = eventlet.GreenPool()
                                                for search_instance in search_instances:
                                                        pool.spawn_n(self.make_result_cache, search_instance)
                                                pool.waitall()
                                        else:
                                                for search_instance in search_instances:
                                                        self.make_result_cache(search_instance)
                                        context.update({
                                                'searches': search_instances
                                        })
                                else:
                                        # Only describe each search; the results
                                        # themselves are fetched from the AJAX view.
                                        ajax_searches = []
                                        for slug, verbose_name in registry.iterchoices():
                                                if slug not in self.searches:
                                                        continue
                                                ajax_path = self.reverse('ajax_api_view', kwargs={'slug': slug}, node=request.node)
                                                ajax_searches.append({
                                                        'verbose_name': verbose_name,
                                                        'slug': slug,
                                                        'url': self._search_url(ajax_path, search_string),
                                                        'result_template': registry[slug].result_template,
                                                })
                                        context.update({
                                                'searches': ajax_searches
                                        })
                else:
                        # Use the configured form class, consistent with the
                        # bound form above.
                        form = self.search_form()

                context.update({
                        'form': form
                })
                return self.results_page.render_to_response(request, extra_context=context)

        def make_result_cache(self, search_instance):
                """
                Access ``search_instance.results`` so that the search is evaluated
                now. Presumably the search caches the value; verify against the
                search implementation.
                """
                search_instance.results

        def ajax_api_view(self, request, slug, extra_context=None):
                """
                Return the results of the search registered under ``slug`` as
                JSON, with each result's context and its rendered form.

                :raises Http404: If the request is not AJAX, the AJAX API is
                        disabled, ``slug`` is not one of this view's searches, or
                        no search term was supplied.
                """
                search_string = request.GET.get(SEARCH_ARG_GET_KEY)

                if not request.is_ajax() or not self.enable_ajax_api or slug not in self.searches or search_string is None:
                        raise Http404

                search_instance = self.get_search_instance(slug, search_string)

                return HttpResponse(json.dumps({
                        'results': [result.get_context() for result in search_instance.results],
                        'rendered': [result.render() for result in search_instance.results]
                }))