Refactored weight code to split the work over Search, ResultURL, and Click models...
[philo.git] / contrib / sobol / models.py
index 8113750..7e11882 100644 (file)
@@ -1,13 +1,16 @@
 from django.conf.urls.defaults import patterns, url
 from django.contrib import messages
+from django.core.exceptions import ValidationError
 from django.db import models
 from django.http import HttpResponseRedirect, Http404
 from django.utils import simplejson as json
+from django.utils.datastructures import SortedDict
 from philo.contrib.sobol import registry
 from philo.contrib.sobol.forms import SearchForm
 from philo.contrib.sobol.utils import HASH_REDIRECT_GET_KEY, URL_REDIRECT_GET_KEY, SEARCH_ARG_GET_KEY, check_redirect_hash
 from philo.exceptions import ViewCanNotProvideSubpath
-from philo.models import MultiView, Page, SlugMultipleChoiceField
+from philo.models import MultiView, Page
+from philo.models.fields import SlugMultipleChoiceField
 from philo.validators import RedirectValidator
 import datetime
 try:
@@ -20,7 +23,52 @@ class Search(models.Model):
        string = models.TextField()
        
        def __unicode__(self):
-               return self.search_string
+               return self.string
+       
+       def get_weighted_results(self, threshhold=None):
+               """
+               Return this search's result URLs ordered by decreasing weight.
+               If ``threshhold`` is given, only URLs with a click at or after
+               it are kept. The list is cached after the first call.
+               """
+               if not hasattr(self, '_weighted_results'):
+                       result_qs = self.result_urls.all()
+                       if threshhold is not None:
+                               # ResultURL reaches its clicks through ``clicks``, so the lookup
+                               # must use that name; distinct() collapses one-row-per-click joins.
+                               result_qs = result_qs.filter(clicks__datetime__gte=threshhold).distinct()
+                       # Stable descending sort that reads each weight only once.
+                       self._weighted_results = sorted(result_qs, key=lambda result: result.weight, reverse=True)
+               return self._weighted_results
+       
+       def get_favored_results(self, error=5, threshhold=None):
+               """
+               Return the results that clearly outweigh the rest, computing
+               and caching them on first use.
+               
+               Results are bucketed by weight and the buckets are visited
+               from heaviest to lightest. A bucket is kept only while its
+               weight exceeds ``error`` times the summed squared distance
+               between that weight and every result's weight, so a larger
+               ``error`` admits fewer results. Equally-weighted results
+               are therefore always accepted or rejected together.
+               """
+               try:
+                       return self._favored_results
+               except AttributeError:
+                       pass
+               
+               ranked = self.get_weighted_results(threshhold)
+               buckets = SortedDict()
+               for entry in ranked:
+                       buckets.setdefault(entry.weight, []).append(entry)
+               
+               self._favored_results = favored = []
+               for weight, members in buckets.items():
+                       if not weight > error * sum([(weight - entry.weight)**2 for entry in ranked]):
+                               break
+                       favored.extend(members)
+               return favored
        
        class Meta:
                ordering = ['string']
@@ -34,6 +82,18 @@ class ResultURL(models.Model):
        def __unicode__(self):
                return self.url
        
+       def get_weight(self, threshhold=None):
+               "Sum the weights of this URL's clicks (only those at or after threshhold, if given), cached per threshhold."
+               cache = self.__dict__.setdefault('_weight_cache', {})
+               if threshhold not in cache:
+                       clicks = self.clicks.all()
+                       if threshhold is not None:
+                               clicks = clicks.filter(datetime__gte=threshhold)
+                       # Keyed by threshhold so a filtered call never returns another cutoff's total.
+                       cache[threshhold] = sum([click.weight for click in clicks])
+               return cache[threshhold]
+       weight = property(get_weight)
+       
        class Meta:
                ordering = ['url']
 
@@ -45,6 +105,23 @@ class Click(models.Model):
        def __unicode__(self):
                return self.datetime.strftime('%B %d, %Y %H:%M:%S')
        
+       def get_weight(self, default=1, weighted=lambda value, days: value/days**2):
+               """
+               Return float(default) for a click under a day old, else weighted(default, days).
+               Computed on every call so differing arguments are honoured; raises ValueError for future dates.
+               """
+               days = (datetime.datetime.now() - self.datetime).days
+               if days < 0:
+                       raise ValueError("Click dates must be in the past.")
+               if days == 0:
+                       return float(default)
+               return weighted(float(default), days)
+       weight = property(get_weight)
+       
+       def clean(self): # Validation hook: reject future-dated clicks; an unset datetime is left to field validation.
+               if self.datetime is not None and self.datetime > datetime.datetime.now():
+                       raise ValidationError("Click dates must be in the past.")
+       
        class Meta:
                ordering = ['datetime']
                get_latest_by = 'datetime'
@@ -53,9 +130,14 @@ class Click(models.Model):
 class SearchView(MultiView):
        results_page = models.ForeignKey(Page, related_name='search_results_related')
        searches = SlugMultipleChoiceField(choices=registry.iterchoices())
-       allow_partial_loading = models.BooleanField(default=True)
+       enable_ajax_api = models.BooleanField("Enable AJAX API", default=True)
        placeholder_text = models.CharField(max_length=75, default="Search")
        
+       search_form = SearchForm
+       
+       def __unicode__(self): # Label: placeholder text plus the display names of the searches enabled on this view.
+               return u"%s (%s)" % (self.placeholder_text, u", ".join([display for slug, display in registry.iterchoices() if slug in self.searches]))
+       
        def get_reverse_params(self, obj):
                raise ViewCanNotProvideSubpath
        
@@ -64,12 +146,15 @@ class SearchView(MultiView):
                urlpatterns = patterns('',
                        url(r'^$', self.results_view, name='results'),
                )
-               if self.allow_partial_loading:
+               if self.enable_ajax_api:
                        urlpatterns += patterns('',
-                               url(r'^(?P<slug>[\w-]+)/?', self.partial_ajax_results_view, name='partial_ajax_results_view')
+                               url(r'^(?P<slug>[\w-]+)', self.ajax_api_view, name='ajax_api_view')
                        )
                return urlpatterns
        
+       def get_search_instance(self, slug, search_string): # Instantiate the search registered under slug with the lower-cased query.
+               return registry[slug](search_string.lower())
+       
        def results_view(self, request, extra_context=None):
                results = None
                
@@ -77,7 +162,7 @@ class SearchView(MultiView):
                context.update(extra_context or {})
                
                if SEARCH_ARG_GET_KEY in request.GET:
-                       form = SearchForm(request.GET)
+                       form = self.search_form(request.GET)
                        
                        if form.is_valid():
                                search_string = request.GET[SEARCH_ARG_GET_KEY].lower()
@@ -95,13 +180,12 @@ class SearchView(MultiView):
                                                messages.add_message(request, messages.INFO, "The link you followed had been tampered with. Here are all the results for your search term instead!")
                                                # TODO: Should search_string be escaped here?
                                                return HttpResponseRedirect("%s?%s=%s" % (request.path, SEARCH_ARG_GET_KEY, search_string))
-                               if not self.allow_partial_loading:
+                               if not self.enable_ajax_api:
                                        search_instances = []
                                        if eventlet:
                                                pool = eventlet.GreenPool()
                                        for slug in self.searches:
-                                               search = registry[slug]
-                                               search_instance = search(search_string)
+                                               search_instance = self.get_search_instance(slug, search_string)
                                                search_instances.append(search_instance)
                                                if eventlet:
                                                        pool.spawn_n(self.make_result_cache, search_instance)
@@ -114,6 +198,7 @@ class SearchView(MultiView):
                                        })
                else:
                        form = SearchForm()
+               
                context.update({
                        'form': form
                })
@@ -122,17 +207,15 @@ class SearchView(MultiView):
        def make_result_cache(self, search_instance):
                search_instance.results
        
-       def partial_ajax_results_view(self, request, slug, extra_context=None):
+       def ajax_api_view(self, request, slug, extra_context=None):
                search_string = request.GET.get(SEARCH_ARG_GET_KEY)
                
-               if not request.is_ajax() or not self.allow_partial_loading or slug not in self.searches or search_string is None:
+               if not request.is_ajax() or not self.enable_ajax_api or slug not in self.searches or search_string is None:
                        raise Http404
                
-               search = registry[slug]
-               search_instance = search(search_string.lower())
-               results = search_instance.results
+               search_instance = self.get_search_instance(slug, search_string)
                response = json.dumps({
-                       'results': results,
-                       'template': search_instance.get_ajax_result_template()
+                       'results': search_instance.results,
+                       'template': search_instance.get_template()
                })
                return response
\ No newline at end of file