From: Stephen Burrows Date: Tue, 1 Mar 2011 22:25:45 +0000 (-0500) Subject: Refactored weight code to split the work over Search, ResultURL, and Click models... X-Git-Tag: philo-0.9~16^2~15^2~2 X-Git-Url: http://git.ithinksw.org/philo.git/commitdiff_plain/55a07da1814663143cc32628edad0b4f4edb752d Refactored weight code to split the work over Search, ResultURL, and Click models. This is probably somewhat less efficient, but it makes more intuitive sense. It also allows for weight caching on instances. Initial work on a results action/view for the SearchAdmin. Set SearchView to have SearchForm set as an attribute on itself rather than blindly using it. --- diff --git a/contrib/sobol/admin.py b/contrib/sobol/admin.py index 5407796..1ebbf5e 100644 --- a/contrib/sobol/admin.py +++ b/contrib/sobol/admin.py @@ -1,12 +1,18 @@ +from django.conf import settings +from django.conf.urls.defaults import patterns, url from django.contrib import admin +from django.core.urlresolvers import reverse from django.db.models import Count +from django.http import HttpResponseRedirect, Http404 +from django.shortcuts import render_to_response +from django.template import RequestContext +from django.utils.functional import update_wrapper from philo.admin import EntityAdmin from philo.contrib.sobol.models import Search, ResultURL, SearchView class ResultURLInline(admin.TabularInline): model = ResultURL - template = 'search/admin/chosen_result_inline.html' readonly_fields = ('url',) can_delete = False extra = 0 @@ -18,6 +24,27 @@ class SearchAdmin(admin.ModelAdmin): inlines = [ResultURLInline] list_display = ['string', 'unique_urls', 'total_clicks'] search_fields = ['string', 'result_urls__url'] + actions = ['results_action'] + if 'grappelli' in settings.INSTALLED_APPS: + results_template = 'admin/sobol/search/grappelli_results.html' + else: + results_template = 'admin/sobol/search/results.html' + + def get_urls(self): + urlpatterns = super(SearchAdmin, self).get_urls() + + def wrap(view): + def wrapper(*args, **kwargs): + return self.admin_site.admin_view(view)(*args, **kwargs) + return update_wrapper(wrapper, view) + + info = self.model._meta.app_label, self.model._meta.module_name + + urlpatterns = patterns('', + url(r'^results/$', wrap(self.results_view), name="%s_%s_selected_results" % info), + url(r'^(.+)/results/$', wrap(self.results_view), name="%s_%s_results" % info) + ) + urlpatterns + return urlpatterns def unique_urls(self, obj): return obj.unique_urls @@ -30,6 +57,35 @@ class SearchAdmin(admin.ModelAdmin): def queryset(self, request): qs = super(SearchAdmin, self).queryset(request) return qs.annotate(total_clicks=Count('result_urls__clicks', distinct=True), unique_urls=Count('result_urls', distinct=True)) + + def results_action(self, request, queryset): + info = self.model._meta.app_label, self.model._meta.module_name + if len(queryset) == 1: + return HttpResponseRedirect(reverse("admin:%s_%s_results" % info, args=(queryset[0].pk,))) + else: + url = reverse("admin:%s_%s_selected_results" % info) + return HttpResponseRedirect("%s?ids=%s" % (url, ','.join([str(item.pk) for item in queryset]))) + results_action.short_description = "View results for selected %(verbose_name_plural)s" + + def results_view(self, request, object_id=None, extra_context=None): + if object_id is not None: + object_ids = [object_id] + else: + object_ids = request.GET.get('ids').split(',') + + if object_ids is None: + raise Http404 + + qs = self.queryset(request).filter(pk__in=object_ids) + opts = self.model._meta + + context = { + 'queryset': qs, + 'opts': opts, + 'root_path': self.admin_site.root_path, + 'app_label': opts.app_label + } + return render_to_response(self.results_template, context, context_instance=RequestContext(request)) class SearchViewAdmin(EntityAdmin): diff --git a/contrib/sobol/models.py b/contrib/sobol/models.py index cd9b698..7e11882 100644 --- a/contrib/sobol/models.py +++ b/contrib/sobol/models.py @@ -1,8 +1,10 @@ from django.conf.urls.defaults import patterns, url from django.contrib import messages +from django.core.exceptions import ValidationError from django.db import models from django.http import HttpResponseRedirect, Http404 from django.utils import simplejson as json +from django.utils.datastructures import SortedDict from philo.contrib.sobol import registry from philo.contrib.sobol.forms import SearchForm from philo.contrib.sobol.utils import HASH_REDIRECT_GET_KEY, URL_REDIRECT_GET_KEY, SEARCH_ARG_GET_KEY, check_redirect_hash @@ -23,51 +25,50 @@ class Search(models.Model): def __unicode__(self): return self.string - def get_favored_results(self, error=5): - """Calculate the set of most-favored results. A higher error - will cause this method to be more reticent about adding new - items.""" - results = self.result_urls.values_list('pk', 'url',) - - result_dict = {} - for pk, url in results: - result_dict[pk] = {'url': url, 'value': 0} - - clicks = Click.objects.filter(result__pk__in=result_dict.keys()).values_list('result__pk', 'datetime') - - now = datetime.datetime.now() - - def datetime_value(dt): - days = (now - dt).days - if days < 0: - raise ValueError("Click dates must be in the past.") - if days == 0: - value = 1.0 - else: - value = 1.0/days**2 - return value - - for pk, dt in clicks: - value = datetime_value(dt) - result_dict[pk]['value'] += value - - #TODO: is there a reasonable minimum value for consideration? - subsets = {} - for d in result_dict.values(): - subsets.setdefault(d['value'], []).append(d) - - # Now calculate the result set. - results = [] + def get_weighted_results(self, threshhold=None): + "Returns this search's results ordered by decreasing weight." + if not hasattr(self, '_weighted_results'): + result_qs = self.result_urls.all() + + if threshhold is not None: + result_qs = result_qs.filter(counts__datetime__gte=threshhold) + + results = [result for result in result_qs] + + results.sort(cmp=lambda x,y: cmp(y.weight, x.weight)) + + self._weighted_results = results - def cost(value): - return error*sum([(value - item['value'])**2 for item in results]) + return self._weighted_results + + def get_favored_results(self, error=5, threshhold=None): + """ + Calculate the set of most-favored results. A higher error + will cause this method to be more reticent about adding new + items. - for value, subset in sorted(subsets.items(), cmp=lambda x,y: cmp(y[0], x[0])): - if value > cost(value): - results += subset - else: - break - return results + The thought is to see whether there are any results which + vastly outstrip the other options. As such, evenly-weighted + results should be grouped together and either added or + excluded as a group. + """ + if not hasattr(self, '_favored_results'): + results = self.get_weighted_results(threshhold) + + grouped_results = SortedDict() + + for result in results: + grouped_results.setdefault(result.weight, []).append(result) + + self._favored_results = [] + + for value, subresults in grouped_results.items(): + cost = error * sum([(value - result.weight)**2 for result in results]) + if value > cost: + self._favored_results += subresults + else: + break + return self._favored_results class Meta: ordering = ['string'] @@ -81,6 +82,18 @@ class ResultURL(models.Model): def __unicode__(self): return self.url + def get_weight(self, threshhold=None): + if not hasattr(self, '_weight'): + clicks = self.clicks.all() + + if threshhold is not None: + clicks = clicks.filter(datetime__gte=threshhold) + + self._weight = sum([click.weight for click in clicks]) + + return self._weight + weight = property(get_weight) + class Meta: ordering = ['url'] @@ -92,6 +105,23 @@ class Click(models.Model): def __unicode__(self): return self.datetime.strftime('%B %d, %Y %H:%M:%S') + def get_weight(self, default=1, weighted=lambda value, days: value/days**2): + if not hasattr(self, '_weight'): + days = (datetime.datetime.now() - self.datetime).days + if days < 0: + raise ValueError("Click dates must be in the past.") + default = float(default) + if days == 0: + self._weight = float(default) + else: + self._weight = weighted(default, days) + return self._weight + weight = property(get_weight) + + def clean(self): + if self.datetime > datetime.datetime.now(): + raise ValidationError("Click dates must be in the past.") + class Meta: ordering = ['datetime'] get_latest_by = 'datetime' @@ -103,6 +133,8 @@ class SearchView(MultiView): enable_ajax_api = models.BooleanField("Enable AJAX API", default=True) placeholder_text = models.CharField(max_length=75, default="Search") + search_form = SearchForm + def __unicode__(self): return u"%s (%s)" % (self.placeholder_text, u", ".join([display for slug, display in registry.iterchoices()])) @@ -130,7 +162,7 @@ class SearchView(MultiView): context.update(extra_context or {}) if SEARCH_ARG_GET_KEY in request.GET: - form = SearchForm(request.GET) + form = self.search_form(request.GET) if form.is_valid(): search_string = request.GET[SEARCH_ARG_GET_KEY].lower() diff --git a/contrib/sobol/templates/admin/sobol/search/grappelli_results.html b/contrib/sobol/templates/admin/sobol/search/grappelli_results.html new file mode 100644 index 0000000..28d5af7 --- /dev/null +++ b/contrib/sobol/templates/admin/sobol/search/grappelli_results.html @@ -0,0 +1,32 @@ +{% extends "admin/base_site.html" %} + + +{% load i18n %} + + +{% block breadcrumbs %} + +{% endblock %} + + +{% block content %} +
+ {% for search in queryset %} +

{{ search.string }}

+
+

{% blocktrans %}Results{% endblocktrans %}

{% comment %}For the favored results, add a class?{% endcomment %} + {% for result in search.get_weighted_results %} +
+
{{ result.url }}
+
{{ result.weight }}
+
+ {% endfor %} +
+ {% endfor %} +
+{% endblock %} \ No newline at end of file diff --git a/contrib/sobol/templates/admin/sobol/search/results.html b/contrib/sobol/templates/admin/sobol/search/results.html new file mode 100644 index 0000000..e69de29