24dcdf44c728f22374711922958dd687c4dcee87
[~jspiros/reader.git] /
1 from .models import Feed, Entry
2 import datetime
3 import feedparser
4
5
def _rate_content(content):
    """Return a sort rank for ``content`` derived from its ``type``.

    Lower ranks are preferred: ``application/xhtml+xml`` is 0,
    ``text/html`` is 1 and ``text/plain`` is 2. Every other type gets 3.
    """
    # Ordered from most to least preferred; the position is the rank.
    preference = ('application/xhtml+xml', 'text/html', 'text/plain')
    try:
        return preference.index(content.type)
    except ValueError:
        # Unknown types sort after every known one.
        return len(preference)
15
16
def _choose_content(contents):
    """Pick the most preferred item from ``contents``.

    Only items whose ``type`` is XHTML, HTML or plain text are considered.
    Among those, the one with the lowest ``_rate_content`` rank is returned,
    with ties going to the item that appears first. Returns ``None`` when no
    item has an accepted type.
    """
    accepted_types = ('application/xhtml+xml', 'text/html', 'text/plain')
    candidates = [item for item in contents if item.type in accepted_types]
    if not candidates:
        return None
    # min() keeps the first of equally ranked items, like a stable sort.
    return min(candidates, key=_rate_content)
21
22
def _parse_date(date):
    """Convert a time tuple into a naive ``datetime``, or ``None`` if unusable.

    Args:
        date: A sequence whose first six items are year, month, day, hour,
            minute and second (for example a ``time.struct_time``), or
            ``None``.

    Returns:
        A ``datetime.datetime`` built from the first six items, or ``None``
        when ``date`` is ``None``, cannot be sliced, is too short or holds
        out-of-range values.
    """
    try:
        return datetime.datetime(*date[0:6])
    except (TypeError, ValueError, OverflowError):
        # Only "this is not a usable date" errors are swallowed. A bare
        # ``except`` would also hide KeyboardInterrupt/SystemExit.
        return None
28
29
def _add_entry(feed, parsed_entry):
    """Create or update the stored entry for one parsed feed item.

    Args:
        feed: The feed the entry belongs to. ``feed.entries.get_or_create``
            is used for storage and ``feed.link`` as the fallback link.
        parsed_entry: One dict-like item of the parsed feed's entries.

    The entry is skipped when it has neither summary nor content, or when
    it has no ``id`` and no ``link`` of its own to serve as its URI.
    """
    title = parsed_entry.get('title', 'Untitled')
    link = parsed_entry.get('link', feed.link)

    # Use the first date that actually parses. A key may be present but hold
    # an unusable value, which must not block the later fallbacks.
    published = None
    for date_key in ('published_parsed', 'created_parsed', 'updated_parsed'):
        published = _parse_date(parsed_entry.get(date_key, None))
        if published:
            break
    if not published:
        # NOTE(review): now() is local time; confirm it matches the timezone
        # of the parsed dates the model stores.
        published = datetime.datetime.now()
    updated = _parse_date(parsed_entry.get('updated_parsed', None)) or published

    # _choose_content returns None when no item has an accepted type.
    contents = parsed_entry.get('content', None)
    chosen = _choose_content(contents) if contents else None
    content = chosen.value if chosen is not None else None
    summary = parsed_entry.get('summary', None)

    if not (summary or content):
        return

    # Not every entry carries an id; fall back to the entry's own link and
    # skip the entry when there is nothing to identify it by.
    uri = parsed_entry.get('id', None) or parsed_entry.get('link', None)
    if not uri:
        return

    fields = {
        'title': title,
        'link': link,
        'published': published,
        'updated': updated,
        'summary': summary,
        'content': content,
    }
    entry, created = feed.entries.get_or_create(uri=uri, defaults=fields)
    if not created:
        # The entry already existed, so overwrite it with the fresh values.
        for name, value in fields.items():
            setattr(entry, name, value)
        entry.save()
63
64
def _add_entries(feed, parsed):
    """Store every item of ``parsed.entries`` under ``feed`` via ``_add_entry``."""
    for item in parsed.entries:
        _add_entry(feed, item)
68
69
def refresh_feed(feed):
    """Fetch ``feed.url`` and bring the feed and its entries up to date.

    Does nothing when ``feed.alive`` is false or when the response status is
    304. Otherwise the feed's url (on 301), alive flag (on 410), etag,
    modified, title, updated and link fields are refreshed from the parsed
    result, the feed is saved and its entries are stored.

    Args:
        feed: The feed to refresh; it is modified and saved in place.
    """
    if not feed.alive:
        return

    last_seen = feed.modified.timetuple() if feed.modified else None
    parsed = feedparser.parse(feed.url, etag=feed.etag, modified=last_seen)

    status = parsed.get('status', None)
    if status == 304:
        # Not modified since the stored etag/modified values.
        return
    if status == 301 and 'href' in parsed:
        # Permanent redirect: remember the new location.
        feed.url = parsed.href
    if status == 410:
        # Gone: stop refreshing this feed in the future.
        feed.alive = False

    if 'etag' in parsed:
        feed.etag = parsed.etag
    if 'modified' in parsed:
        # Depending on the feedparser version, ``modified`` is either a time
        # tuple or the raw header string, with the tuple in
        # ``modified_parsed``. Try the tuple first and fall back to
        # ``modified``; _parse_date returns None for a string.
        last_modified = (_parse_date(parsed.get('modified_parsed', None))
                         or _parse_date(parsed.modified))
        if last_modified:
            feed.modified = last_modified

    feed.title = parsed.feed.get('title', feed.url)
    # Fall back to the current time (whole seconds) when the feed has no
    # usable updated date, including a key that is present but None.
    feed.updated = (_parse_date(parsed.feed.get('updated_parsed', None))
                    or datetime.datetime.now().replace(microsecond=0))
    feed.link = parsed.feed.get('link', feed.url)

    feed.save()
    _add_entries(feed, parsed)
89
90
def refresh_all_feeds():
    """Run ``refresh_feed`` on every feed returned by ``Feed.objects.all()``."""
    every_feed = Feed.objects.all()
    for current in every_feed:
        refresh_feed(current)
94
95
def add_feed(url):
    """Build a ``Feed`` for ``url``, refresh it once and return it."""
    new_feed = Feed(url=url)
    refresh_feed(new_feed)
    return new_feed
99         return feed