1 #!/usr/bin/env python2.5
4 # Copyright (c) 2007-2008 INdT.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Lesser General Public License for more details.
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # ============================================================================
21 # Author : Yves Marcoz
23 # Description : Simple RSS Reader
24 # ============================================================================
27 from os.path import isfile, isdir
28 from shutil import rmtree
29 from os import mkdir, remove, utime
34 from BeautifulSoup import BeautifulSoup
35 from urlparse import urljoin
36 from calendar import timegm
39 return md5.new(string).hexdigest()
42 def __init__(self, configdir, key):
44 self.configdir = configdir
45 self.dir = "%s/%s.d" %(self.configdir, self.key)
46 if not isdir(self.dir):
48 if not isfile("%s/%s.db" %(self.dir, self.key)):
49 self.db = sqlite3.connect("%s/%s.db" %(self.dir, self.key) )
50 self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
51 self.db.execute("CREATE TABLE images (id text, imagePath text);")
54 self.db = sqlite3.connect("%s/%s.db" %(self.dir, self.key) )
56 def addImage(self, configdir, key, baseurl, url):
57 filename = configdir+key+".d/"+getId(url)
58 if not isfile(filename):
60 f = urllib2.urlopen(urljoin(baseurl,url))
61 outf = open(filename, "w")
66 print "Could not download " + url
68 #open(filename,"a").close() # "Touch" the file
69 file = open(filename,"a")
74 def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
75 # Expiry time is in hours
77 tmp=feedparser.parse(url, etag = etag, modified = modified)
79 tmp=feedparser.parse(url, etag = etag, modified = modified, handlers = [proxy])
80 expiry = float(expiryTime) * 3600.
83 # Check if the parse was successful (number of entries > 0, else do nothing)
84 if len(tmp["entries"])>0:
85 currentTime = time.time()
86 # The etag and modified value should only be updated if the content was not null
92 modified = tmp["modified"]
96 f = urllib2.urlopen(urljoin(tmp["feed"]["link"],"/favicon.ico"))
99 outf = open(self.dir+"/favicon.ico", "w")
105 #traceback.print_exc()
109 #reversedEntries = self.getEntries()
110 #reversedEntries.reverse()
114 tmp["entries"].reverse()
115 for entry in tmp["entries"]:
116 date = self.extractDate(entry)
120 entry["title"] = "No Title"
128 entry["author"] = None
129 if(not(entry.has_key("id"))):
131 tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
132 "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
133 id = self.generateUniqueId(tmpEntry)
135 #articleTime = time.mktime(self.entries[id]["dateTuple"])
136 soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
138 baseurl = tmpEntry["link"]
143 filename = self.addImage(configdir, self.key, baseurl, img['src'])
144 img['src']="file://%s" %filename
145 count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
147 self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
150 traceback.print_exc()
151 print "Error downloading image %s" % img
152 tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
153 file = open(tmpEntry["contentLink"], "w")
154 file.write(soup.prettify())
157 self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
160 values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
161 self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
164 # self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
166 # filename = configdir+self.key+".d/"+id+".html"
167 # file = open(filename,"a")
168 # utime(filename, None)
170 # images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
171 # for image in images:
172 # file = open(image[0],"a")
173 # utime(image[0], None)
180 rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
182 self.removeEntry(row[0])
184 from glob import glob
186 for file in glob(configdir+self.key+".d/*"):
190 # put the two dates into matching format
192 lastmodDate = stats[8]
194 expDate = time.time()-expiry*3
195 # check if image-last-modified-date is outdated
197 if expDate > lastmodDate:
201 #print 'Removing', file
203 remove(file) # commented out for testing
207 print 'Could not remove', file
209 rows = self.db.execute("SELECT MAX(date) FROM feed;")
212 return (updateTime, etag, modified)
214 def setEntryRead(self, id):
215 self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
218 def setEntryUnread(self, id):
219 self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
222 def markAllAsRead(self):
223 self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
226 def isEntryRead(self, id):
227 read_status = self.db.execute("SELECT read FROM feed WHERE id=?;", (id,) ).fetchone()[0]
228 return read_status==1 # Returns True if read==1, and False if read==0
230 def getTitle(self, id):
231 return self.db.execute("SELECT title FROM feed WHERE id=?;", (id,) ).fetchone()[0]
233 def getContentLink(self, id):
234 return self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) ).fetchone()[0]
236 def getExternalLink(self, id):
237 return self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) ).fetchone()[0]
239 def getDate(self, id):
240 dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
241 return time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(dateStamp))
243 def getDateTuple(self, id):
244 dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
245 return time.localtime(dateStamp)
247 def getDateStamp(self, id):
248 return self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
def generateUniqueId(self, entry):
    """Return the hashed identifier used as the database key for an entry.

    entry -- dict describing the article. When it carries a non-None "id"
             value that value alone is hashed; otherwise the id is derived
             from the "date" and "title" values.

    Returns whatever getId() returns for the chosen string.
    """
    # Some callers in this file build the dict with only "date" and "title",
    # so a missing "id" key must be treated like an explicit None instead of
    # raising KeyError.
    entry_id = entry.get("id")
    if entry_id is not None:
        return getId(str(entry_id))
    return getId(str(entry["date"]) + str(entry["title"]))
256 def getIds(self, onlyUnread=False):
258 rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
260 rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
267 def getNextId(self, id):
269 index = ids.index(id)
270 return ids[(index+1)%len(ids)]
272 def getPreviousId(self, id):
274 index = ids.index(id)
275 return ids[(index-1)%len(ids)]
277 def getNumberOfUnreadItems(self):
278 return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]
280 def getNumberOfEntries(self):
281 return self.db.execute("SELECT count(*) FROM feed;").fetchone()[0]
283 def getArticle(self, entry):
284 #self.setEntryRead(id)
285 #entry = self.entries[id]
286 title = entry['title']
287 #content = entry.get('content', entry.get('summary_detail', {}))
288 content = entry["content"]
291 author = entry['author']
292 date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )
294 #text = '''<div style="color: black; background-color: white;">'''
295 text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
296 text += "<html><head><title>" + title + "</title>"
297 text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
298 #text += '<style> body {-webkit-user-select: none;} </style>'
299 text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
301 text += "<BR /><small><i>Author: " + author + "</i></small>"
302 text += "<BR /><small><i>Date: " + date + "</i></small></div>"
303 text += "<BR /><BR />"
305 text += "</body></html>"
def getContent(self, id):
    """Return the saved HTML of article `id`, or "Content unavailable".

    id -- identifier of the article in the `feed` table.

    The path of the cached page is read from the `contentLink` column. The
    fallback string is returned when the article is unknown, has no stored
    path, or the file cannot be opened or read.
    """
    row = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()
    if row is None or not row[0]:
        return "Content unavailable"
    contentLink = row[0]
    try:
        # Open the path fetched from the database; the previous code looked
        # it up in self.entries instead, discarding the value just fetched.
        file = open(contentLink)
        try:
            content = file.read()
        finally:
            # Close the handle even when read() fails.
            file.close()
    except (IOError, OSError):
        content = "Content unavailable"
    return content
318 def extractDate(self, entry):
319 if entry.has_key("updated_parsed"):
320 return timegm(entry["updated_parsed"])
321 elif entry.has_key("published_parsed"):
322 return timegm(entry["published_parsed"])
326 def extractContent(self, entry):
328 if entry.has_key('summary'):
329 content = entry.get('summary', '')
330 if entry.has_key('content'):
331 if len(entry.content[0].value) > len(content):
332 content = entry.content[0].value
334 content = entry.get('description', '')
337 def removeEntry(self, id):
338 contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
341 os.remove(contentLink)
343 print "File not found for deletion: %s" % contentLink
344 self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
345 self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
348 class ArchivedArticles(Feed):
349 def addArchivedArticle(self, title, link, date, configdir):
350 id = self.generateUniqueId({"date":date, "title":title})
351 values = (id, title, link, date, 0, link, 0)
352 self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
355 def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
357 rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
359 currentTime = time.time()
362 f = urllib2.urlopen(link)
363 #entry["content"] = f.read()
366 soup = BeautifulSoup(html)
370 filename = self.addImage(configdir, self.key, baseurl, img['src'])
372 self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
373 contentLink = configdir+self.key+".d/"+id+".html"
374 file = open(contentLink, "w")
375 file.write(soup.prettify())
378 self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
380 return (currentTime, None, None)
382 def purgeReadArticles(self):
383 rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
386 self.removeArticle(row[0])
388 def removeArticle(self, id):
389 rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
392 count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
400 # Lists all the feeds in a dictionary, and expose the data
401 def __init__(self, configdir):
402 self.configdir = configdir
404 self.db = sqlite3.connect("%s/feeds.db" % self.configdir)
407 table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
409 self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
410 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
411 self.addCategory("Default Category")
412 if isfile(self.configdir+"feeds.pickle"):
413 self.importOldFormatFeeds()
415 self.addFeed("Maemo News", "http://maemo.org/news/items.xml")
417 from string import find, upper
418 if find(upper(table[0]), "WIDGET")<0:
419 self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
420 self.db.execute("UPDATE feeds SET widget=1;")
422 if find(upper(table[0]), "CATEGORY")<0:
423 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
424 self.addCategory("Default Category")
425 self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
426 self.db.execute("UPDATE feeds SET category=1;")
431 def importOldFormatFeeds(self):
432 """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
434 listing = rss.Listing(self.configdir)
436 for id in listing.getListOfFeeds():
439 values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
440 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)
443 feed = listing.getFeed(id)
444 new_feed = self.getFeed(id)
446 items = feed.getIds()[:]
449 if feed.isEntryRead(item):
453 date = timegm(feed.getDateTuple(item))
454 title = feed.getTitle(item)
455 newId = new_feed.generateUniqueId({"date":date, "title":title})
456 values = (newId, title , feed.getContentLink(item), date, tuple(time.time()), feed.getExternalLink(item), read_status)
457 new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
460 images = feed.getImages(item)
462 new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
466 self.updateUnread(id)
469 traceback.print_exc()
470 remove(self.configdir+"feeds.pickle")
def addArchivedArticle(self, key, index):
    """Copy one article of feed `key` into the "ArchivedArticles" feed.

    key   -- id of the feed that currently holds the article
    index -- id of the article inside that feed

    The "ArchivedArticles" feed is registered through addFeed() when no row
    with that id exists in the `feeds` table. The unread counter of the
    archive feed is refreshed afterwards.
    """
    feed = self.getFeed(key)
    title = feed.getTitle(index)
    link = feed.getExternalLink(index)
    # Pass the raw timestamp, not the display string from getDate(): the
    # feed table declares `date` as float and the date getters feed the
    # stored value to time.localtime().
    date = feed.getDateStamp(index)
    count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
    # NOTE(review): this guard is reconstructed from context -- confirm
    # against the original file that the feed is only added when missing.
    if count == 0:
        self.addFeed("Archived Articles", "", id="ArchivedArticles")

    archFeed = self.getFeed("ArchivedArticles")
    archFeed.addArchivedArticle(title, link, date, self.configdir)
    self.updateUnread("ArchivedArticles")
486 def updateFeed(self, key, expiryTime=24, proxy=None, imageCache=False):
487 feed = self.getFeed(key)
488 db = sqlite3.connect("%s/feeds.db" % self.configdir)
489 (url, etag, modified) = db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
491 modified = time.struct_time(eval(modified))
494 (updateTime, etag, modified) = feed.updateFeed(self.configdir, url, etag, modified, expiryTime, proxy, imageCache)
496 db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, str(tuple(modified)), key) )
498 db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, str(tuple(modified)), key) )
500 self.updateUnread(key, db=db)
502 def getFeed(self, key):
503 if key == "ArchivedArticles":
504 return ArchivedArticles(self.configdir, key)
505 return Feed(self.configdir, key)
507 def editFeed(self, key, title, url, category=None):
509 self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
511 self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
514 def getFeedUpdateTime(self, key):
515 return time.ctime(self.db.execute("SELECT updateTime FROM feeds WHERE id=?;", (key,)).fetchone()[0])
517 def getFeedNumberOfUnreadItems(self, key):
518 return self.db.execute("SELECT unread FROM feeds WHERE id=?;", (key,)).fetchone()[0]
520 def getFeedTitle(self, key):
521 return self.db.execute("SELECT title FROM feeds WHERE id=?;", (key,)).fetchone()[0]
523 def getFeedUrl(self, key):
524 return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]
526 def getFeedCategory(self, key):
527 return self.db.execute("SELECT category FROM feeds WHERE id=?;", (key,)).fetchone()[0]
529 def getListOfFeeds(self, category=None):
531 rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
533 rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
540 def getListOfCategories(self):
541 rows = self.db.execute("SELECT id FROM categories ORDER BY rank;" )
548 def getCategoryTitle(self, id):
549 row = self.db.execute("SELECT title FROM categories WHERE id=?;", (id, )).fetchone()
552 def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
553 if order == "Most unread":
554 tmp = "ORDER BY unread DESC"
555 #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1], reverse=True)
556 elif order == "Least unread":
557 tmp = "ORDER BY unread"
558 #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1])
559 elif order == "Most recent":
560 tmp = "ORDER BY updateTime DESC"
561 #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2], reverse=True)
562 elif order == "Least recent":
563 tmp = "ORDER BY updateTime"
564 #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2])
565 else: # order == "Manual" or invalid value...
566 tmp = "ORDER BY rank"
567 #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][0])
569 sql = "SELECT id FROM feeds WHERE unread>0 AND category=%s " %category + tmp
571 sql = "SELECT id FROM feeds WHERE category=%s " %category + tmp
572 rows = self.db.execute(sql)
579 def getFavicon(self, key):
580 filename = "%s%s.d/favicon.ico" % (self.configdir, key)
586 def updateUnread(self, key, db=None):
589 feed = self.getFeed(key)
590 db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
593 def addFeed(self, title, url, id=None, category=1):
596 count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
598 max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
601 values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
602 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?,?);", values)
604 # Ask for the feed object, it will create the necessary tables
610 def addCategory(self, title):
611 rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
614 id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
617 self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
620 def removeFeed(self, key):
621 rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
622 self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
623 self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
626 if isdir(self.configdir+key+".d/"):
627 rmtree(self.configdir+key+".d/")
629 def removeCategory(self, key):
630 if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
631 rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,) ).fetchone()[0]
632 self.db.execute("DELETE FROM categories WHERE id=?;", (key, ))
633 self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
634 self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
637 #def saveConfig(self):
638 # self.listOfFeeds["feedingit-order"] = self.sortedKeys
639 # file = open(self.configdir+"feeds.pickle", "w")
640 # pickle.dump(self.listOfFeeds, file)
643 def moveUp(self, key):
644 rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
646 self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
647 self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
650 def moveCategoryUp(self, key):
651 rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
653 self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
654 self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
657 def moveDown(self, key):
658 rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
659 max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
661 self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
662 self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
665 def moveCategoryDown(self, key):
666 rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
667 max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
669 self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
670 self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )