1 #!/usr/bin/env python2.5
4 # Copyright (c) 2007-2008 INdT.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Lesser General Public License for more details.
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # ============================================================================
21 # Author : Yves Marcoz
23 # Description : Simple RSS Reader
24 # ============================================================================
27 from os.path import isfile, isdir
28 from shutil import rmtree
29 from os import mkdir, remove, utime
34 from BeautifulSoup import BeautifulSoup
35 from urlparse import urljoin
36 from calendar import timegm
39 return md5.new(string).hexdigest()
42 def __init__(self, configdir, key):
44 self.configdir = configdir
45 self.dir = "%s/%s.d" %(self.configdir, self.key)
46 if not isdir(self.dir):
48 if not isfile("%s/%s.db" %(self.dir, self.key)):
49 self.db = sqlite3.connect("%s/%s.db" %(self.dir, self.key) )
50 self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
51 self.db.execute("CREATE TABLE images (id text, imagePath text);")
54 self.db = sqlite3.connect("%s/%s.db" %(self.dir, self.key) )
56 def addImage(self, configdir, key, baseurl, url):
57 filename = configdir+key+".d/"+getId(url)
58 if not isfile(filename):
60 f = urllib2.urlopen(urljoin(baseurl,url))
61 outf = open(filename, "w")
66 print "Could not download " + url
68 #open(filename,"a").close() # "Touch" the file
69 file = open(filename,"a")
74 def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
75 # Expiry time is in hours
77 tmp=feedparser.parse(url, etag = etag, modified = modified)
79 tmp=feedparser.parse(url, etag = etag, modified = modified, handlers = [proxy])
80 expiry = float(expiryTime) * 3600.
83 # Check if the parse was successful (number of entries > 0, else do nothing)
84 if len(tmp["entries"])>0:
85 currentTime = time.time()
86 # The etag and modified value should only be updated if the content was not null
92 modified = tmp["modified"]
96 f = urllib2.urlopen(urljoin(tmp["feed"]["link"],"/favicon.ico"))
99 outf = open(self.dir+"/favicon.ico", "w")
105 #traceback.print_exc()
109 #reversedEntries = self.getEntries()
110 #reversedEntries.reverse()
114 tmp["entries"].reverse()
115 for entry in tmp["entries"]:
116 date = self.extractDate(entry)
120 entry["title"] = "No Title"
128 entry["author"] = None
129 tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
130 "date":date, "link":entry["link"], "author":entry["author"]}
131 id = self.generateUniqueId(tmpEntry)
133 #articleTime = time.mktime(self.entries[id]["dateTuple"])
135 soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
137 baseurl = tmpEntry["link"]
141 filename = self.addImage(configdir, self.key, baseurl, img['src'])
143 self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
146 traceback.print_exc()
147 print "Error downloading image %s" % img
148 tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
149 file = open(tmpEntry["contentLink"], "w")
150 file.write(soup.prettify())
152 values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
153 self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
156 self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
158 filename = configdir+self.key+".d/"+id+".html"
159 file = open(filename,"a")
160 utime(filename, None)
162 images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
164 file = open(image[0],"a")
165 utime(image[0], None)
172 rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
174 self.removeEntry(row[0])
176 from glob import glob
178 for file in glob(configdir+self.key+".d/*"):
182 # put the two dates into matching format
184 lastmodDate = stats[8]
186 expDate = time.time()-expiry*3
187 # check if image-last-modified-date is outdated
189 if expDate > lastmodDate:
193 #print 'Removing', file
195 remove(file) # commented out for testing
199 print 'Could not remove', file
201 rows = self.db.execute("SELECT MAX(date) FROM feed;")
204 return (updateTime, etag, modified)
206 def setEntryRead(self, id):
207 self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
210 def setEntryUnread(self, id):
211 self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
214 def markAllAsRead(self):
215 self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
def isEntryRead(self, id):
    """Tell whether the entry with the given id is flagged as read.

    Looks up the ``read`` column of the matching ``feed`` row and returns
    True when it equals 1, False otherwise.  An unknown id makes
    ``fetchone()`` return None, so indexing it raises TypeError.
    """
    cursor = self.db.execute("SELECT read FROM feed WHERE id=?;", (id,))
    row = cursor.fetchone()
    flag = row[0]
    return flag == 1
def getTitle(self, id):
    """Return the ``title`` column of the ``feed`` row matching id.

    Raises TypeError when no row matches (``fetchone()`` yields None).
    """
    query = "SELECT title FROM feed WHERE id=?;"
    row = self.db.execute(query, (id,)).fetchone()
    return row[0]
def getContentLink(self, id):
    """Return the ``contentLink`` column of the ``feed`` row matching id.

    Raises TypeError when no row matches (``fetchone()`` yields None).
    """
    cursor = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,))
    found = cursor.fetchone()
    return found[0]
def getExternalLink(self, id):
    """Return the ``link`` column of the ``feed`` row matching id.

    Raises TypeError when no row matches (``fetchone()`` yields None).
    """
    params = (id,)
    record = self.db.execute("SELECT link FROM feed WHERE id=?;", params).fetchone()
    return record[0]
def getDate(self, id):
    """Return the entry's date as a human-readable local-time string.

    Reads the ``date`` column of the matching ``feed`` row, converts it with
    ``time.localtime`` and formats it as e.g. "Mon, 01 Jan 2007 12:00:00".
    Raises TypeError when no row matches (``fetchone()`` yields None).
    """
    row = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,)).fetchone()
    localTuple = time.localtime(row[0])
    return time.strftime("%a, %d %b %Y %H:%M:%S", localTuple)
def getDateTuple(self, id):
    """Return the entry's date as a local-time struct (``time.localtime``).

    The timestamp comes from the ``date`` column of the matching ``feed``
    row.  Raises TypeError when no row matches (``fetchone()`` yields None).
    """
    cursor = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,))
    stamp = cursor.fetchone()[0]
    return time.localtime(stamp)
def getDateStamp(self, id):
    """Return the raw ``date`` column of the ``feed`` row matching id.

    Raises TypeError when no row matches (``fetchone()`` yields None).
    """
    query = "SELECT date FROM feed WHERE id=?;"
    match = self.db.execute(query, (id,)).fetchone()
    return match[0]
def generateUniqueId(self, entry):
    """Return the id that getId() produces for an entry's date and title.

    entry -- mapping providing the "date" and "title" keys.

    Both values are stringified and concatenated before being handed to
    getId().  For plain (ASCII / byte-string) titles the result is the same
    as before.
    """
    title = entry["title"]
    try:
        title = str(title)
    except UnicodeEncodeError:
        # Under Python 2, str() on a unicode title holding non-ASCII
        # characters raises; encode explicitly so such entries still get
        # an id instead of aborting the caller.
        title = title.encode("utf-8")
    return getId(str(entry["date"]) + title)
245 def getIds(self, onlyUnread=False):
247 rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
249 rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
256 def getNextId(self, id):
258 index = ids.index(id)
259 return ids[(index+1)%len(ids)]
261 def getPreviousId(self, id):
263 index = ids.index(id)
264 return ids[(index-1)%len(ids)]
def getNumberOfUnreadItems(self):
    """Return how many rows of the ``feed`` table have ``read`` equal to 0."""
    cursor = self.db.execute("SELECT count(*) FROM feed WHERE read=0;")
    countRow = cursor.fetchone()
    return countRow[0]
def getNumberOfEntries(self):
    """Return the total number of rows stored in the ``feed`` table."""
    totalRow = self.db.execute("SELECT count(*) FROM feed;").fetchone()
    return totalRow[0]
272 def getArticle(self, entry):
273 #self.setEntryRead(id)
274 #entry = self.entries[id]
275 title = entry['title']
276 #content = entry.get('content', entry.get('summary_detail', {}))
277 content = entry["content"]
280 author = entry['author']
281 date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )
283 #text = '''<div style="color: black; background-color: white;">'''
284 text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
285 text += "<html><head><title>" + title + "</title>"
286 text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
287 #text += '<style> body {-webkit-user-select: none;} </style>'
288 text += '</head><body><div><a href=\"' + link + '\">' + title + "</a>"
290 text += "<BR /><small><i>Author: " + author + "</i></small>"
291 text += "<BR /><small><i>Date: " + date + "</i></small></div>"
292 text += "<BR /><BR />"
294 text += "</body></html>"
297 def getContent(self, id):
298 contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
300 file = open(self.entries[id]["contentLink"])
301 content = file.read()
304 content = "Content unavailable"
307 def extractDate(self, entry):
308 if entry.has_key("updated_parsed"):
309 return timegm(entry["updated_parsed"])
310 elif entry.has_key("published_parsed"):
311 return timegm(entry["published_parsed"])
315 def extractContent(self, entry):
317 if entry.has_key('summary'):
318 content = entry.get('summary', '')
319 if entry.has_key('content'):
320 if len(entry.content[0].value) > len(content):
321 content = entry.content[0].value
323 content = entry.get('description', '')
326 def removeEntry(self, id):
327 contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
330 os.remove(contentLink)
332 print "File not found for deletion: %s" % contentLink
333 self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
334 self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
337 class ArchivedArticles(Feed):
338 def addArchivedArticle(self, title, link, date, configdir):
339 id = self.generateUniqueId({"date":date, "title":title})
340 values = (id, title, link, date, 0, link, 0)
341 self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
344 def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
346 rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
348 currentTime = time.time()
351 f = urllib2.urlopen(link)
352 #entry["content"] = f.read()
355 soup = BeautifulSoup(html)
359 filename = self.addImage(configdir, self.key, baseurl, img['src'])
361 self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
362 contentLink = configdir+self.key+".d/"+id+".html"
363 file = open(contentLink, "w")
364 file.write(soup.prettify())
367 self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
369 return (currentTime, None, None)
371 def purgeReadArticles(self):
372 rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
375 self.removeArticle(row[0])
377 def removeArticle(self, id):
378 rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
381 count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
389 # Lists all the feeds in a dictionary, and expose the data
390 def __init__(self, configdir):
391 self.configdir = configdir
393 self.db = sqlite3.connect("%s/feeds.db" % self.configdir)
396 table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
398 self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
399 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
400 self.addCategory("Default Category")
401 if isfile(self.configdir+"feeds.pickle"):
402 self.importOldFormatFeeds()
404 self.addFeed("Maemo News", "http://maemo.org/news/items.xml")
406 from string import find, upper
407 if find(upper(table[0]), "WIDGET")<0:
408 self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
409 self.db.execute("UPDATE feeds SET widget=1;")
411 if find(upper(table[0]), "CATEGORY")<0:
412 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
413 self.addCategory("Default Category")
414 self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
415 self.db.execute("UPDATE feeds SET category=1;")
420 def importOldFormatFeeds(self):
421 """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
423 listing = rss.Listing(self.configdir)
425 for id in listing.getListOfFeeds():
428 values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
429 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)
432 feed = listing.getFeed(id)
433 new_feed = self.getFeed(id)
435 items = feed.getIds()[:]
438 if feed.isEntryRead(item):
442 date = timegm(feed.getDateTuple(item))
443 title = feed.getTitle(item)
444 newId = new_feed.generateUniqueId({"date":date, "title":title})
445 values = (newId, title , feed.getContentLink(item), date, time.time(), feed.getExternalLink(item), read_status)
446 new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
449 images = feed.getImages(item)
451 new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
455 self.updateUnread(id)
458 traceback.print_exc()
459 remove(self.configdir+"feeds.pickle")
462 def addArchivedArticle(self, key, index):
463 feed = self.getFeed(key)
464 title = feed.getTitle(index)
465 link = feed.getExternalLink(index)
466 date = feed.getDate(index)
467 count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
469 self.addFeed("Archived Articles", "", id="ArchivedArticles")
471 archFeed = self.getFeed("ArchivedArticles")
472 archFeed.addArchivedArticle(title, link, date, self.configdir)
473 self.updateUnread("ArchivedArticles")
475 def updateFeed(self, key, expiryTime=24, proxy=None, imageCache=False):
476 feed = self.getFeed(key)
477 db = sqlite3.connect("%s/feeds.db" % self.configdir)
478 (url, etag, modified) = db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
479 (updateTime, etag, modified) = feed.updateFeed(self.configdir, url, etag, eval(modified), expiryTime, proxy, imageCache)
481 db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, str(modified), key) )
483 db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, str(modified), key) )
485 self.updateUnread(key, db=db)
def getFeed(self, key):
    """Build and return the feed object for key.

    The special key "ArchivedArticles" yields an ArchivedArticles instance;
    every other key yields a plain Feed.  Both are constructed with this
    listing's configdir and the key.
    """
    if key != "ArchivedArticles":
        return Feed(self.configdir, key)
    return ArchivedArticles(self.configdir, key)
492 def editFeed(self, key, title, url, category=None):
494 self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
496 self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
def getFeedUpdateTime(self, key):
    """Return the feed's ``updateTime`` column formatted by ``time.ctime``.

    Raises TypeError when no ``feeds`` row matches key (``fetchone()``
    yields None).
    """
    cursor = self.db.execute("SELECT updateTime FROM feeds WHERE id=?;", (key,))
    lastUpdate = cursor.fetchone()[0]
    return time.ctime(lastUpdate)
def getFeedNumberOfUnreadItems(self, key):
    """Return the ``unread`` column stored in the ``feeds`` row for key.

    Raises TypeError when no row matches (``fetchone()`` yields None).
    """
    query = "SELECT unread FROM feeds WHERE id=?;"
    row = self.db.execute(query, (key,)).fetchone()
    return row[0]
def getFeedTitle(self, key):
    """Return the ``title`` column of the ``feeds`` row for key.

    Raises TypeError when no row matches (``fetchone()`` yields None).
    """
    cursor = self.db.execute("SELECT title FROM feeds WHERE id=?;", (key,))
    found = cursor.fetchone()
    return found[0]
def getFeedUrl(self, key):
    """Return the ``url`` column of the ``feeds`` row for key.

    Raises TypeError when no row matches (``fetchone()`` yields None).
    """
    params = (key,)
    record = self.db.execute("SELECT url FROM feeds WHERE id=?;", params).fetchone()
    return record[0]
511 def getListOfFeeds(self, category=None):
513 rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
515 rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
522 def getListOfCategories(self):
523 rows = self.db.execute("SELECT id FROM categories ORDER BY rank;" )
530 def getCategoryTitle(self, id):
532 row = self.db.execute("SELECT title FROM categories WHERE id=?;", (id, )).fetchone()
535 def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
536 if order == "Most unread":
537 tmp = "ORDER BY unread DESC"
538 #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1], reverse=True)
539 elif order == "Least unread":
540 tmp = "ORDER BY unread"
541 #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1])
542 elif order == "Most recent":
543 tmp = "ORDER BY updateTime DESC"
544 #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2], reverse=True)
545 elif order == "Least recent":
546 tmp = "ORDER BY updateTime"
547 #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2])
548 else: # order == "Manual" or invalid value...
549 tmp = "ORDER BY rank"
550 #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][0])
552 sql = "SELECT id FROM feeds WHERE unread>0 WHERE category=%s" %category + tmp
554 sql = "SELECT id FROM feeds WHERE category=%s " %category + tmp
555 rows = self.db.execute(sql)
562 def getFavicon(self, key):
563 filename = "%s%s.d/favicon.ico" % (self.configdir, key)
569 def updateUnread(self, key, db=None):
572 feed = self.getFeed(key)
573 db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
576 def addFeed(self, title, url, id=None, category=1):
579 count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
581 max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
584 values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
585 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?,?);", values)
587 # Ask for the feed object, it will create the necessary tables
593 def addCategory(self, title):
594 rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
597 id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
600 self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
603 def removeFeed(self, key):
604 rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
605 self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
606 self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
609 if isdir(self.configdir+key+".d/"):
610 rmtree(self.configdir+key+".d/")
612 def removeCategory(self, key):
613 if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
614 rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,) ).fetchone()[0]
615 self.db.execute("DELETE FROM categories WHERE id=?;", (key, ))
616 self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
617 self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
620 #def saveConfig(self):
621 # self.listOfFeeds["feedingit-order"] = self.sortedKeys
622 # file = open(self.configdir+"feeds.pickle", "w")
623 # pickle.dump(self.listOfFeeds, file)
626 def moveUp(self, key):
627 rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
629 self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
630 self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
633 def moveCategoryUp(self, key):
634 rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
636 self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
637 self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
640 def moveDown(self, key):
641 rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
642 max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
644 self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
645 self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
648 def moveCategoryDown(self, key):
649 rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
650 max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
652 self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
653 self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )