1 #!/usr/bin/env python2.5
4 # Copyright (c) 2007-2008 INdT.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Lesser General Public License for more details.
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # ============================================================================
21 # Author : Yves Marcoz
23 # Description : Simple RSS Reader
24 # ============================================================================
27 from os.path import isfile, isdir
28 from shutil import rmtree
29 from os import mkdir, remove, utime
34 from BeautifulSoup import BeautifulSoup
35 from urlparse import urljoin
36 from calendar import timegm
39 return md5.new(string).hexdigest()
42 def __init__(self, configdir, key):
44 self.configdir = configdir
45 self.dir = "%s/%s.d" %(self.configdir, self.key)
46 if not isdir(self.dir):
48 if not isfile("%s/%s.db" %(self.dir, self.key)):
49 self.db = sqlite3.connect("%s/%s.db" %(self.dir, self.key) )
50 self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
51 self.db.execute("CREATE TABLE images (id text, imagePath text);")
54 self.db = sqlite3.connect("%s/%s.db" %(self.dir, self.key) )
56 def addImage(self, configdir, key, baseurl, url):
57 filename = configdir+key+".d/"+getId(url)
58 if not isfile(filename):
60 f = urllib2.urlopen(urljoin(baseurl,url))
61 outf = open(filename, "w")
66 print "Could not download " + url
68 #open(filename,"a").close() # "Touch" the file
69 file = open(filename,"a")
74 def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
75 # Expiry time is in hours
77 tmp=feedparser.parse(url, etag = etag, modified = modified)
79 tmp=feedparser.parse(url, etag = etag, modified = modified, handlers = [proxy])
80 expiry = float(expiryTime) * 3600.
83 # Check if the parse was successful (number of entries > 0, else do nothing)
84 if len(tmp["entries"])>0:
85 currentTime = time.time()
86 # The etag and modified value should only be updated if the content was not null
92 modified = tmp["modified"]
96 f = urllib2.urlopen(urljoin(tmp["feed"]["link"],"/favicon.ico"))
99 outf = open(self.dir+"/favicon.ico", "w")
105 #traceback.print_exc()
109 #reversedEntries = self.getEntries()
110 #reversedEntries.reverse()
114 tmp["entries"].reverse()
115 for entry in tmp["entries"]:
116 date = self.extractDate(entry)
120 entry["title"] = "No Title"
128 entry["author"] = None
129 if(not(entry.has_key("id"))):
131 tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
132 "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
133 id = self.generateUniqueId(tmpEntry)
135 #articleTime = time.mktime(self.entries[id]["dateTuple"])
136 soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
138 baseurl = tmpEntry["link"]
143 filename = self.addImage(configdir, self.key, baseurl, img['src'])
144 img['src']="file://%s" %filename
145 count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
147 self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
150 traceback.print_exc()
151 print "Error downloading image %s" % img
152 tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
153 file = open(tmpEntry["contentLink"], "w")
154 file.write(soup.prettify())
157 self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
160 values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
161 self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
165 # self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
167 # filename = configdir+self.key+".d/"+id+".html"
168 # file = open(filename,"a")
169 # utime(filename, None)
171 # images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
172 # for image in images:
173 # file = open(image[0],"a")
174 # utime(image[0], None)
181 rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
183 self.removeEntry(row[0])
185 from glob import glob
187 for file in glob(configdir+self.key+".d/*"):
191 # put the two dates into matching format
193 lastmodDate = stats[8]
195 expDate = time.time()-expiry*3
196 # check if image-last-modified-date is outdated
198 if expDate > lastmodDate:
202 #print 'Removing', file
204 remove(file) # commented out for testing
208 print 'Could not remove', file
210 rows = self.db.execute("SELECT MAX(date) FROM feed;")
213 return (updateTime, etag, modified)
215 def setEntryRead(self, id):
216 self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
219 def setEntryUnread(self, id):
220 self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
223 def markAllAsRead(self):
224 self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
def isEntryRead(self, id):
    """Tell whether the feed entry identified by ``id`` is flagged as read.

    Reads the ``read`` column of the matching row in the ``feed`` table
    and returns True when it equals 1, False otherwise. The row is
    indexed directly, so an ``id`` with no matching row is not handled
    here.
    """
    cursor = self.db.execute("SELECT read FROM feed WHERE id=?;", (id,))
    row = cursor.fetchone()
    flag = row[0]
    return flag == 1
def getTitle(self, id):
    """Return the ``title`` column of the ``feed`` row whose id is ``id``."""
    cursor = self.db.execute("SELECT title FROM feed WHERE id=?;", (id,))
    row = cursor.fetchone()
    return row[0]
def getContentLink(self, id):
    """Return the ``contentLink`` column of the ``feed`` row whose id is ``id``."""
    cursor = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,))
    row = cursor.fetchone()
    return row[0]
def getExternalLink(self, id):
    """Return the ``link`` column of the ``feed`` row whose id is ``id``."""
    cursor = self.db.execute("SELECT link FROM feed WHERE id=?;", (id,))
    row = cursor.fetchone()
    return row[0]
def getDate(self, id):
    """Return the entry's stored date as human-readable local-time text.

    The ``date`` column of the ``feed`` row for ``id`` is passed through
    ``time.localtime`` and rendered with the pattern
    ``"%a, %d %b %Y %H:%M:%S"``.
    """
    cursor = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,))
    stamp = cursor.fetchone()[0]
    local_fields = time.localtime(stamp)
    return time.strftime("%a, %d %b %Y %H:%M:%S", local_fields)
def getDateTuple(self, id):
    """Return the entry's stored date as a local-time ``struct_time``.

    The value comes from the ``date`` column of the ``feed`` row for
    ``id`` and is converted with ``time.localtime``.
    """
    cursor = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,))
    stamp = cursor.fetchone()[0]
    return time.localtime(stamp)
def getDateStamp(self, id):
    """Return the raw ``date`` column of the ``feed`` row whose id is ``id``."""
    cursor = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,))
    row = cursor.fetchone()
    return row[0]
251 def generateUniqueId(self, entry):
252 if(entry["id"] != None):
253 return getId(str(entry["id"]))
256 return getId(str(entry["date"]) + str(entry["title"]))
258 #print entry["title"]
259 return getId(str(entry["date"]))
261 def getIds(self, onlyUnread=False):
263 rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
265 rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
272 def getNextId(self, id):
274 index = ids.index(id)
275 return ids[(index+1)%len(ids)]
277 def getPreviousId(self, id):
279 index = ids.index(id)
280 return ids[(index-1)%len(ids)]
def getNumberOfUnreadItems(self):
    """Return how many rows of the ``feed`` table have ``read`` equal to 0."""
    cursor = self.db.execute("SELECT count(*) FROM feed WHERE read=0;")
    (unread_total,) = cursor.fetchone()
    return unread_total
def getNumberOfEntries(self):
    """Return the total number of rows in the ``feed`` table."""
    cursor = self.db.execute("SELECT count(*) FROM feed;")
    (entry_total,) = cursor.fetchone()
    return entry_total
288 def getArticle(self, entry):
289 #self.setEntryRead(id)
290 #entry = self.entries[id]
291 title = entry['title']
292 #content = entry.get('content', entry.get('summary_detail', {}))
293 content = entry["content"]
296 author = entry['author']
297 date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )
299 #text = '''<div style="color: black; background-color: white;">'''
300 text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
301 text += "<html><head><title>" + title + "</title>"
302 text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
303 #text += '<style> body {-webkit-user-select: none;} </style>'
304 text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
306 text += "<BR /><small><i>Author: " + author + "</i></small>"
307 text += "<BR /><small><i>Date: " + date + "</i></small></div>"
308 text += "<BR /><BR />"
310 text += "</body></html>"
313 def getContent(self, id):
314 contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
316 file = open(self.entries[id]["contentLink"])
317 content = file.read()
320 content = "Content unavailable"
323 def extractDate(self, entry):
324 if entry.has_key("updated_parsed"):
325 return timegm(entry["updated_parsed"])
326 elif entry.has_key("published_parsed"):
327 return timegm(entry["published_parsed"])
331 def extractContent(self, entry):
333 if entry.has_key('summary'):
334 content = entry.get('summary', '')
335 if entry.has_key('content'):
336 if len(entry.content[0].value) > len(content):
337 content = entry.content[0].value
339 content = entry.get('description', '')
342 def removeEntry(self, id):
343 contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
347 except OSError, exception:
348 print "Deleting %s: %s" % (contentLink, str (exception))
349 self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
350 self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
353 class ArchivedArticles(Feed):
354 def addArchivedArticle(self, title, link, date, configdir):
355 id = self.generateUniqueId({"date":date, "title":title})
356 values = (id, title, link, date, 0, link, 0)
357 self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
360 def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
362 rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
364 currentTime = time.time()
367 f = urllib2.urlopen(link)
368 #entry["content"] = f.read()
371 soup = BeautifulSoup(html)
375 filename = self.addImage(configdir, self.key, baseurl, img['src'])
377 self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
378 contentLink = configdir+self.key+".d/"+id+".html"
379 file = open(contentLink, "w")
380 file.write(soup.prettify())
383 self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
385 return (currentTime, None, None)
387 def purgeReadArticles(self):
388 rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
391 self.removeArticle(row[0])
393 def removeArticle(self, id):
394 rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
397 count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
405 # Lists all the feeds in a dictionary, and exposes the data
406 def __init__(self, configdir):
407 self.configdir = configdir
409 self.db = sqlite3.connect("%s/feeds.db" % self.configdir)
412 table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
414 self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
415 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
416 self.addCategory("Default Category")
417 if isfile(self.configdir+"feeds.pickle"):
418 self.importOldFormatFeeds()
420 self.addFeed("Maemo News", "http://maemo.org/news/items.xml")
422 from string import find, upper
423 if find(upper(table[0]), "WIDGET")<0:
424 self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
425 self.db.execute("UPDATE feeds SET widget=1;")
427 if find(upper(table[0]), "CATEGORY")<0:
428 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
429 self.addCategory("Default Category")
430 self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
431 self.db.execute("UPDATE feeds SET category=1;")
436 def importOldFormatFeeds(self):
437 """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
439 listing = rss.Listing(self.configdir)
441 for id in listing.getListOfFeeds():
444 values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
445 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)
448 feed = listing.getFeed(id)
449 new_feed = self.getFeed(id)
451 items = feed.getIds()[:]
454 if feed.isEntryRead(item):
458 date = timegm(feed.getDateTuple(item))
459 title = feed.getTitle(item)
460 newId = new_feed.generateUniqueId({"date":date, "title":title})
461 values = (newId, title , feed.getContentLink(item), date, tuple(time.time()), feed.getExternalLink(item), read_status)
462 new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
465 images = feed.getImages(item)
467 new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
471 self.updateUnread(id)
474 traceback.print_exc()
475 remove(self.configdir+"feeds.pickle")
478 def addArchivedArticle(self, key, index):
479 feed = self.getFeed(key)
480 title = feed.getTitle(index)
481 link = feed.getExternalLink(index)
482 date = feed.getDate(index)
483 count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
485 self.addFeed("Archived Articles", "", id="ArchivedArticles")
487 archFeed = self.getFeed("ArchivedArticles")
488 archFeed.addArchivedArticle(title, link, date, self.configdir)
489 self.updateUnread("ArchivedArticles")
491 def updateFeed(self, key, expiryTime=24, proxy=None, imageCache=False):
492 feed = self.getFeed(key)
493 db = sqlite3.connect("%s/feeds.db" % self.configdir)
494 (url, etag, modified) = db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
496 modified = time.struct_time(eval(modified))
499 (updateTime, etag, modified) = feed.updateFeed(self.configdir, url, etag, modified, expiryTime, proxy, imageCache)
503 modified=str(tuple(modified))
505 db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
507 db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )
509 self.updateUnread(key, db=db)
def getFeed(self, key):
    """Build the feed object for ``key``.

    The special key "ArchivedArticles" yields an ArchivedArticles
    instance; any other key yields a plain Feed. Both are constructed
    with this listing's ``configdir`` and the key.
    """
    if key != "ArchivedArticles":
        return Feed(self.configdir, key)
    return ArchivedArticles(self.configdir, key)
516 def editFeed(self, key, title, url, category=None):
518 self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
520 self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
def getFeedUpdateTime(self, key):
    """Return the feed's ``updateTime`` column rendered by ``time.ctime``.

    The value is read from the ``feeds`` row whose id is ``key``.
    """
    cursor = self.db.execute("SELECT updateTime FROM feeds WHERE id=?;", (key,))
    stamp = cursor.fetchone()[0]
    return time.ctime(stamp)
def getFeedNumberOfUnreadItems(self, key):
    """Return the ``unread`` column of the ``feeds`` row whose id is ``key``."""
    cursor = self.db.execute("SELECT unread FROM feeds WHERE id=?;", (key,))
    row = cursor.fetchone()
    return row[0]
def getFeedTitle(self, key):
    """Return the ``title`` column of the ``feeds`` row whose id is ``key``."""
    cursor = self.db.execute("SELECT title FROM feeds WHERE id=?;", (key,))
    row = cursor.fetchone()
    return row[0]
def getFeedUrl(self, key):
    """Return the ``url`` column of the ``feeds`` row whose id is ``key``."""
    cursor = self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,))
    row = cursor.fetchone()
    return row[0]
def getFeedCategory(self, key):
    """Return the ``category`` column of the ``feeds`` row whose id is ``key``."""
    cursor = self.db.execute("SELECT category FROM feeds WHERE id=?;", (key,))
    row = cursor.fetchone()
    return row[0]
538 def getListOfFeeds(self, category=None):
540 rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
542 rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
549 def getListOfCategories(self):
550 rows = self.db.execute("SELECT id FROM categories ORDER BY rank;" )
557 def getCategoryTitle(self, id):
558 row = self.db.execute("SELECT title FROM categories WHERE id=?;", (id, )).fetchone()
561 def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
562 if order == "Most unread":
563 tmp = "ORDER BY unread DESC"
564 #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1], reverse=True)
565 elif order == "Least unread":
566 tmp = "ORDER BY unread"
567 #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1])
568 elif order == "Most recent":
569 tmp = "ORDER BY updateTime DESC"
570 #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2], reverse=True)
571 elif order == "Least recent":
572 tmp = "ORDER BY updateTime"
573 #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2])
574 else: # order == "Manual" or invalid value...
575 tmp = "ORDER BY rank"
576 #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][0])
578 sql = "SELECT id FROM feeds WHERE unread>0 AND category=%s " %category + tmp
580 sql = "SELECT id FROM feeds WHERE category=%s " %category + tmp
581 rows = self.db.execute(sql)
588 def getFavicon(self, key):
589 filename = "%s%s.d/favicon.ico" % (self.configdir, key)
595 def updateUnread(self, key, db=None):
598 feed = self.getFeed(key)
599 db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
602 def addFeed(self, title, url, id=None, category=1):
605 count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
607 max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
610 values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
611 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?,?);", values)
613 # Ask for the feed object, it will create the necessary tables
619 def addCategory(self, title):
620 rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
623 id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
626 self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
629 def removeFeed(self, key):
630 rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
631 self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
632 self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
635 if isdir(self.configdir+key+".d/"):
636 rmtree(self.configdir+key+".d/")
638 def removeCategory(self, key):
639 if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
640 rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,) ).fetchone()[0]
641 self.db.execute("DELETE FROM categories WHERE id=?;", (key, ))
642 self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
643 self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
646 #def saveConfig(self):
647 # self.listOfFeeds["feedingit-order"] = self.sortedKeys
648 # file = open(self.configdir+"feeds.pickle", "w")
649 # pickle.dump(self.listOfFeeds, file)
652 def moveUp(self, key):
653 rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
655 self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
656 self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
659 def moveCategoryUp(self, key):
660 rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
662 self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
663 self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
666 def moveDown(self, key):
667 rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
668 max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
670 self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
671 self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
674 def moveCategoryDown(self, key):
675 rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
676 max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
678 self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
679 self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )