1 #!/usr/bin/env python2.5
4 # Copyright (c) 2007-2008 INdT.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Lesser General Public License for more details.
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # ============================================================================
21 # Author : Yves Marcoz
23 # Description : Simple RSS Reader
24 # ============================================================================
27 from os.path import isfile, isdir
28 from shutil import rmtree
29 from os import mkdir, remove, utime
34 from BeautifulSoup import BeautifulSoup
35 from urlparse import urljoin
36 from calendar import timegm
# Returns the hexadecimal MD5 digest of `string`.
# NOTE(review): the `def` line that owns this statement is not visible in this
# view; other blocks call it as getId(...).
39 return md5.new(string).hexdigest()
# Open the per-feed storage: a "<configdir>/<key>.d" directory that holds a
# "<key>.db" SQLite database.
#   configdir -- base configuration directory
#   key       -- feed identifier used to name the directory and database file
# NOTE(review): self.key is read below but its assignment is not visible in
# this view -- confirm it is set before self.dir is built.
42 def __init__(self, configdir, key):
44 self.configdir = configdir
45 self.dir = "%s/%s.d" %(self.configdir, self.key)
46 if not isdir(self.dir):
# No database file yet: connect (which creates it) and create both tables.
48 if not isfile("%s/%s.db" %(self.dir, self.key)):
49 self.db = sqlite3.connect("%s/%s.db" %(self.dir, self.key) )
# feed: one row per article; images: one row per (article id, cached image path).
50 self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
51 self.db.execute("CREATE TABLE images (id text, imagePath text);")
# Presumably the branch for an already-existing database file -- TODO confirm.
54 self.db = sqlite3.connect("%s/%s.db" %(self.dir, self.key) )
# Cache the image at `url` (resolved against `baseurl` with urljoin) in the
# feed directory, under a file name derived from getId(url).
# NOTE(review): the path is built as configdir+key+".d/", so configdir must end
# with a path separator, whereas Feed.__init__ joins with "%s/%s.d".
56 def addImage(self, configdir, key, baseurl, url):
57 filename = configdir+key+".d/"+getId(url)
# Only download when the image is not already cached.
58 if not isfile(filename):
60 f = urllib2.urlopen(urljoin(baseurl,url))
# NOTE(review): opened in text mode ("w") although the payload is image data.
61 outf = open(filename, "w")
66 print "Could not download " + url
68 #open(filename,"a").close() # "Touch" the file
# Opening in append mode leaves the existing content untouched.
69 file = open(filename,"a")
# Fetch `url` with feedparser, store new entries (and optionally their images)
# in this feed's database and directory, refresh the timestamps of entries
# still present in the feed, then expire old entries and stale cache files.
#   configdir   -- base directory; concatenated directly with self.key below
#   url         -- feed URL handed to feedparser.parse
#   etag, modified -- values forwarded to feedparser.parse
#   expiryTime  -- expiry period in hours
#   proxy       -- when used, passed to feedparser as the single handler
#   imageCache  -- NOTE(review): the lines that test this flag are not visible
# Returns the tuple (updateTime, etag, modified).
74 def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
75 # Expiry time is in hours
# Two parse variants: without and with the proxy handler.
77 tmp=feedparser.parse(url, etag = etag, modified = modified)
79 tmp=feedparser.parse(url, etag = etag, modified = modified, handlers = [proxy])
# Convert the expiry period from hours to seconds.
80 expiry = float(expiryTime) * 3600.
83 # Check if the parse was successful (number of entries > 0, else do nothing)
84 if len(tmp["entries"])>0:
85 currentTime = time.time()
86 # The etag and modified value should only be updated if the content was not null
92 modified = tmp["modified"]
# Fetch the site's favicon from the root of the feed's link URL.
96 f = urllib2.urlopen(urljoin(tmp["feed"]["link"],"/favicon.ico"))
# NOTE(review): text mode ("w") is used for icon data.
99 outf = open(self.dir+"/favicon.ico", "w")
105 #traceback.print_exc()
109 #reversedEntries = self.getEntries()
110 #reversedEntries.reverse()
# Reverse the parsed entry list in place before iterating over it.
114 tmp["entries"].reverse()
115 for entry in tmp["entries"]:
116 date = self.extractDate(entry)
# Defaults for fields the entry lacks (the guarding conditions are mostly not
# visible in this view).
120 entry["title"] = "No Title"
128 entry["author"] = None
129 if(not(entry.has_key("id"))):
# Normalised copy of the entry used for id generation and article rendering.
131 tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
132 "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
133 id = self.generateUniqueId(tmpEntry)
135 #articleTime = time.mktime(self.entries[id]["dateTuple"])
# Render the article to HTML and parse it so image tags can be processed.
137 soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
139 baseurl = tmpEntry["link"]
# Cache each image locally and record its path against the article id.
143 filename = self.addImage(configdir, self.key, baseurl, img['src'])
145 self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
148 traceback.print_exc()
149 print "Error downloading image %s" % img
# Write the rendered article to "<configdir><key>.d/<id>.html".
150 tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
151 file = open(tmpEntry["contentLink"], "w")
152 file.write(soup.prettify())
# New article row; the trailing 0 is the read flag (unread).
154 values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
155 self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
# Presumably the branch for an article that is already stored: refresh its
# `updated` stamp and the mtime of its files -- TODO confirm the condition.
158 self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
160 filename = configdir+self.key+".d/"+id+".html"
161 file = open(filename,"a")
# utime(path, None) sets the file's access/modification times to now.
162 utime(filename, None)
164 images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
166 file = open(image[0],"a")
167 utime(image[0], None)
# Select expired articles: unread ones not updated for 2*expiry seconds, read
# ones not updated for expiry seconds.
174 rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
176 self.removeEntry(row[0])
178 from glob import glob
# Sweep every file in the feed directory and delete those older than 3*expiry.
180 for file in glob(configdir+self.key+".d/*"):
184 # put the two dates into matching format
# Index 8 of an os.stat() result is st_mtime; `stats` is assigned on a line
# not visible here -- presumably os.stat(file).
186 lastmodDate = stats[8]
188 expDate = time.time()-expiry*3
189 # check if image-last-modified-date is outdated
191 if expDate > lastmodDate:
195 #print 'Removing', file
197 remove(file) # commented out for testing
201 print 'Could not remove', file
# NOTE(review): updateTime is presumably derived from this MAX(date) query; the
# assignment is not visible in this view.
203 rows = self.db.execute("SELECT MAX(date) FROM feed;")
206 return (updateTime, etag, modified)
def setEntryRead(self, id):
    """Flag the article whose id is `id` as read (read=1)."""
    statement = "UPDATE feed SET read=1 WHERE id=?;"
    self.db.execute(statement, (id,))
def setEntryUnread(self, id):
    """Flag the article whose id is `id` as unread (read=0)."""
    statement = "UPDATE feed SET read=0 WHERE id=?;"
    self.db.execute(statement, (id,))
def markAllAsRead(self):
    """Set read=1 on every article row that is currently unread."""
    statement = "UPDATE feed SET read=1 WHERE read=0;"
    self.db.execute(statement)
def isEntryRead(self, id):
    """Return True when the stored read flag of article `id` is 1, else False."""
    cursor = self.db.execute("SELECT read FROM feed WHERE id=?;", (id,))
    flag = cursor.fetchone()[0]
    return flag == 1
def getTitle(self, id):
    """Return the stored title of the article `id`."""
    cursor = self.db.execute("SELECT title FROM feed WHERE id=?;", (id,))
    row = cursor.fetchone()
    return row[0]
def getContentLink(self, id):
    """Return the stored contentLink value of the article `id`."""
    cursor = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,))
    row = cursor.fetchone()
    return row[0]
def getExternalLink(self, id):
    """Return the stored link value of the article `id`."""
    cursor = self.db.execute("SELECT link FROM feed WHERE id=?;", (id,))
    row = cursor.fetchone()
    return row[0]
def getDate(self, id):
    """Return the stored date of article `id` formatted as local time text."""
    cursor = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,))
    stamp = cursor.fetchone()[0]
    local_tuple = time.localtime(stamp)
    return time.strftime("%a, %d %b %Y %H:%M:%S", local_tuple)
def getDateTuple(self, id):
    """Return the stored date of article `id` as a local-time struct_time."""
    cursor = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,))
    stamp = cursor.fetchone()[0]
    return time.localtime(stamp)
def getDateStamp(self, id):
    """Return the raw stored date value of the article `id`."""
    cursor = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,))
    row = cursor.fetchone()
    return row[0]
def generateUniqueId(self, entry):
    """Return the hashed identifier for `entry`.

    `entry` is a dict. When it carries a non-None "id" value, the identifier
    is getId() of that value; otherwise it is getId() of the entry's "date"
    and "title" concatenated as strings.

    The "id" key is optional: some callers in this file build the dict with
    only "date" and "title", which previously raised KeyError here.
    """
    entry_id = entry.get("id")
    if entry_id is not None:
        return getId(str(entry_id))
    return getId(str(entry["date"]) + str(entry["title"]))
# Query article ids ordered by date, newest first.
#   onlyUnread -- NOTE(review): presumably selects between the two queries
#                 below; the branching lines and the return statement are not
#                 visible in this view.
250 def getIds(self, onlyUnread=False):
# Unread articles only (read=0).
252 rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
# All articles.
254 rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
def getNextId(self, id):
    """Return the id that follows `id` in self.getIds(), wrapping around.

    The last id is followed by the first one. Raises ValueError (from
    list.index) when `id` is not in the list.
    """
    # `ids` was read without being bound in the visible block.
    ids = self.getIds()
    index = ids.index(id)
    return ids[(index+1)%len(ids)]
def getPreviousId(self, id):
    """Return the id that precedes `id` in self.getIds(), wrapping around.

    The first id is preceded by the last one. Raises ValueError (from
    list.index) when `id` is not in the list.
    """
    # `ids` was read without being bound in the visible block.
    ids = self.getIds()
    index = ids.index(id)
    return ids[(index-1)%len(ids)]
def getNumberOfUnreadItems(self):
    """Return how many article rows have read=0."""
    cursor = self.db.execute("SELECT count(*) FROM feed WHERE read=0;")
    row = cursor.fetchone()
    return row[0]
def getNumberOfEntries(self):
    """Return the total number of article rows in the feed table."""
    cursor = self.db.execute("SELECT count(*) FROM feed;")
    row = cursor.fetchone()
    return row[0]
# Build a standalone XHTML page for one article from the `entry` dict.
# Keys read in the visible lines: 'title', "content", 'author', "date".
# NOTE(review): `link` is used in the markup below but its assignment is not
# visible in this view (presumably entry['link']); the lines that append the
# content and return the text are likewise not visible.
# NOTE(review): title, link and author are concatenated into the markup
# without HTML escaping.
277 def getArticle(self, entry):
278 #self.setEntryRead(id)
279 #entry = self.entries[id]
280 title = entry['title']
281 #content = entry.get('content', entry.get('summary_detail', {}))
282 content = entry["content"]
285 author = entry['author']
# entry["date"] is formatted as local time.
286 date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )
288 #text = '''<div style="color: black; background-color: white;">'''
289 text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
290 text += "<html><head><title>" + title + "</title>"
291 text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
292 #text += '<style> body {-webkit-user-select: none;} </style>'
# The header block makes the title a hyperlink to the original article.
293 text += '</head><body background=\"white\"><div><a href=\"' + link + '\">' + title + "</a>"
# NOTE(review): the updateFeed code in this file can set "author" to None;
# presumably a hidden guard skips this line in that case -- TODO confirm.
295 text += "<BR /><small><i>Author: " + author + "</i></small>"
296 text += "<BR /><small><i>Date: " + date + "</i></small></div>"
297 text += "<BR /><BR />"
299 text += "</body></html>"
def getContent(self, id):
    """Return the saved HTML of article `id`, or "Content unavailable".

    The file path comes from the contentLink column of the feed table. The
    fallback string is returned when there is no such row, the row has no
    path, or the file cannot be read.
    """
    row = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()
    if row is None or not row[0]:
        return "Content unavailable"
    # Open the path that was just queried; the previous code indexed
    # self.entries, which the SQLite-backed feed does not define.
    try:
        handle = open(row[0])
        try:
            return handle.read()
        finally:
            # Close the handle whether or not the read succeeded.
            handle.close()
    except (IOError, OSError):
        return "Content unavailable"
# Return the entry's timestamp as seconds since the epoch.
# "updated_parsed" is preferred over "published_parsed"; calendar.timegm
# interprets the time tuple as UTC.
# NOTE(review): the fallback for entries with neither key is not visible in
# this view.
312 def extractDate(self, entry):
313 if entry.has_key("updated_parsed"):
314 return timegm(entry["updated_parsed"])
315 elif entry.has_key("published_parsed"):
316 return timegm(entry["published_parsed"])
# Pick the body text of a parsed feed entry.
# NOTE(review): the initial assignment of `content`, an `else:` and the return
# statement are not visible in this view.
320 def extractContent(self, entry):
# Start from the summary when the entry has one.
322 if entry.has_key('summary'):
323 content = entry.get('summary', '')
# Prefer the first content element when it is longer than what is held so far.
324 if entry.has_key('content'):
325 if len(entry.content[0].value) > len(content):
326 content = entry.content[0].value
# Presumably the fallback when there is no 'content' key -- TODO confirm.
328 content = entry.get('description', '')
# Delete the article `id`: remove its saved content file, then its rows in
# the feed and images tables.
# NOTE(review): fetchone() returns None for an unknown id, so the [0] lookup
# would raise TypeError.
331 def removeEntry(self, id):
332 contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
# NOTE(review): this calls os.remove, but the only os import visible in this
# view is `from os import mkdir, remove, utime` -- confirm `import os` exists.
335 os.remove(contentLink)
337 print "File not found for deletion: %s" % contentLink
338 self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
339 self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
# Feed subclass for articles the user archived. Rows are inserted with
# updated=0 and downloaded later by updateFeed.
342 class ArchivedArticles(Feed):
# Record an article to archive. `link` is stored in both the contentLink and
# link columns; updated=0 marks the row as not yet downloaded.
# NOTE(review): the dict passed to generateUniqueId has no "id" key, while
# generateUniqueId in this file reads entry["id"].
343 def addArchivedArticle(self, title, link, date, configdir):
344 id = self.generateUniqueId({"date":date, "title":title})
345 values = (id, title, link, date, 0, link, 0)
346 self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
# Download every archived article that is still pending (updated=0), cache its
# images, save the page locally and point contentLink at the saved copy.
# url, etag, modified, expiryTime, proxy and imageCache are not read in the
# visible lines. Returns (currentTime, None, None).
349 def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
351 rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
353 currentTime = time.time()
356 f = urllib2.urlopen(link)
357 #entry["content"] = f.read()
360 soup = BeautifulSoup(html)
# NOTE(review): `baseurl` is assigned on a line not visible in this view.
364 filename = self.addImage(configdir, self.key, baseurl, img['src'])
366 self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
367 contentLink = configdir+self.key+".d/"+id+".html"
368 file = open(contentLink, "w")
369 file.write(soup.prettify())
# Mark the row as downloaded and unread, pointing at the local file.
372 self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
374 return (currentTime, None, None)
# Remove every archived article that has been read.
376 def purgeReadArticles(self):
377 rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
380 self.removeArticle(row[0])
# Remove one archived article.
382 def removeArticle(self, id):
383 rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
# Counts how many OTHER articles reference the same image path; presumably the
# file is only deleted when that count is zero -- TODO confirm (the following
# lines are not visible).
386 count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
394 # Lists all the feeds in a dictionary, and expose the data
# Open "<configdir>/feeds.db" and make sure its schema is present and current.
#   configdir -- base configuration directory
# NOTE(review): the database path is joined with "/", while the pickle path
# below is concatenated directly, so configdir must end with a separator for
# both to resolve to the same directory.
395 def __init__(self, configdir):
396 self.configdir = configdir
398 self.db = sqlite3.connect("%s/feeds.db" % self.configdir)
# First schema row of the database; presumably None for a brand-new file,
# which selects the creation branch below -- TODO confirm.
401 table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
403 self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
404 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
405 self.addCategory("Default Category")
# Migrate feeds from the old pickle format when such a file exists.
406 if isfile(self.configdir+"feeds.pickle"):
407 self.importOldFormatFeeds()
409 self.addFeed("Maemo News", "http://maemo.org/news/items.xml")
# Schema upgrades for existing databases: add columns that are absent from the
# stored CREATE statement.
# NOTE(review): string.find / string.upper exist only in Python 2.
411 from string import find, upper
412 if find(upper(table[0]), "WIDGET")<0:
413 self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
414 self.db.execute("UPDATE feeds SET widget=1;")
# NOTE(review): table[0] is only the first row of sqlite_master; this check
# assumes that row is the feeds table.
416 if find(upper(table[0]), "CATEGORY")<0:
417 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
418 self.addCategory("Default Category")
419 self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
420 self.db.execute("UPDATE feeds SET category=1;")
# Copies every feed and its articles from the old-format `rss.Listing` into
# the SQLite tables, then deletes the old "feeds.pickle" file.
425 def importOldFormatFeeds(self):
426 """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
428 listing = rss.Listing(self.configdir)
430 for id in listing.getListOfFeeds():
# NOTE(review): `rank` is initialised and incremented on lines not visible in
# this view.
# Nine bound values; the tenth column (category) is the literal 1 in the SQL.
433 values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
434 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)
# `feed` is the old-format object, `new_feed` the SQLite-backed one.
437 feed = listing.getFeed(id)
438 new_feed = self.getFeed(id)
440 items = feed.getIds()[:]
# NOTE(review): `read_status` is presumably set from this test on lines not
# visible in this view.
443 if feed.isEntryRead(item):
447 date = timegm(feed.getDateTuple(item))
448 title = feed.getTitle(item)
# A new id is generated from date and title for the SQLite row.
449 newId = new_feed.generateUniqueId({"date":date, "title":title})
450 values = (newId, title , feed.getContentLink(item), date, time.time(), feed.getExternalLink(item), read_status)
451 new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
454 images = feed.getImages(item)
# NOTE(review): image rows are keyed by the old `item` id while the feed row
# above uses `newId` -- confirm this is intended.
456 new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
460 self.updateUnread(id)
463 traceback.print_exc()
464 remove(self.configdir+"feeds.pickle")
# Copy article `index` of feed `key` into the "ArchivedArticles" feed and
# refresh that feed's unread counter.
467 def addArchivedArticle(self, key, index):
468 feed = self.getFeed(key)
469 title = feed.getTitle(index)
470 link = feed.getExternalLink(index)
# NOTE(review): Feed.getDate in this file returns a strftime-formatted string,
# and ArchivedArticles.addArchivedArticle inserts this value into the `date`
# column -- getDateStamp may have been intended; confirm.
471 date = feed.getDate(index)
# Presumably the archive feed is created only when this count is zero -- the
# condition line is not visible in this view.
472 count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
474 self.addFeed("Archived Articles", "", id="ArchivedArticles")
476 archFeed = self.getFeed("ArchivedArticles")
477 archFeed.addArchivedArticle(title, link, date, self.configdir)
478 self.updateUnread("ArchivedArticles")
# Refresh feed `key`: run its updateFeed with the stored url/etag/modified,
# write the returned values back to the feeds table and recompute the unread
# counter.
#   expiryTime, proxy, imageCache -- forwarded unchanged to the feed object
480 def updateFeed(self, key, expiryTime=24, proxy=None, imageCache=False):
481 feed = self.getFeed(key)
# A separate connection is opened here instead of reusing self.db.
482 db = sqlite3.connect("%s/feeds.db" % self.configdir)
483 (url, etag, modified) = db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
# `modified` is stored as str(value) and turned back into an object with eval.
# NOTE(review): eval executes whatever text is in that column; consider a
# safer parser.
484 (updateTime, etag, modified) = feed.updateFeed(self.configdir, url, etag, eval(modified), expiryTime, proxy, imageCache)
# Two variants of the write-back: with and without updateTime. The condition
# that selects between them is not visible in this view.
486 db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, str(modified), key) )
488 db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, str(modified), key) )
490 self.updateUnread(key, db=db)
def getFeed(self, key):
    """Return the feed object for `key`.

    The key "ArchivedArticles" yields an ArchivedArticles instance; any
    other key yields a plain Feed. Both are built from self.configdir and key.
    """
    if key != "ArchivedArticles":
        return Feed(self.configdir, key)
    return ArchivedArticles(self.configdir, key)
# Update the stored title and url of feed `key`, and its category in the first
# variant.
# NOTE(review): presumably the first statement runs when `category` is given
# and the second otherwise; the branching lines are not visible in this view.
497 def editFeed(self, key, title, url, category=None):
499 self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
501 self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
def getFeedUpdateTime(self, key):
    """Return the stored updateTime of feed `key` formatted by time.ctime."""
    cursor = self.db.execute("SELECT updateTime FROM feeds WHERE id=?;", (key,))
    stamp = cursor.fetchone()[0]
    return time.ctime(stamp)
def getFeedNumberOfUnreadItems(self, key):
    """Return the unread counter stored for feed `key` in the feeds table."""
    cursor = self.db.execute("SELECT unread FROM feeds WHERE id=?;", (key,))
    row = cursor.fetchone()
    return row[0]
def getFeedTitle(self, key):
    """Return the title stored for feed `key`."""
    cursor = self.db.execute("SELECT title FROM feeds WHERE id=?;", (key,))
    row = cursor.fetchone()
    return row[0]
def getFeedUrl(self, key):
    """Return the URL stored for feed `key`."""
    cursor = self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,))
    row = cursor.fetchone()
    return row[0]
# Query feed ids ordered by rank, optionally restricted to one category.
# NOTE(review): the branch on `category`, the loop that collects the ids and
# the return statement are not visible in this view.
516 def getListOfFeeds(self, category=None):
# Feeds of the given category only.
518 rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
# All feeds.
520 rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
# Query category ids ordered by rank.
# NOTE(review): the lines that collect and return the ids are not visible in
# this view.
527 def getListOfCategories(self):
528 rows = self.db.execute("SELECT id FROM categories ORDER BY rank;" )
# Look up the title of category `id`.
# NOTE(review): the return statement is not visible in this view; `row` is
# None when the category does not exist.
535 def getCategoryTitle(self, id):
536 row = self.db.execute("SELECT title FROM categories WHERE id=?;", (id, )).fetchone()
def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
    """Return the ids of the feeds in `category`, sorted as requested.

    order      -- "Most unread", "Least unread", "Most recent" or
                  "Least recent"; any other value (e.g. "Manual") sorts by
                  the stored rank.
    onlyUnread -- when true, only feeds with unread>0 are returned.
    category   -- category id to filter on.
    """
    order_clauses = {
        "Most unread": "ORDER BY unread DESC",
        "Least unread": "ORDER BY unread",
        "Most recent": "ORDER BY updateTime DESC",
        "Least recent": "ORDER BY updateTime",
    }
    # order == "Manual" or invalid value...
    clause = order_clauses.get(order, "ORDER BY rank")
    # The unread variant used to read "WHERE unread>0 WHERE category=..." with
    # no space before ORDER BY, which is not valid SQL. The category is now
    # bound as a parameter instead of being formatted into the statement.
    if onlyUnread:
        sql = "SELECT id FROM feeds WHERE unread>0 AND category=? " + clause
    else:
        sql = "SELECT id FROM feeds WHERE category=? " + clause
    rows = self.db.execute(sql, (category,))
    # Rows with an empty id are skipped.
    return [row[0] for row in rows if row[0]]
# Build the path of the cached favicon for feed `key`:
# "<configdir><key>.d/favicon.ico" (configdir is concatenated directly).
# NOTE(review): the lines that use `filename` and return are not visible in
# this view.
566 def getFavicon(self, key):
567 filename = "%s%s.d/favicon.ico" % (self.configdir, key)
# Store the current number of unread articles of feed `key` in the feeds
# table, as reported by the feed object's getNumberOfUnreadItems().
#   db -- connection to write through.
# NOTE(review): presumably db falls back to self.db when None; that handling
# is not visible in this view.
573 def updateUnread(self, key, db=None):
576 feed = self.getFeed(key)
577 db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
# Register a new feed row with the given title, url and category.
#   id -- explicit feed id; NOTE(review): presumably derived from the title or
#         url when None -- the lines handling that are not visible here.
580 def addFeed(self, title, url, id=None, category=1):
# Number of existing feeds with this id; presumably used to avoid duplicates
# -- the condition line is not visible in this view.
583 count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
# MAX(rank) is NULL (None) on an empty table; presumably handled on a line not
# visible here, since max_rank+1 is computed below.
585 max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
# New feeds start with unread=0, updateTime=0, no etag, modified="None",
# widget=1, and are ranked last.
588 values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
589 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?,?);", values)
591 # Ask for the feed object, it will create the necessary tables
# Insert a new category named `title` with unread=0, using the next free rank
# and id.
# NOTE(review): both MAX(...)+1 queries yield NULL (None) on an empty table;
# presumably lines not visible here substitute a default.
597 def addCategory(self, title):
598 rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
601 id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
604 self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
def removeFeed(self, key):
    """Delete feed `key`, close the gap in the ranks, and drop its directory.

    Feeds ranked after the removed one move up by one. The directory
    "<configdir><key>.d/" is removed recursively when it exists.
    """
    db = self.db
    removed_rank = db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
    db.execute("DELETE FROM feeds WHERE id=?;", (key,))
    db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (removed_rank,))

    feed_dir = self.configdir + key + ".d/"
    if isdir(feed_dir):
        rmtree(feed_dir)
def removeCategory(self, key):
    """Delete category `key` unless it is the only category left.

    Categories ranked after it move up by one, and feeds that belonged to it
    are reassigned to category 1.
    """
    db = self.db
    total = db.execute("SELECT count(*) FROM categories;").fetchone()[0]
    if not total > 1:
        return
    removed_rank = db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
    db.execute("DELETE FROM categories WHERE id=?;", (key,))
    db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (removed_rank,))
    db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,))
624 #def saveConfig(self):
625 # self.listOfFeeds["feedingit-order"] = self.sortedKeys
626 # file = open(self.configdir+"feeds.pickle", "w")
627 # pickle.dump(self.listOfFeeds, file)
# Move feed `key` one position up by swapping ranks with the feed directly
# above it.
# NOTE(review): presumably guarded so the top feed is not moved; the condition
# line is not visible in this view.
630 def moveUp(self, key):
631 rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
# First push the neighbour (rank-1) down to this rank, then take its place.
633 self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
634 self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
# Move category `key` one position up by swapping ranks with the category
# directly above it.
# NOTE(review): presumably guarded so the top category is not moved; the
# condition line is not visible in this view.
637 def moveCategoryUp(self, key):
638 rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
# First push the neighbour (rank-1) down to this rank, then take its place.
640 self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
641 self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
# Move feed `key` one position down by swapping ranks with the feed directly
# below it.
644 def moveDown(self, key):
645 rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
# NOTE(review): max_rank is presumably compared with rank so the last feed is
# not moved; the condition line is not visible in this view.
646 max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
# First pull the neighbour (rank+1) up to this rank, then take its place.
648 self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
649 self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
# Move category `key` one position down by swapping ranks with the category
# directly below it.
652 def moveCategoryDown(self, key):
653 rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
# NOTE(review): max_rank is presumably compared with rank so the last category
# is not moved; the condition line is not visible in this view.
654 max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
# First pull the neighbour (rank+1) up to this rank, then take its place.
656 self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
657 self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )