1 #!/usr/bin/env python2.5
4 # Copyright (c) 2007-2008 INdT.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Lesser General Public License for more details.
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # ============================================================================
21 # Author : Yves Marcoz
23 # Description : Simple RSS Reader
24 # ============================================================================
import md5
import sqlite3
import time
import urllib2
from calendar import timegm
from os import mkdir, remove, utime
from os.path import isfile, isdir
from shutil import rmtree
from urlparse import urljoin

import feedparser
from BeautifulSoup import BeautifulSoup
def getId(string):
    """Return the hexadecimal MD5 digest of ``string``.

    The digest is used in this file as an entry identifier and as a cache
    file name; it is not used for anything security related.

    NOTE(review): the ``def`` line is missing from this extract. The name is
    taken from the ``getId(...)`` call sites in this file and the parameter
    name from the surviving body line - confirm against the real source.
    """
    # hashlib supersedes the deprecated ``md5`` module and yields the same digest.
    import hashlib
    return hashlib.md5(string).hexdigest()
def __init__(self, configdir, key):
    """Open the per-feed directory and SQLite database, creating both if needed.

    configdir -- base configuration directory
    key       -- feed identifier; names the ``<key>.d`` directory and ``<key>.db`` file

    NOTE(review): ``self.key = key``, the ``mkdir`` call and the ``commit``
    are missing from this extract and were restored by inference - confirm.
    """
    self.key = key
    self.configdir = configdir
    self.dir = "%s/%s.d" %(self.configdir, self.key)
    if not isdir(self.dir):
        mkdir(self.dir)
    dbPath = "%s/%s.db" %(self.dir, self.key)
    # Must be tested before connecting: sqlite3.connect() creates the file.
    isNewDatabase = not isfile(dbPath)
    self.db = sqlite3.connect(dbPath)
    if isNewDatabase:
        self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
        self.db.execute("CREATE TABLE images (id text, imagePath text);")
        self.db.commit()
def addImage(self, configdir, key, baseurl, url):
    """Cache the image at ``url`` in the feed directory and return its local path.

    configdir -- base configuration directory (concatenated directly, so it
                 is expected to end with a path separator)
    key       -- feed identifier
    baseurl   -- URL that a relative ``url`` is resolved against
    url       -- image URL as found in the article

    If the file is already cached, only its modification time is refreshed.
    A failed download is reported on stdout and the path is still returned.

    NOTE(review): the try/except, the write/close lines, the ``else`` and the
    ``return`` are missing from this extract and were restored by inference.
    """
    filename = configdir+key+".d/"+getId(url)
    if not isfile(filename):
        try:
            f = urllib2.urlopen(urljoin(baseurl,url))
            try:
                data = f.read()
            finally:
                f.close()
            # Binary mode: image bytes must not go through newline translation.
            outf = open(filename, "wb")
            try:
                outf.write(data)
            finally:
                outf.close()
        except Exception:
            # Best effort: a missing image must not abort the feed update.
            print("Could not download " + url)
    else:
        # "Touch" the file so the age-based cleanup keeps it.
        utime(filename, None)
    return filename
def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
    """Download the feed at ``url``, store new entries and expire old ones.

    configdir  -- base configuration directory (expected to end with a separator)
    url        -- feed URL
    etag       -- ETag from the previous fetch, passed to feedparser
    modified   -- modification value from the previous fetch, passed to feedparser
    expiryTime -- expiry time in hours
    proxy      -- optional urllib2 handler passed to feedparser
    imageCache -- presumably enables downloading article images (the guarding
                  line is missing from this extract) - TODO confirm

    Returns ``(updateTime, etag, modified)`` where ``updateTime`` is the
    newest entry date in the database (0 when there is none).

    NOTE(review): a large number of lines (try/except/else/for headers,
    commits, defaults) are missing from this extract. Control flow was
    restored by inference from the surviving statements; review carefully.
    """
    from glob import glob
    from os import stat
    from os.path import basename

    # Expiry time is in hours
    if proxy is None:
        tmp = feedparser.parse(url, etag = etag, modified = modified)
    else:
        tmp = feedparser.parse(url, etag = etag, modified = modified, handlers = [proxy])
    expiry = float(expiryTime) * 3600.

    currentTime = 0
    # Check if the parse was successful (number of entries > 0, else do nothing)
    if len(tmp["entries"])>0:
        currentTime = time.time()
        # The etag and modified value should only be updated if the content was not null
        try:
            etag = tmp["etag"]
        except KeyError:
            etag = None
        try:
            modified = tmp["modified"]
        except KeyError:
            modified = None

        # Best-effort favicon download; any failure is ignored.
        try:
            f = urllib2.urlopen(urljoin(tmp["feed"]["link"],"/favicon.ico"))
            try:
                data = f.read()
            finally:
                f.close()
            # Binary mode: the icon is not text.
            outf = open(self.dir+"/favicon.ico", "wb")
            try:
                outf.write(data)
            finally:
                outf.close()
        except Exception:
            pass

        knownIds = set(self.getIds())

        tmp["entries"].reverse()
        for entry in tmp["entries"]:
            date = self.extractDate(entry)
            # Fill in fields the feed did not provide.
            # NOTE(review): the default for "link" is not visible in this extract.
            for field, default in (("title", "No Title"), ("link", ""), ("author", None)):
                try:
                    entry[field]
                except KeyError:
                    entry[field] = default
            tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
                        "date":date, "link":entry["link"], "author":entry["author"]}
            id = self.generateUniqueId(tmpEntry)

            if id not in knownIds:
                soup = BeautifulSoup(self.getArticle(tmpEntry))
                baseurl = tmpEntry["link"]
                if imageCache:
                    for img in soup('img'):
                        try:
                            filename = self.addImage(configdir, self.key, baseurl, img['src'])
                            img['src'] = filename
                            self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                        except Exception:
                            import traceback
                            traceback.print_exc()
                            print("Error downloading image %s" % img)
                tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
                file = open(tmpEntry["contentLink"], "w")
                try:
                    file.write(soup.prettify())
                finally:
                    file.close()
                values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
                self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
                # Two entries sharing date+title in one download must not be inserted twice.
                knownIds.add(id)
            else:
                # Entry still present in the feed: refresh its timestamp and
                # touch its files so the age-based cleanup below keeps them.
                try:
                    self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
                    self.db.commit()
                    images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
                    paths = [configdir+self.key+".d/"+id+".html"] + [image[0] for image in images]
                    for path in paths:
                        open(path,"a").close()
                        utime(path, None)
                except (IOError, OSError):
                    pass
        self.db.commit()

    # Unread entries live twice as long as read ones. fetchall() is used
    # because removeEntry() writes on the same connection.
    expired = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry)).fetchall()
    for row in expired:
        self.removeEntry(row[0])

    # Remove cached files that have not been touched for three expiry periods.
    expDate = time.time()-expiry*3
    for file in glob(configdir+self.key+".d/*"):
        # The database (and its journal) lives in this directory too and
        # must never be removed by the age check.
        if basename(file).startswith(self.key+".db"):
            continue
        lastmodDate = stat(file)[8]
        if expDate > lastmodDate:
            try:
                remove(file)
            except OSError:
                print("Could not remove %s" % file)

    updateTime = self.db.execute("SELECT MAX(date) FROM feed;").fetchone()[0]
    if updateTime is None:
        # MAX() yields NULL on an empty table.
        updateTime = 0
    return (updateTime, etag, modified)
def setEntryRead(self, id):
    """Mark entry ``id`` as read and persist the change."""
    self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
    # Without a commit the update is rolled back when this connection goes away.
    self.db.commit()
def setEntryUnread(self, id):
    """Mark entry ``id`` as unread and persist the change."""
    self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
    # Without a commit the update is rolled back when this connection goes away.
    self.db.commit()
def markAllAsRead(self):
    """Mark every unread entry of this feed as read and persist the change."""
    self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
    # Without a commit the update is rolled back when this connection goes away.
    self.db.commit()
def isEntryRead(self, id):
    """Return True when entry ``id`` is flagged as read, False otherwise."""
    cursor = self.db.execute("SELECT read FROM feed WHERE id=?;", (id,) )
    row = cursor.fetchone()
    # The column holds 1 for read and 0 for unread.
    return row[0]==1
def getTitle(self, id):
    """Return the stored title of entry ``id``."""
    cursor = self.db.execute("SELECT title FROM feed WHERE id=?;", (id,) )
    row = cursor.fetchone()
    return row[0]
def getContentLink(self, id):
    """Return the ``contentLink`` value stored for entry ``id``."""
    cursor = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) )
    row = cursor.fetchone()
    return row[0]
def getExternalLink(self, id):
    """Return the ``link`` value stored for entry ``id``."""
    cursor = self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) )
    row = cursor.fetchone()
    return row[0]
def getDate(self, id):
    """Return the date of entry ``id`` formatted as local time text."""
    row = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()
    localTuple = time.localtime(row[0])
    return time.strftime("%a, %d %b %Y %H:%M:%S", localTuple)
def getDateTuple(self, id):
    """Return the date of entry ``id`` as a local-time ``struct_time``."""
    row = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()
    stamp = row[0]
    return time.localtime(stamp)
def getDateStamp(self, id):
    """Return the raw ``date`` value stored for entry ``id``."""
    cursor = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) )
    row = cursor.fetchone()
    return row[0]
def generateUniqueId(self, entry):
    """Return the identifier of ``entry``, derived from its date and title.

    entry -- mapping with at least the keys ``"date"`` and ``"title"``
    """
    title = entry["title"]
    try:
        title = str(title)
    except UnicodeEncodeError:
        # Python 2: str() on a unicode title with non-ASCII characters fails
        # when the default encoding is ASCII. Encode explicitly instead; for
        # titles where str() already worked the result is unchanged.
        title = title.encode("utf-8")
    return getId(str(entry["date"]) + title)
def getIds(self, onlyUnread=False):
    """Return the entry ids of this feed as a list, newest date first.

    onlyUnread -- when true, restrict the result to unread entries

    NOTE(review): the ``if``/``else`` lines and the code building the
    returned list are missing from this extract; callers in this file index
    and slice the result, so a list is returned.
    """
    if onlyUnread:
        rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
    else:
        rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
    return [row[0] for row in rows]
def getNextId(self, id):
    """Return the id that follows ``id`` in ``getIds()`` order, wrapping around.

    Raises ValueError when ``id`` is not a known entry.

    NOTE(review): the line assigning ``ids`` is missing from this extract;
    ``self.getIds()`` is the inferred source.
    """
    ids = self.getIds()
    index = ids.index(id)
    return ids[(index+1)%len(ids)]
def getPreviousId(self, id):
    """Return the id that precedes ``id`` in ``getIds()`` order, wrapping around.

    Raises ValueError when ``id`` is not a known entry.

    NOTE(review): the line assigning ``ids`` is missing from this extract;
    ``self.getIds()`` is the inferred source.
    """
    ids = self.getIds()
    index = ids.index(id)
    return ids[(index-1)%len(ids)]
def getNumberOfUnreadItems(self):
    """Return how many entries of this feed are still unread."""
    cursor = self.db.execute("SELECT count(*) FROM feed WHERE read=0;")
    (unreadCount,) = cursor.fetchone()
    return unreadCount
def getNumberOfEntries(self):
    """Return the total number of entries stored for this feed."""
    cursor = self.db.execute("SELECT count(*) FROM feed;")
    (entryCount,) = cursor.fetchone()
    return entryCount
def getArticle(self, entry):
    """Render ``entry`` as a standalone XHTML page and return it as a string.

    entry -- mapping with the keys ``"title"``, ``"content"``, ``"link"``,
             ``"author"`` (may be None) and ``"date"`` (seconds since epoch)

    The values are inserted verbatim; no HTML escaping is applied.

    NOTE(review): the lines defining ``link``, guarding the author line,
    appending ``content`` and returning the text are missing from this
    extract and were restored by inference.
    """
    title = entry['title']
    content = entry["content"]
    link = entry['link']
    author = entry['author']
    date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )

    parts = [
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">',
        "<html><head><title>" + title + "</title>",
        '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n',
        '</head><body><div><a href="' + link + '">' + title + "</a>",
    ]
    # The author is optional; concatenating None would raise TypeError.
    if author is not None:
        parts.append("<BR /><small><i>Author: " + author + "</i></small>")
    parts.append("<BR /><small><i>Date: " + date + "</i></small></div>")
    parts.append("<BR /><BR />")
    parts.append(content)
    parts.append("</body></html>")
    return "".join(parts)
def getContent(self, id):
    """Return the stored article text of entry ``id``.

    Returns the string ``"Content unavailable"`` when the file referenced by
    the entry's ``contentLink`` cannot be read.

    NOTE(review): the try/except and ``return`` lines are missing from this
    extract and were restored by inference.
    """
    contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
    try:
        # The path comes from the database row fetched above; the previous
        # code read it from ``self.entries``, which nothing in this file sets.
        file = open(contentLink)
        try:
            content = file.read()
        finally:
            file.close()
    except (IOError, OSError, TypeError):
        # TypeError covers a NULL contentLink.
        content = "Content unavailable"
    return content
def extractDate(self, entry):
    """Return the entry's date as seconds since the epoch (UTC).

    ``updated_parsed`` is preferred over ``published_parsed``.

    NOTE(review): the fallback branch is missing from this extract. The
    current time is assumed for entries carrying no date - confirm.
    """
    # ``in`` replaces the deprecated has_key().
    if "updated_parsed" in entry:
        return timegm(entry["updated_parsed"])
    elif "published_parsed" in entry:
        return timegm(entry["published_parsed"])
    else:
        return time.time()
def extractContent(self, entry):
    """Return the richest text available for ``entry``.

    The longer of ``summary`` and ``content[0].value`` wins; when both are
    absent or empty, ``description`` is used (empty string if missing too).

    NOTE(review): the initialisation of ``content``, the condition guarding
    the ``description`` fallback and the ``return`` are missing from this
    extract and were restored by inference.
    """
    content = ""
    # ``in`` replaces the deprecated has_key().
    if 'summary' in entry:
        content = entry.get('summary', '')
    if 'content' in entry:
        if len(entry.content[0].value) > len(content):
            content = entry.content[0].value
    if content == "":
        content = entry.get('description', '')
    return content
def removeEntry(self, id):
    """Delete entry ``id``: its content file, its feed row and its image rows.

    A content file that cannot be removed is reported on stdout and does not
    stop the database rows from being deleted.

    NOTE(review): the try/except lines and the final commit are missing from
    this extract and were restored by inference.
    """
    contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
    if contentLink:
        try:
            # ``remove`` is what this file imports from os; ``os.remove``
            # would fail because the ``os`` module itself is not imported.
            remove(contentLink)
        except OSError:
            print("File not found for deletion: %s" % contentLink)
    self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
    self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
    self.db.commit()
class ArchivedArticles(Feed):
    """Feed variant holding articles that were archived for later reading.

    NOTE(review): loop headers, row unpacking, commits and the tail of
    ``removeArticle`` are missing from this extract. They were restored by
    inference from the surviving statements; review carefully.
    """

    def addArchivedArticle(self, title, link, date, configdir):
        """Register an article for archiving.

        The row is stored with ``updated=0`` and ``contentLink`` set to the
        external link; ``updateFeed`` later downloads every such row.
        ``configdir`` is accepted but not used here.
        """
        id = self.generateUniqueId({"date":date, "title":title})
        values = (id, title, link, date, 0, link, 0)
        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
        self.db.commit()

    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
        """Download every archived article that has not been fetched yet.

        Only ``configdir`` is used; the other parameters exist to match
        ``Feed.updateFeed``. Returns ``(currentTime, None, None)`` where
        ``currentTime`` is 0 when nothing had to be downloaded.
        """
        currentTime = 0
        # fetchall(): the loop writes and commits on the same connection.
        rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;").fetchall()
        for (id, link) in rows:
            currentTime = time.time()
            f = urllib2.urlopen(link)
            try:
                html = f.read()
            finally:
                f.close()
            soup = BeautifulSoup(html)
            # NOTE(review): the base URL for relative images is assumed to be
            # the article link.
            baseurl = link
            for img in soup('img'):
                filename = self.addImage(configdir, self.key, baseurl, img['src'])
                img['src'] = filename
                self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
            contentLink = configdir+self.key+".d/"+id+".html"
            file = open(contentLink, "w")
            try:
                file.write(soup.prettify())
            finally:
                file.close()

            self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
            self.db.commit()
        return (currentTime, None, None)

    def purgeReadArticles(self):
        """Remove every archived article that is flagged as read."""
        # fetchall(): removeArticle() deletes rows on the same connection.
        rows = self.db.execute("SELECT id FROM feed WHERE read=1;").fetchall()
        for row in rows:
            self.removeArticle(row[0])

    def removeArticle(self, id):
        """Remove article ``id`` together with images no other article uses."""
        rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) ).fetchall()
        for row in rows:
            count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
            # NOTE(review): everything after the count query is missing from
            # this extract; deleting unshared image files and then the entry
            # itself is the inferred intent.
            if count == 0:
                try:
                    remove(row[0])
                except OSError:
                    pass
        self.removeEntry(id)
# Lists all the feeds and exposes their data
def __init__(self, configdir):
    """Open ``feeds.db`` in ``configdir``, creating or migrating its schema.

    A new database gets the ``feeds`` table and is then filled either from an
    old ``feeds.pickle`` file or with one default feed. An existing table
    that lacks the ``widget`` column has it added, set to 1 for every feed.

    NOTE(review): the ``if``/``else`` lines, the commit and any enclosing
    error handling are missing from this extract and were restored by
    inference.
    """
    self.configdir = configdir

    self.db = sqlite3.connect("%s/feeds.db" % self.configdir)

    try:
        table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
        if table is None:
            self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int);")
            if isfile(self.configdir+"feeds.pickle"):
                self.importOldFormatFeeds()
            else:
                self.addFeed("Maemo News", "http://maemo.org/news/items.xml")
        elif table[0].upper().find("WIDGET")<0:
            # SQLite does not accept a parenthesised column definition in
            # ADD COLUMN; the previous statement could never succeed.
            self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
            self.db.execute("UPDATE feeds SET widget=1;")
            self.db.commit()
    except sqlite3.Error:
        # Report schema problems instead of hiding them; the object stays usable.
        import traceback
        traceback.print_exc()
def importOldFormatFeeds(self):
    """Load feeds saved in the outdated pickle format and convert them to sqlite.

    Every feed known to the old ``rss.Listing`` gets a row in ``feeds`` and
    its entries and images are copied into the per-feed database. The old
    ``feeds.pickle`` file is removed afterwards.

    NOTE(review): the ``import rss`` line, the rank counter, loop headers,
    commits and try/except lines are missing from this extract and were
    restored by inference; the exact extent of the error handling is a guess.
    """
    import rss
    listing = rss.Listing(self.configdir)
    rank = 0
    for id in listing.getListOfFeeds():
        try:
            rank += 1
            values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
            self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?);", values)
            self.db.commit()

            feed = listing.getFeed(id)
            new_feed = self.getFeed(id)

            for item in feed.getIds()[:]:
                if feed.isEntryRead(item):
                    read_status = 1
                else:
                    read_status = 0
                date = timegm(feed.getDateTuple(item))
                title = feed.getTitle(item)
                newId = new_feed.generateUniqueId({"date":date, "title":title})
                values = (newId, title , feed.getContentLink(item), date, time.time(), feed.getExternalLink(item), read_status)
                new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
                new_feed.db.commit()
                try:
                    images = feed.getImages(item)
                    for image in images:
                        # Keyed by the new id: image rows are looked up by the
                        # id stored in the ``feed`` table, not the old one.
                        new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (newId, image) )
                    new_feed.db.commit()
                except Exception:
                    # Images are optional; a failure must not stop the import.
                    pass
            self.updateUnread(id)
        except Exception:
            # One broken feed must not prevent the others from being imported.
            import traceback
            traceback.print_exc()
    remove(self.configdir+"feeds.pickle")
def addArchivedArticle(self, key, index):
    """Copy entry ``index`` of feed ``key`` into the "ArchivedArticles" feed.

    The archive feed is created on first use and its unread count is
    refreshed afterwards.

    NOTE(review): the condition guarding ``addFeed`` is missing from this
    extract; "create only when absent" is inferred from the count query.
    """
    feed = self.getFeed(key)
    title = feed.getTitle(index)
    link = feed.getExternalLink(index)
    # The archive stores ``date`` in a float column that is later passed to
    # time.localtime(), so the raw timestamp is needed here, not the
    # formatted text returned by getDate().
    date = feed.getDateStamp(index)
    count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
    if count == 0:
        self.addFeed("Archived Articles", "", id="ArchivedArticles")

    archFeed = self.getFeed("ArchivedArticles")
    archFeed.addArchivedArticle(title, link, date, self.configdir)
    self.updateUnread("ArchivedArticles")
def updateFeed(self, key, expiryTime=24, proxy=None, imageCache=False):
    """Refresh feed ``key`` and store the resulting update time and cache validators.

    expiryTime, proxy and imageCache are passed through to the feed's own
    ``updateFeed``.

    NOTE(review): the condition selecting between the two UPDATE statements
    and the commit are missing from this extract. "Update time known
    (greater than 0)" is the inferred condition - confirm.
    """
    feed = self.getFeed(key)
    # A dedicated connection is used instead of self.db.
    # NOTE(review): presumably because this runs on a worker thread - confirm.
    db = sqlite3.connect("%s/feeds.db" % self.configdir)
    try:
        (url, etag, modified) = db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
        # SECURITY: eval() executes whatever text is stored in the ``modified``
        # column. It is kept so existing rows still load, but anyone able to
        # write to feeds.db can run code here - replace with a safe parser.
        (updateTime, etag, modified) = feed.updateFeed(self.configdir, url, etag, eval(modified), expiryTime, proxy, imageCache)
        if updateTime is not None and updateTime > 0:
            db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, str(modified), key) )
        else:
            db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, str(modified), key) )
        db.commit()
        self.updateUnread(key, db=db)
    finally:
        # The connection is local to this call; do not leak it.
        db.close()
def getFeed(self, key):
    """Return a feed object for ``key``; the archive key gets an ArchivedArticles."""
    feedClass = Feed
    if key == "ArchivedArticles":
        feedClass = ArchivedArticles
    return feedClass(self.configdir, key)
def editFeed(self, key, title, url):
    """Change the title and URL of feed ``key`` and persist the change."""
    self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
    # Without a commit the edit is rolled back when this connection goes away.
    self.db.commit()
def getFeedUpdateTime(self, key):
    """Return the update time of feed ``key`` as ``time.ctime`` text."""
    cursor = self.db.execute("SELECT updateTime FROM feeds WHERE id=?;", (key,))
    stamp = cursor.fetchone()[0]
    return time.ctime(stamp)
def getFeedNumberOfUnreadItems(self, key):
    """Return the cached unread count stored for feed ``key``."""
    cursor = self.db.execute("SELECT unread FROM feeds WHERE id=?;", (key,))
    row = cursor.fetchone()
    return row[0]
def getFeedTitle(self, key):
    """Return the title stored for feed ``key``."""
    cursor = self.db.execute("SELECT title FROM feeds WHERE id=?;", (key,))
    row = cursor.fetchone()
    return row[0]
def getFeedUrl(self, key):
    """Return the URL stored for feed ``key``."""
    cursor = self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,))
    row = cursor.fetchone()
    return row[0]
def getListOfFeeds(self):
    """Return the ids of all feeds as a list ordered by rank.

    NOTE(review): everything after the query is missing from this extract;
    returning the plain list of ids is inferred - confirm whether the
    original filtered any rows.
    """
    rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
    return [row[0] for row in rows]
def getSortedListOfKeys(self, order, onlyUnread=False):
    """Return feed ids as a list sorted according to ``order``.

    order      -- "Most unread", "Least unread", "Most recent" or
                  "Least recent"; any other value ("Manual" included) sorts
                  by rank
    onlyUnread -- when true, only feeds with unread entries are returned

    NOTE(review): the ``if onlyUnread``/``else`` lines and the code building
    the result are missing from this extract and were restored by inference.
    """
    # Fixed whitelist: only these literal clauses ever reach the SQL text.
    orderClauses = {
        "Most unread": "ORDER BY unread DESC",
        "Least unread": "ORDER BY unread",
        "Most recent": "ORDER BY updateTime DESC",
        "Least recent": "ORDER BY updateTime",
    }
    tmp = orderClauses.get(order, "ORDER BY rank")
    if onlyUnread:
        sql = "SELECT id FROM feeds WHERE unread>0 " + tmp
    else:
        sql = "SELECT id FROM feeds " + tmp
    rows = self.db.execute(sql)
    return [row[0] for row in rows]
def getFavicon(self, key):
    """Return the path of the cached favicon of feed ``key``.

    NOTE(review): everything after the path computation is missing from this
    extract. Returning the path when the file exists and False otherwise is
    an assumption - confirm against the callers.
    """
    filename = "%s%s.d/favicon.ico" % (self.configdir, key)
    if isfile(filename):
        return filename
    return False
def updateUnread(self, key, db=None):
    """Store the current unread count of feed ``key`` in the ``feeds`` table.

    db -- connection to write through; defaults to ``self.db``

    NOTE(review): the lines resolving the ``db`` default and committing are
    missing from this extract and were restored by inference.
    """
    if db is None:
        db = self.db
    feed = self.getFeed(key)
    db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
    db.commit()
def addFeed(self, title, url, id=None):
    """Register a new feed at the end of the manual ordering.

    title -- feed title
    url   -- feed URL
    id    -- feed identifier; derived from ``title`` when not given

    Returns True when the feed was added and False when a feed with that id
    already exists.

    NOTE(review): the id default, the duplicate check, the empty-table guard,
    the commit and the return values are missing from this extract and were
    restored by inference - confirm, in particular the return values.
    """
    if not id:
        id = getId(title)
    count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
    if count != 0:
        return False
    max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
    if max_rank is None:
        # MAX() yields NULL on an empty table.
        max_rank = 0
    values = (id, title, url, 0, 0, max_rank+1, None, "None", 1)
    self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?);", values)
    self.db.commit()
    # Ask for the feed object, it will create the necessary tables
    self.getFeed(id)
    return True
def removeFeed(self, key):
    """Delete feed ``key``, close the gap in the ranking and remove its directory.

    NOTE(review): the commit is missing from this extract and was restored.
    """
    rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
    self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
    # Every feed ranked below the removed one moves up by one.
    self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
    self.db.commit()

    feedDir = self.configdir+key+".d/"
    if isdir(feedDir):
        rmtree(feedDir)
576 #def saveConfig(self):
577 # self.listOfFeeds["feedingit-order"] = self.sortedKeys
578 # file = open(self.configdir+"feeds.pickle", "w")
579 # pickle.dump(self.listOfFeeds, file)
def moveUp(self, key):
    """Move feed ``key`` one position up in the manual ordering.

    The feed currently ranked directly above takes the vacated rank. Nothing
    happens when ``key`` is already first.

    NOTE(review): the guard and the commit are missing from this extract.
    The guard against the smallest rank mirrors the ``MAX(rank)`` lookup
    visible in ``moveDown`` - confirm.
    """
    rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
    min_rank = self.db.execute("SELECT MIN(rank) FROM feeds;").fetchone()[0]
    if rank > min_rank:
        self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
        self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
        self.db.commit()
def moveDown(self, key):
    """Move feed ``key`` one position down in the manual ordering.

    The feed currently ranked directly below takes the vacated rank. Nothing
    happens when ``key`` is already last.

    NOTE(review): the guard and the commit are missing from this extract;
    the guard is inferred from the ``MAX(rank)`` lookup.
    """
    rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
    max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
    if rank < max_rank:
        self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
        self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
        self.db.commit()