1 #!/usr/bin/env python2.5
4 # Copyright (c) 2007-2008 INdT.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Lesser General Public License for more details.
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # ============================================================================
21 # Author : Yves Marcoz
23 # Description : Simple RSS Reader
24 # ============================================================================
27 from os.path import isfile, isdir
28 from shutil import rmtree
29 from os import mkdir, remove, utime
34 from BeautifulSoup import BeautifulSoup
35 from urlparse import urljoin
38 return md5.new(string).hexdigest()
41 def __init__(self, configdir, key):
43 self.configdir = configdir
44 self.dir = "%s/%s.d" %(self.configdir, self.key)
45 if not isdir(self.dir):
47 if not isfile("%s/%s.db" %(self.dir, self.key)):
48 self.db = sqlite3.connect("%s/%s.db" %(self.dir, self.key) )
49 self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
50 self.db.execute("CREATE TABLE images (id text, imagePath text);")
53 self.db = sqlite3.connect("%s/%s.db" %(self.dir, self.key) )
55 def addImage(self, configdir, key, baseurl, url):
56 filename = configdir+key+".d/"+getId(url)
57 if not isfile(filename):
59 f = urllib2.urlopen(urljoin(baseurl,url))
60 outf = open(filename, "w")
65 print "Could not download " + url
67 #open(filename,"a").close() # "Touch" the file
68 file = open(filename,"a")
73 def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
74 # Expiry time is in hours
76 tmp=feedparser.parse(url, etag = etag, modified = modified)
78 tmp=feedparser.parse(url, etag = etag, modified = modified, handlers = [proxy])
79 expiry = float(expiryTime) * 3600.
81 currentTime = time.time()
82 # Check if the parse was successful (number of entries > 0, else do nothing)
83 if len(tmp["entries"])>0:
84 # The etag and modified value should only be updated if the content was not null
90 modified = tmp["modified"]
94 f = urllib2.urlopen(urljoin(tmp["feed"]["link"],"/favicon.ico"))
97 outf = open(self.dir+"/favicon.ico", "w")
103 #traceback.print_exc()
107 #reversedEntries = self.getEntries()
108 #reversedEntries.reverse()
112 tmp["entries"].reverse()
113 for entry in tmp["entries"]:
114 date = self.extractDate(entry)
118 entry["title"] = "No Title"
123 tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
124 "date":date, "link":entry["link"]}
125 id = self.generateUniqueId(tmpEntry)
127 #articleTime = time.mktime(self.entries[id]["dateTuple"])
129 soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
131 baseurl = tmpEntry["link"]
135 filename = self.addImage(configdir, self.key, baseurl, img['src'])
137 self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
140 traceback.print_exc()
141 print "Error downloading image %s" % img
142 tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
143 file = open(tmpEntry["contentLink"], "w")
144 file.write(soup.prettify())
146 values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
147 self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
150 filename = configdir+self.key+".d/"+id+".html"
151 file = open(filename,"a")
152 utime(filename, None)
154 images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
156 file = open(image[0],"a")
157 utime(image[0], None)
164 rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (2*expiry, expiry))
166 self.removeEntry(row[0])
168 from glob import glob
170 for file in glob(configdir+self.key+".d/*"):
174 # put the two dates into matching format
176 lastmodDate = stats[8]
178 expDate = time.time()-expiry*3
179 # check if image-last-modified-date is outdated
181 if expDate > lastmodDate:
185 #print 'Removing', file
187 remove(file) # commented out for testing
191 print 'Could not remove', file
192 return (currentTime, etag, modified)
194 def setEntryRead(self, id):
195 self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
198 def setEntryUnread(self, id):
199 self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
202 def markAllAsRead(self):
203 self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
def isEntryRead(self, id):
    """Report whether the entry identified by ``id`` is flagged as read.

    Fetches the ``read`` column of the matching row in the ``feed`` table
    and returns True when it equals 1, False otherwise. No check is made
    for a missing row.
    """
    cursor = self.db.execute("SELECT read FROM feed WHERE id=?;", (id,))
    row = cursor.fetchone()
    flag = row[0]
    return flag == 1
def getTitle(self, id):
    """Return the ``title`` column of the ``feed`` row whose id is ``id``."""
    cursor = self.db.execute("SELECT title FROM feed WHERE id=?;", (id,))
    row = cursor.fetchone()
    return row[0]
def getContentLink(self, id):
    """Return the ``contentLink`` column of the ``feed`` row whose id is ``id``."""
    cursor = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,))
    row = cursor.fetchone()
    return row[0]
def getExternalLink(self, id):
    """Return the ``link`` column of the ``feed`` row whose id is ``id``."""
    cursor = self.db.execute("SELECT link FROM feed WHERE id=?;", (id,))
    row = cursor.fetchone()
    return row[0]
def getDate(self, id):
    """Return the stored date of entry ``id`` as a formatted local-time string.

    The ``date`` column of the matching ``feed`` row is converted with
    ``time.localtime`` and rendered using the pattern
    ``"%a, %d %b %Y %H:%M:%S"``.
    """
    cursor = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,))
    stamp = cursor.fetchone()[0]
    local_parts = time.localtime(stamp)
    return time.strftime("%a, %d %b %Y %H:%M:%S", local_parts)
def getDateTuple(self, id):
    """Return the stored date of entry ``id`` as a ``time.localtime`` result."""
    cursor = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,))
    stamp = cursor.fetchone()[0]
    return time.localtime(stamp)
def getDateStamp(self, id):
    """Return the raw ``date`` column of the ``feed`` row whose id is ``id``."""
    cursor = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,))
    row = cursor.fetchone()
    return row[0]
def generateUniqueId(self, entry):
    """Build the identifier for ``entry`` from its date and title.

    The ``"date"`` and ``"title"`` values are each converted with ``str``,
    concatenated in that order, and the result is passed to ``getId``.
    """
    date_part = str(entry["date"])
    title_part = str(entry["title"])
    return getId(date_part + title_part)
233 def getIds(self, onlyUnread=False):
235 rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
237 rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
244 def getNextId(self, id):
246 index = ids.index(id)
247 return ids[(index+1)%len(ids)]
249 def getPreviousId(self, id):
251 index = ids.index(id)
252 return ids[(index-1)%len(ids)]
def getNumberOfUnreadItems(self):
    """Count the rows of the ``feed`` table whose ``read`` column is 0."""
    cursor = self.db.execute("SELECT count(*) FROM feed WHERE read=0;")
    row = cursor.fetchone()
    return row[0]
def getNumberOfEntries(self):
    """Count every row stored in the ``feed`` table."""
    cursor = self.db.execute("SELECT count(*) FROM feed;")
    row = cursor.fetchone()
    return row[0]
260 def getArticle(self, entry):
261 #self.setEntryRead(id)
262 #entry = self.entries[id]
263 title = entry['title']
264 #content = entry.get('content', entry.get('summary_detail', {}))
265 content = entry["content"]
268 date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )
270 #text = '''<div style="color: black; background-color: white;">'''
271 text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
272 text += "<html><head><title>" + title + "</title>"
273 text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
274 #text += '<style> body {-webkit-user-select: none;} </style>'
275 text += '</head><body><div><a href=\"' + link + '\">' + title + "</a>"
276 text += "<BR /><small><i>Date: " + date + "</i></small></div>"
277 text += "<BR /><BR />"
279 text += "</body></html>"
282 def getContent(self, id):
283 contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
285 file = open(self.entries[id]["contentLink"])
286 content = file.read()
289 content = "Content unavailable"
292 def extractDate(self, entry):
293 if entry.has_key("updated_parsed"):
294 return time.mktime(entry["updated_parsed"])
295 elif entry.has_key("published_parsed"):
296 return time.mktime(entry["published_parsed"])
300 def extractContent(self, entry):
302 if entry.has_key('summary'):
303 content = entry.get('summary', '')
304 if entry.has_key('content'):
305 if len(entry.content[0].value) > len(content):
306 content = entry.content[0].value
308 content = entry.get('description', '')
311 def removeEntry(self, id):
312 contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
315 os.remove(contentLink)
317 print "File not found for deletion: %s" % contentLink
318 self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
319 self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
322 class ArchivedArticles(Feed):
323 def addArchivedArticle(self, title, link, date, configdir):
324 id = self.generateUniqueId({"date":date, "title":title})
325 values = (id, title, link, date, 0, link, 0)
326 self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
329 def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
330 currentTime = time.time()
331 rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
335 f = urllib2.urlopen(link)
336 #entry["content"] = f.read()
339 soup = BeautifulSoup(html)
343 filename = self.addImage(configdir, self.key, baseurl, img['src'])
345 self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
346 contentLink = configdir+self.key+".d/"+id+".html"
347 file = open(contentLink, "w")
348 file.write(soup.prettify())
351 self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
353 return (currentTime, None, None)
355 def purgeReadArticles(self):
356 rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
359 self.removeArticle(row[0])
361 def removeArticle(self, id):
362 rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
365 count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
373 # Lists all the feeds in a dictionary, and exposes the data
374 def __init__(self, configdir):
375 self.configdir = configdir
377 self.db = sqlite3.connect("%s/feeds.db" % self.configdir)
380 self.db.execute("create table feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text);")
381 if isfile(self.configdir+"feeds.pickle"):
382 self.importOldFormatFeeds()
384 self.addFeed("Maemo News", "http://maemo.org/news/items.xml")
386 # Table already created
389 def importOldFormatFeeds(self):
390 """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
392 listing = rss.Listing(self.configdir)
394 for id in listing.getListOfFeeds():
397 values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None")
398 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified) VALUES (?, ?, ? ,? ,? ,?, ?, ?);", values)
401 feed = listing.getFeed(id)
402 new_feed = self.getFeed(id)
404 items = feed.getIds()[:]
407 if feed.isEntryRead(item):
411 date = time.mktime(feed.getDateTuple(item))
412 title = feed.getTitle(item)
413 newId = new_feed.generateUniqueId({"date":date, "title":title})
414 values = (newId, title , feed.getContentLink(item), date, time.time(), feed.getExternalLink(item), read_status)
415 new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
418 images = feed.getImages(item)
420 new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
424 self.updateUnread(id)
427 traceback.print_exc()
428 remove(self.configdir+"feeds.pickle")
431 def addArchivedArticle(self, key, index):
432 feed = self.getFeed(key)
433 title = feed.getTitle(index)
434 link = feed.getExternalLink(index)
435 date = feed.getDate(index)
436 count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
438 self.addFeed("Archived Articles", "", id="ArchivedArticles")
440 archFeed = self.getFeed("ArchivedArticles")
441 archFeed.addArchivedArticle(title, link, date, self.configdir)
442 self.updateUnread("ArchivedArticles")
444 def updateFeed(self, key, expiryTime=24, proxy=None, imageCache=False):
445 feed = self.getFeed(key)
446 db = sqlite3.connect("%s/feeds.db" % self.configdir)
447 (url, etag, modified) = db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
448 (updateTime, etag, modified) = feed.updateFeed(self.configdir, url, etag, eval(modified), expiryTime, proxy, imageCache)
449 db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, str(modified), key) )
451 self.updateUnread(key, db=db)
def getFeed(self, key):
    """Construct the feed object for ``key``.

    The key ``"ArchivedArticles"`` yields an ``ArchivedArticles`` instance;
    any other key yields a plain ``Feed``. Both are constructed with
    ``self.configdir`` and ``key``.
    """
    if key != "ArchivedArticles":
        return Feed(self.configdir, key)
    return ArchivedArticles(self.configdir, key)
458 def editFeed(self, key, title, url):
459 self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
def getFeedUpdateTime(self, key):
    """Return the ``updateTime`` of feed ``key`` formatted by ``time.ctime``."""
    cursor = self.db.execute("SELECT updateTime FROM feeds WHERE id=?;", (key,))
    stamp = cursor.fetchone()[0]
    return time.ctime(stamp)
def getFeedNumberOfUnreadItems(self, key):
    """Return the ``unread`` column recorded in ``feeds`` for feed ``key``."""
    cursor = self.db.execute("SELECT unread FROM feeds WHERE id=?;", (key,))
    row = cursor.fetchone()
    return row[0]
def getFeedTitle(self, key):
    """Return the ``title`` column recorded in ``feeds`` for feed ``key``."""
    cursor = self.db.execute("SELECT title FROM feeds WHERE id=?;", (key,))
    row = cursor.fetchone()
    return row[0]
def getFeedUrl(self, key):
    """Return the ``url`` column recorded in ``feeds`` for feed ``key``."""
    cursor = self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,))
    row = cursor.fetchone()
    return row[0]
474 def getListOfFeeds(self):
475 rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
482 def getSortedListOfKeys(self, order, onlyUnread=False):
483 if order == "Most unread":
484 tmp = "ORDER BY unread DESC"
485 #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1], reverse=True)
486 elif order == "Least unread":
487 tmp = "ORDER BY unread"
488 #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1])
489 elif order == "Most recent":
490 tmp = "ORDER BY updateTime DESC"
491 #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2], reverse=True)
492 elif order == "Least recent":
493 tmp = "ORDER BY updateTime"
494 #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2])
495 else: # order == "Manual" or invalid value...
496 tmp = "ORDER BY rank"
497 #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][0])
499 sql = "SELECT id FROM feeds WHERE unread>0 " + tmp
501 sql = "SELECT id FROM feeds " + tmp
502 rows = self.db.execute(sql)
509 def getFavicon(self, key):
510 filename = "%s%s.d/favicon.ico" % (self.configdir, key)
516 def updateUnread(self, key, db=None):
519 feed = self.getFeed(key)
520 db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
523 def addFeed(self, title, url, id=None):
526 count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
528 max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
531 values = (id, title, url, 0, 0, max_rank+1, None, "None")
532 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified) VALUES (?, ?, ? ,? ,? ,?, ?, ?);", values)
534 # Ask for the feed object, it will create the necessary tables
540 def removeFeed(self, key):
541 rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
542 self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
543 self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
546 if isdir(self.configdir+key+".d/"):
547 rmtree(self.configdir+key+".d/")
550 #def saveConfig(self):
551 # self.listOfFeeds["feedingit-order"] = self.sortedKeys
552 # file = open(self.configdir+"feeds.pickle", "w")
553 # pickle.dump(self.listOfFeeds, file)
556 def moveUp(self, key):
557 rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
559 self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
560 self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
563 def moveDown(self, key):
564 rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
565 max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
567 self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
568 self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )