1 #!/usr/bin/env python2.5
4 # Copyright (c) 2007-2008 INdT.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Lesser General Public License for more details.
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # ============================================================================
21 # Author : Yves Marcoz
23 # Description : Simple RSS Reader
24 # ============================================================================
27 from os.path import isfile, isdir
28 from shutil import rmtree
29 from os import mkdir, remove, utime
34 from BeautifulSoup import BeautifulSoup
35 from urlparse import urljoin
# NOTE(review): the enclosing `def getId(string):` header is not visible in
# this chunk. This hashes the string with MD5 and returns the hex digest;
# it is used throughout to derive stable entry ids and cache filenames.
# Requires the Python 2 `md5` module.
return md5.new(string).hexdigest()
def __init__(self, configdir, key):
    """Open (creating on first use) the per-feed sqlite database stored
    under <configdir>/<key>.d/<key>.db.

    NOTE(review): several original lines are absent from this chunk
    (the self.key assignment, the mkdir() body, and the else: header);
    the code below is annotated as-is.
    """
    self.configdir = configdir
    # presumably `self.key = key` was assigned just before this -- TODO confirm
    self.dir = "%s/%s.d" %(self.configdir, self.key)
    if not isdir(self.dir):
        # (missing body here -- presumably mkdir(self.dir))
    if not isfile("%s/%s.db" %(self.dir, self.key)):
        # First run for this feed: create the schema.
        self.db = sqlite3.connect("%s/%s.db" %(self.dir, self.key) )
        self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
        self.db.execute("CREATE TABLE images (id text, imagePath text);")
        # (missing else: header) reopen the existing database
        self.db = sqlite3.connect("%s/%s.db" %(self.dir, self.key) )
def addImage(self, configdir, key, baseurl, url):
    """Download the image at *url* (resolved against *baseurl*) into the
    feed's cache directory, named by the MD5 of the url.

    NOTE(review): the try/except frame, the outf.write()/close() calls and
    the final `return filename` are missing from this view; the `print`
    line below belongs to the except branch.
    """
    # Cache target: <configdir><key>.d/<md5-of-url>
    filename = configdir+key+".d/"+getId(url)
    if not isfile(filename):
        # Only fetch when not already cached.
        f = urllib2.urlopen(urljoin(baseurl,url))
        outf = open(filename, "w")
        # (except branch) fetch failed -- leave a placeholder file below
        print "Could not download " + url
        #open(filename,"a").close() # "Touch" the file
        file = open(filename,"a")
def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
    """Fetch *url* with feedparser (honouring etag/modified and an optional
    proxy handler), store new entries and their images in the feed DB, and
    expire entries and cached files older than the expiry window.

    Returns (currentTime, etag, modified) for the caller to persist.

    NOTE(review): many original lines are missing from this chunk -- the
    proxy if/else, try/except frames, the img and row loop headers, the
    os.stat call, etc. Comments below mark the visible logic only.
    """
    # Expiry time is in hours
    tmp=feedparser.parse(url, etag = etag, modified = modified)
    # (missing conditional) proxy variant of the same parse call:
    tmp=feedparser.parse(url, etag = etag, modified = modified, handlers = [proxy])
    expiry = float(expiryTime) * 3600.
    currentTime = time.time()
    # Check if the parse was succesful (number of entries > 0, else do nothing)
    if len(tmp["entries"])>0:
        # The etag and modified value should only be updated if the content was not null
        modified = tmp["modified"]
        # Best-effort favicon fetch for the feed's site root.
        f = urllib2.urlopen(urljoin(tmp["feed"]["link"],"/favicon.ico"))
        outf = open(self.dir+"/favicon.ico", "w")
        #traceback.print_exc()
        #reversedEntries = self.getEntries()
        #reversedEntries.reverse()
        # Process oldest-first so ids and ordering stay stable.
        tmp["entries"].reverse()
        for entry in tmp["entries"]:
            date = self.extractDate(entry)
            # (missing guard) entries without a title get a placeholder:
            entry["title"] = "No Title"
            tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
                "date":date, "link":entry["link"]}
            id = self.generateUniqueId(tmpEntry)
            #articleTime = time.mktime(self.entries[id]["dateTuple"])
            # Render the entry to HTML and rewrite <img> tags to cached copies.
            soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
            baseurl = tmpEntry["link"]
            # (missing img loop header) each image is cached and recorded:
            filename = self.addImage(configdir, self.key, baseurl, img['src'])
            self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
            # (except branch) image download failure is logged, not fatal:
            traceback.print_exc()
            print "Error downloading image %s" % img
            # Persist the rendered article next to the DB.
            tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
            file = open(tmpEntry["contentLink"], "w")
            file.write(soup.prettify())
            values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
            self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
            # (missing else branch) entry already known: touch its files so
            # the expiry sweep below does not delete them.
            filename = configdir+self.key+".d/"+id+".html"
            file = open(filename,"a")
            utime(filename, None)
            images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
            # (missing image loop header)
            file = open(image[0],"a")
            utime(image[0], None)
        # Expire old rows: unread entries get twice the window of read ones.
        rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (2*expiry, expiry))
        # NOTE(review): the comparison uses raw expiry values, not
        # currentTime - expiry -- TODO confirm against the missing lines.
        self.removeEntry(row[0])
        # Sweep cached files whose mtime is older than 3x the window.
        from glob import glob
        for file in glob(configdir+self.key+".d/*"):
            # put the two dates into matching format
            # (missing os.stat(file) call populating `stats`)
            lastmodDate = stats[8]
            expDate = time.time()-expiry*3
            # check if image-last-modified-date is outdated
            if expDate > lastmodDate:
                #print 'Removing', file
                remove(file) # commented out for testing
            # (except branch)
            print 'Could not remove', file
    return (currentTime, etag, modified)
def setEntryRead(self, id):
    """Flag the entry identified by *id* as read (read=1)."""
    query = "UPDATE feed SET read=1 WHERE id=?;"
    self.db.execute(query, (id,))
def setEntryUnread(self, id):
    """Flag the entry identified by *id* as unread (read=0)."""
    query = "UPDATE feed SET read=0 WHERE id=?;"
    self.db.execute(query, (id,))
def isEntryRead(self, id):
    """True when the stored read flag for entry *id* equals 1."""
    row = self.db.execute("SELECT read FROM feed WHERE id=?;", (id,) ).fetchone()
    return row[0] == 1
def getTitle(self, id):
    """Return the stored title of entry *id*."""
    cursor = self.db.execute("SELECT title FROM feed WHERE id=?;", (id,) )
    return cursor.fetchone()[0]
def getContentLink(self, id):
    """Return the path of entry *id*'s cached HTML file."""
    cursor = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) )
    return cursor.fetchone()[0]
def getExternalLink(self, id):
    """Return the original web link of entry *id*."""
    cursor = self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) )
    return cursor.fetchone()[0]
def getDate(self, id):
    """Return entry *id*'s timestamp formatted as e.g.
    'Mon, 01 Jan 2000 00:00:00' in local time."""
    row = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()
    return time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(row[0]))
def getDateTuple(self, id):
    """Return entry *id*'s timestamp as a local-time struct_time."""
    row = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()
    return time.localtime(row[0])
def getDateStamp(self, id):
    """Return entry *id*'s raw timestamp (seconds since the epoch)."""
    cursor = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) )
    return cursor.fetchone()[0]
def generateUniqueId(self, entry):
    """Derive a stable id for *entry* by hashing its date and title
    concatenated together."""
    key = str(entry["date"]) + str(entry["title"])
    return getId(key)
# NOTE(review): the `def getIds(self):` header and the list-building/return
# lines that follow are not visible in this chunk. This fetches every entry
# id from the feed table.
rows = self.db.execute("SELECT id FROM feed;").fetchall()
def getNextId(self, id):
    """Return the id that follows *id* in this feed's id list, wrapping
    back to the first entry past the end.

    NOTE(review): the line populating `ids` (presumably
    `ids = self.getIds()`) is missing from this view.
    """
    index = ids.index(id)
    # Modulo wrap-around keeps "next" defined at the last entry.
    return ids[(index+1)%len(ids)]
def getPreviousId(self, id):
    """Return the id preceding *id* in this feed's id list, wrapping to
    the last entry before the first.

    NOTE(review): the line populating `ids` (presumably
    `ids = self.getIds()`) is missing from this view.
    """
    index = ids.index(id)
    # Python's modulo keeps a negative index in range, giving wrap-around.
    return ids[(index-1)%len(ids)]
def getNumberOfUnreadItems(self):
    """Count the feed rows whose read flag is still 0."""
    row = self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()
    return row[0]
def getNumberOfEntries(self):
    """Count every row stored in the feed table."""
    row = self.db.execute("SELECT count(*) FROM feed;").fetchone()
    return row[0]
def getArticle(self, entry):
    """Render *entry* (a dict with title/content/date/link keys) as a
    minimal standalone XHTML page and return it as a string.

    NOTE(review): the assignment of `link` (presumably entry["link"]),
    the `text += content` line and the final `return text` are missing
    from this chunk.
    """
    #self.setEntryRead(id)
    #entry = self.entries[id]
    title = entry['title']
    #content = entry.get('content', entry.get('summary_detail', {}))
    content = entry["content"]
    # Render the stored float timestamp in human-readable local time.
    date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )
    #text = '''<div style="color: black; background-color: white;">'''
    text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
    text += "<html><head><title>" + title + "</title>"
    text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
    #text += '<style> body {-webkit-user-select: none;} </style>'
    # Header block: linked title plus the formatted date.
    text += '</head><body><div><a href=\"' + link + '\">' + title + "</a>"
    text += "<BR /><small><i>Date: " + date + "</i></small></div>"
    text += "<BR /><BR />"
    text += "</body></html>"
def getContent(self, id):
    """Return the cached HTML content for entry *id*, or a placeholder
    string when the cached file cannot be read.

    NOTE(review): the try/except frame and the final return are missing
    from this view. Also, the code opens self.entries[id]["contentLink"]
    rather than the `contentLink` just fetched from the DB -- looks like
    a leftover from the older pickle-based format; confirm before
    relying on this path.
    """
    contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
    file = open(self.entries[id]["contentLink"])
    content = file.read()
    # (except branch) unreadable file degrades to a placeholder:
    content = "Content unavailable"
def extractDate(self, entry):
    """Return the entry's timestamp as seconds since the epoch.

    Prefers the feed's `updated_parsed` time, then `published_parsed`;
    falls back to the current time when neither is usable. The visible
    original fell through and implicitly returned None in that case,
    which poisons the float `date` column and date arithmetic downstream;
    feedparser may also set the *_parsed fields to None, which would make
    time.mktime raise, hence the truthiness guards.
    """
    if "updated_parsed" in entry and entry["updated_parsed"]:
        return time.mktime(entry["updated_parsed"])
    if "published_parsed" in entry and entry["published_parsed"]:
        return time.mktime(entry["published_parsed"])
    # No parsable timestamp: best-effort fallback so callers always get
    # a float.
    return time.time()
def extractContent(self, entry):
    """Pick the richest text available on a feedparser entry: the summary,
    then a longer content[0].value, with the description as fallback.

    NOTE(review): the initialisation of `content`, the guard before the
    description fallback, and the final `return content` are missing from
    this chunk; the indentation below is reconstructed.
    """
    if entry.has_key('summary'):
        content = entry.get('summary', '')
    if entry.has_key('content'):
        # Prefer the full content body when it is longer than the summary.
        if len(entry.content[0].value) > len(content):
            content = entry.content[0].value
    # (missing guard line -- presumably only when content is still empty)
    content = entry.get('description', '')
def removeEntry(self, id):
    """Delete entry *id*: its cached HTML file (best effort) and its rows
    in the feed and images tables.

    NOTE(review): the try/except frame around the file removal is missing
    from this view; the `print` below belongs to the except branch. Also
    `os.remove` is used although the visible imports only bring in
    `from os import ... remove` -- confirm `import os` exists upstream.
    """
    contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
    os.remove(contentLink)
    # (except branch) missing file is logged, not fatal:
    print "File not found for deletion: %s" % contentLink
    self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
    self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
class ArchivedArticles(Feed):
    """Feed subclass whose entries are user-archived articles rather than
    items fetched from an RSS url."""
    def addArchivedArticle(self, title, link, date, configdir):
        """Insert a placeholder row for an archived article.

        contentLink is NULL and updated=0, which marks the row as
        not-yet-downloaded for this class's updateFeed.
        NOTE(review): a trailing line (likely a commit) appears to be
        missing from this view.
        """
        id = self.generateUniqueId({"date":date, "title":title})
        values = (id, title, None, date, 0, link, 0)
        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
    """Download the full page for every archived article not yet fetched
    (updated=0), cache it plus its images, and mark the row updated.

    Returns (currentTime, None, None) -- archived feeds have no
    etag/modified. Matches the signature of Feed.updateFeed so Listing
    can treat both uniformly.

    NOTE(review): the row loop header, the id/link/baseurl/html
    assignments, the img loop and try/except frames are missing from this
    chunk; comments mark the visible logic only.
    """
    currentTime = time.time()
    rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
    # (missing loop header over rows, unpacking id and link)
    f = urllib2.urlopen(link)
    #entry["content"] = f.read()
    # (missing html assignment) parse the downloaded page:
    soup = BeautifulSoup(html)
    # (missing img loop header) cache each referenced image:
    filename = self.addImage(configdir, self.key, baseurl, img['src'])
    # Persist the prettified page next to the DB and mark the row done.
    contentLink = configdir+self.key+".d/"+id+".html"
    file = open(contentLink, "w")
    file.write(soup.prettify())
    self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
    return (currentTime, None, None)
def purgeReadArticles(self):
    """Remove archived articles flagged for purging.

    NOTE(review): the query selects read=0, which reads oddly for a
    'purge read' operation -- confirm the flag semantics in this class.
    The loop header over `rows` between these lines is missing from this
    view.
    """
    rows = self.db.execute("SELECT id FROM feed WHERE read=0;")
    self.removeEntry(row[0])
# NOTE(review): the body of removeArticle is not visible in this chunk.
def removeArticle(self, id):
# Lists all the feeds in a dictionary, and expose the data
def __init__(self, configdir):
    """Open (creating if needed) the top-level feeds.db listing database
    and seed it on first run.

    NOTE(review): the try/except frame around the table creation and the
    first-run/else branch headers are missing from this view; the
    addFeed call below presumably sits in an else branch.
    """
    self.configdir = configdir
    self.db = sqlite3.connect("%s/feeds.db" % self.configdir)
    # Raises on an existing DB; caught by the missing except branch.
    self.db.execute("create table feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text);")
    # Migrate from the old pickle-based format when present, otherwise
    # seed a default feed.
    if isfile(self.configdir+"feeds.pickle"):
        self.importOldFormatFeeds()
    self.addFeed("Maemo News", "http://maemo.org/news/items.xml")
    # Table already created
def importOldFormatFeeds(self):
    """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
    # NOTE(review): several lines are missing from this chunk -- the
    # `import rss`, the rank counter, the per-item loop header, the
    # read_status assignment, the image loop header and the try/except
    # frame around the whole migration. Comments mark visible logic only.
    listing = rss.Listing(self.configdir)
    for id in listing.getListOfFeeds():
        # Copy the feed's metadata row into feeds.db.
        values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None")
        self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified) VALUES (?, ?, ? ,? ,? ,?, ?, ?);", values)
        feed = listing.getFeed(id)
        new_feed = self.getFeed(id)
        # Copy every entry (and its cached images) into the new per-feed DB.
        items = feed.getIds()[:]
        # (missing per-item loop header)
        if feed.isEntryRead(item):
            # (missing read_status assignment)
            values = (item, feed.getTitle(item), feed.getContentLink(item), time.time(), time.time(), feed.getExternalLink(item), read_status)
            new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
            images = feed.getImages(item)
            # (missing image loop header)
            new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
        self.updateUnread(id)
    # (except branch) migration failures are logged, then the old pickle
    # is removed either way.
    traceback.print_exc()
    remove(self.configdir+"feeds.pickle")
def addArchivedArticle(self, key, index):
    """Copy entry *index* of feed *key* into the special "ArchivedArticles"
    feed, creating that feed on first use.

    NOTE(review): the `if count == 0:` guard before addFeed is missing from
    this view. Also, getDate returns a formatted string (per the visible
    Feed.getDate) while the per-feed `date` column is a float -- confirm
    which representation addArchivedArticle actually expects.
    """
    feed = self.getFeed(key)
    title = feed.getTitle(index)
    link = feed.getExternalLink(index)
    date = feed.getDate(index)
    count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
    # (missing guard) create the archive feed only when absent:
    self.addFeed("Archived Articles", "", id="ArchivedArticles")
    archFeed = self.getFeed("ArchivedArticles")
    archFeed.addArchivedArticle(title, link, date, self.configdir)
    self.updateUnread("ArchivedArticles")
def updateFeed(self, key, expiryTime=24, proxy=None, imageCache=False):
    """Refresh feed *key*: read its url/etag/modified from feeds.db, run
    the per-feed update, and persist the new updateTime/etag/modified.

    NOTE(review): a trailing commit/close for `db` appears to be missing
    from this view.
    """
    feed = self.getFeed(key)
    # Opens a fresh connection rather than reusing self.db -- presumably
    # for thread-safety during long updates; confirm.
    db = sqlite3.connect("%s/feeds.db" % self.configdir)
    (url, etag, modified) = db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
    # SECURITY: eval() on the stored `modified` text executes arbitrary
    # code if feeds.db is ever tampered with. It round-trips the str()'d
    # value written below; prefer ast.literal_eval or proper serialisation.
    (updateTime, etag, modified) = feed.updateFeed(self.configdir, url, etag, eval(modified), expiryTime, proxy, imageCache)
    db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, str(modified), key) )
def getFeed(self, key):
    """Instantiate the per-feed wrapper for *key*; the special key
    "ArchivedArticles" gets the ArchivedArticles subclass."""
    feed_cls = ArchivedArticles if key == "ArchivedArticles" else Feed
    return feed_cls(self.configdir, key)
def editFeed(self, key, title, url):
    """Rename feed *key* and/or point it at a new url in feeds.db."""
    sql = "UPDATE feeds SET title=?, url=? WHERE id=?;"
    self.db.execute(sql, (title, url, key))
def getFeedUpdateTime(self, key):
    """Return feed *key*'s last update time rendered via time.ctime()."""
    stamp = self.db.execute("SELECT updateTime FROM feeds WHERE id=?;", (key,)).fetchone()[0]
    return time.ctime(stamp)
def getFeedNumberOfUnreadItems(self, key):
    """Return the cached unread counter stored for feed *key*."""
    row = self.db.execute("SELECT unread FROM feeds WHERE id=?;", (key,)).fetchone()
    return row[0]
def getFeedTitle(self, key):
    """Return the display title stored for feed *key*."""
    row = self.db.execute("SELECT title FROM feeds WHERE id=?;", (key,)).fetchone()
    return row[0]
def getFeedUrl(self, key):
    """Return the source url stored for feed *key*."""
    row = self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()
    return row[0]
def getListOfFeeds(self):
    """Return the feed ids ordered by rank.

    NOTE(review): the list-building and return lines that follow are
    missing from this view.
    """
    rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
def getSortedListOfKeys(self, order):
    """Return feed ids sorted per the user-visible *order* string.

    NOTE(review): the lines collecting the row ids into a list and the
    final return are missing from this view.
    """
    # Map the UI's order label to an ORDER BY clause; anything
    # unrecognised falls back to the manual rank order.
    if order == "Most unread":
        tmp = "ORDER BY unread"
        #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1], reverse=True)
    elif order == "Least unread":
        tmp = "ORDER BY unread DESC"
        #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1])
    elif order == "Most recent":
        tmp = "ORDER BY updateTime"
        #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2], reverse=True)
    elif order == "Least recent":
        tmp = "ORDER BY updateTime DESC"
        #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2])
    else: # order == "Manual" or invalid value...
        tmp = "ORDER BY rank"
        #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][0])
    # NOTE(review): "Most unread" mapping to ascending ORDER BY looks
    # inverted -- confirm against the missing result-building lines.
    sql = "SELECT id FROM feeds " + tmp
    rows = self.db.execute(sql)
def getFavicon(self, key):
    """Return the path of feed *key*'s cached favicon.

    NOTE(review): the existence check and return lines that follow are
    missing from this view.
    """
    filename = "%s%s.d/favicon.ico" % (self.configdir, key)
def updateUnread(self, key):
    """Recompute the unread count for feed *key* from its entry database
    and cache it in the feeds table."""
    unread_count = self.getFeed(key).getNumberOfUnreadItems()
    self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (unread_count, key))
def addFeed(self, title, url, id=None):
    """Register a new feed row in feeds.db at the bottom of the rank order.

    NOTE(review): missing from this view: the derivation of `id` when
    None, the `if count == 0:` duplicate guard, any handling of MAX(rank)
    being NULL on an empty table, and the return value.
    """
    count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
    # New feeds go after the current highest rank.
    max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
    # unread=0, updateTime=0 (never fetched), etag=None, modified="None".
    values = (id, title, url, 0, 0, max_rank+1, None, "None")
    self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified) VALUES (?, ?, ? ,? ,? ,?, ?, ?);", values)
    # Ask for the feed object, it will create the necessary tables
def removeFeed(self, key):
    """Delete feed *key* from feeds.db, close the rank gap, and remove its
    on-disk cache directory.

    NOTE(review): lines between the rank update and the isdir check
    (likely a commit) are missing from this view.
    """
    rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
    self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
    # Keep ranks contiguous after the removal.
    self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
    if isdir(self.configdir+key+".d/"):
        # Drop the cached articles, images and per-feed DB as well.
        rmtree(self.configdir+key+".d/")
523 #def saveConfig(self):
524 # self.listOfFeeds["feedingit-order"] = self.sortedKeys
525 # file = open(self.configdir+"feeds.pickle", "w")
526 # pickle.dump(self.listOfFeeds, file)
def moveUp(self, key):
    """Swap feed *key* with the feed ranked immediately above it.

    NOTE(review): the guard between these lines (presumably
    `if rank > 0:`) is missing from this view.
    """
    rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
    # Two-step swap: push the neighbour down into our slot, then take its.
    self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
    self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
def moveDown(self, key):
    """Swap feed *key* with the feed ranked immediately below it.

    NOTE(review): the guard between these lines (presumably
    `if rank < max_rank:`) is missing from this view.
    """
    rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
    max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
    # Two-step swap: pull the neighbour up into our slot, then take its.
    self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
    self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )