# ============================================================================
# Name : FeedingIt.py
# Author : Yves Marcoz
-# Version : 0.5.0
+# Version : 0.5.4
# Description : Simple RSS Reader
# ============================================================================
-from os.path import isfile
-from os.path import isdir
+from os.path import isfile, isdir
from shutil import rmtree
-from os import mkdir
+from os import mkdir, remove, utime
import pickle
import md5
import feedparser
import time
import urllib2
from BeautifulSoup import BeautifulSoup
-from urlparse import urlparse
+from urlparse import urljoin
#CONFIGDIR="/home/user/.feedingit/"
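+# Ids are md5 hex digests of feed titles / image urls, which double as
+# filesystem-safe names for the per-feed cache directories and files.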
def getId(string):
return md5.new(string).hexdigest()
+#def getProxy():
+# import gconf
+# if gconf.client_get_default().get_bool('/system/http_proxy/use_http_proxy'):
+# port = gconf.client_get_default().get_int('/system/http_proxy/port')
+# http = gconf.client_get_default().get_string('/system/http_proxy/host')
+#        proxy = urllib2.ProxyHandler( {"http":"http://%s:%s/"% (http,port)} )
+# return (True, proxy)
+# return (False, None)
+
+# Enable proxy support for images and ArchivedArticles (currently disabled)
+#(proxy_support, proxy) = getProxy()
+#if proxy_support:
+# opener = urllib2.build_opener(proxy)
+# urllib2.install_opener(opener)
+
# Entry = {"title":XXX, "content":XXX, "date":XXX, "link":XXX, images = [] }
class ImageHandler:
def __init__(self, configdir):
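+        # Reduced to a stub: image caching moved into Feed.addImage(), but the
+        # class is kept so previously pickled configurations still unpickle.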
- self.configdir = configdir
- self.images = {}
-
- def addImage(self, key, baseurl, url):
- filename = self.configdir+key+".d/"+getId(url)
- if not isfile(filename):
- try:
- if url.startswith("http"):
- f = urllib2.urlopen(url)
- else:
- f = urllib2.urlopen(baseurl+"/"+url)
- outf = open(filename, "w")
- outf.write(f.read())
- f.close()
- outf.close()
- except:
- print "Could not download" + url
- if url in self.images:
- self.images[url] += 1
- else:
- self.images[url] = 1
- return "file://" + filename
-
- def removeImage(self, key, url):
- filename = self.configdir+key+".d/"+getId(url)
- self.images[url] -= 1
- if self.images[url] == 0:
- os.remove(filename)
- del self.images[url]
-
-class UnreadTracker:
- def __init__(self):
- self.readItems = {}
- self.countUnread
-
- def setEntryUnread(self, id):
- if self.readItems.has_key(id):
- if self.readItems[id]==True:
- self.countUnread = self.countUnread + 1
- self.readItems[id] = False
- else:
- self.readItems[id] = False
- self.countUnread = self.countUnread + 1
-
- def setEntryRead(self, id):
- if self.readItems[id]==False:
- self.countUnread = self.countUnread - 1
- self.readItems[id] = True
-
- def isRead(self, id):
- return self.readItems[id]
-
- def removeEntry(self, id):
- if self.readItems[id]==False:
- self.countUnread = self.countUnread - 1
- del self.readItems[id]
+ pass
class Feed:
- def __init__(self, uniqueId, name, url, imageHandler):
+ def __init__(self, uniqueId, name, url):
self.titles = []
self.entries = {}
self.ids = []
self.countUnread = 0
self.updateTime = "Never"
self.uniqueId = uniqueId
- self.imageHandler = imageHandler
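+        # HTTP cache validators, replayed on the next fetch so an unchanged
+        # feed can answer 304 Not Modified instead of the full document.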
+ self.etag = None
+ self.modified = None
+
+ def addImage(self, configdir, key, baseurl, url):
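+        # Cache an image under <configdir><key>.d/, named by the md5 of its
+        # url; relative urls are resolved against the article's baseurl.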
+ filename = configdir+key+".d/"+getId(url)
+ if not isfile(filename):
+ try:
+ #if url.startswith("http"):
+ # f = urllib2.urlopen(url)
+ #else:
+ f = urllib2.urlopen(urljoin(baseurl,url))
+ outf = open(filename, "w")
+ outf.write(f.read())
+ f.close()
+ outf.close()
+ except:
+ print "Could not download " + url
+        else:
+            # "Touch" the cached file so the mtime-based cleanup sweep in
+            # updateFeed() sees it as still in use.
+            utime(filename, None)
+ return filename
def editFeed(self, url):
self.url = url
pass
return self.countUnread
- def updateFeed(self, configdir, expiryTime=24, proxy=None):
+ def updateFeed(self, configdir, expiryTime=24, proxy=None, imageCache=False):
# Expiry time is in hours
if proxy == None:
- tmp=feedparser.parse(self.url)
+ tmp=feedparser.parse(self.url, etag = self.etag, modified = self.modified)
else:
- tmp=feedparser.parse(self.url, handlers = [proxy])
+ tmp=feedparser.parse(self.url, etag = self.etag, modified = self.modified, handlers = [proxy])
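+        # With etag/modified set, feedparser performs a conditional GET: on a
+        # 304 Not Modified response tmp["entries"] is empty, so the whole
+        # update below is skipped.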
+ expiry = float(expiryTime) * 3600.
+
        # Check if the parse was successful (number of entries > 0, else do nothing)
if len(tmp["entries"])>0:
+            # The etag and modified values should only be updated if the feed returned content
+ try:
+ self.etag = tmp["etag"]
+ except KeyError:
+ self.etag = None
+ try:
+ self.modified = tmp["modified"]
+ except KeyError:
+ self.modified = None
+ if not isdir(configdir+self.uniqueId+".d"):
+ mkdir(configdir+self.uniqueId+".d")
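+            # Grab the site's favicon for the feed list; urljoin against
+            # "/favicon.ico" resolves to the site root.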
+ try:
+ f = urllib2.urlopen(urljoin(tmp["feed"]["link"],"/favicon.ico"))
+ data = f.read()
+ f.close()
+ outf = open(configdir+self.uniqueId+".d/favicon.ico", "w")
+ outf.write(data)
+ outf.close()
+ del data
+ except:
+ #import traceback
+ #traceback.print_exc()
+ pass
+
+
#reversedEntries = self.getEntries()
#reversedEntries.reverse()
+
+ currentTime = time.time()
tmpEntries = {}
tmpIds = []
for entry in tmp["entries"]:
(dateTuple, date) = self.extractDate(entry)
+            # Some feeds omit the title or link element; substitute defaults.
+            try:
+                entry["title"]
+            except KeyError:
+                entry["title"] = "No Title"
+            try:
+                entry["link"]
+            except KeyError:
+                entry["link"] = ""
tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
"date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[] }
id = self.generateUniqueId(tmpEntry)
- tmpEntries[id] = tmpEntry
- tmpIds.append(id)
- for entryId in self.getIds():
- currentTime = time.time()
- expiry = float(expiryTime) * 3600.
- articleTime = time.mktime(self.entries[entryId]["dateTuple"])
- if currentTime - articleTime < expiry:
- if not entryId in tmpIds:
- tmpEntries[entryId] = self.entries[entryId]
- tmpIds.append(entryId)
+
+ #articleTime = time.mktime(self.entries[id]["dateTuple"])
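+            # New entry: cache the article as a local html file and, when
+            # imageCache is enabled, download its images and point the img
+            # tags at the local copies.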
+ if not id in self.ids:
+ soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
+ images = soup('img')
+ baseurl = tmpEntry["link"]
+ if imageCache:
+ for img in images:
+ try:
+ filename = self.addImage(configdir, self.uniqueId, baseurl, img['src'])
+ img['src']=filename
+ tmpEntry["images"].append(filename)
+ except:
+ print "Error downloading image %s" % img
+ tmpEntry["contentLink"] = configdir+self.uniqueId+".d/"+id+".html"
+ file = open(tmpEntry["contentLink"], "w")
+ file.write(soup.prettify())
+ file.close()
+ tmpEntries[id] = tmpEntry
+ tmpIds.append(id)
+ if id not in self.readItems:
+ self.readItems[id] = False
else:
- if (not self.isEntryRead(entryId)) and (currentTime - articleTime < 2*expiry):
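+                # Known entry: touch the cached html and images so the
+                # mtime-based sweep at the end of this update keeps them.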
+ try:
+ filename = configdir+self.uniqueId+".d/"+id+".html"
+ file = open(filename,"a")
+ utime(filename, None)
+ file.close()
+ for image in self.entries[id]["images"]:
+ file = open(image,"a")
+ utime(image, None)
+ file.close()
+ except:
+ pass
+ tmpEntries[id] = self.entries[id]
+ tmpIds.append(id)
+
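+        # Purge entries that dropped out of the feed: read articles are kept
+        # for one expiry period, unread ones for two.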
+ oldIds = self.ids[:]
+ for entryId in oldIds:
+ if not entryId in tmpIds:
+ try:
+ articleTime = time.mktime(self.entries[entryId]["dateTuple"])
+ if (currentTime - articleTime > 2*expiry):
+ self.removeEntry(entryId)
+ continue
+ if (currentTime - articleTime > expiry) and (self.isEntryRead(entryId)):
+                        # Entry is older than the expiry period and already read
+ self.removeEntry(entryId)
+ continue
tmpEntries[entryId] = self.entries[entryId]
tmpIds.append(entryId)
-
+ except:
+ print "Error purging old articles %s" % entryId
+ self.removeEntry(entryId)
+
self.entries = tmpEntries
self.ids = tmpIds
- self.countUnread = 0
- # Initialize the new articles to unread
- tmpReadItems = self.readItems
- self.readItems = {}
- for id in self.getIds():
- if not tmpReadItems.has_key(id):
+        tmpUnread = 0
+
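+        # Recount unread entries, registering ids not seen before as unread.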
+ ids = self.ids[:]
+ for id in ids:
+ if not self.readItems.has_key(id):
self.readItems[id] = False
- else:
- self.readItems[id] = tmpReadItems[id]
if self.readItems[id]==False:
- self.countUnread = self.countUnread + 1
+ tmpUnread = tmpUnread + 1
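+        # Drop read-state for ids that no longer exist in the feed.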
+ keys = self.readItems.keys()
+ for id in keys:
+ if not id in self.ids:
+ del self.readItems[id]
del tmp
+ self.countUnread = tmpUnread
self.updateTime = time.asctime()
self.saveFeed(configdir)
+        # Sweep the cache directory: remove any file (article html, images,
+        # favicon) whose last-modified time is older than three expiry periods.
+        from glob import glob
+        from os import stat
+        expDate = time.time() - expiry*3
+        for file in glob(configdir+self.uniqueId+".d/*"):
+            lastmodDate = stat(file).st_mtime
+            if expDate > lastmodDate:
+                try:
+                    remove(file)
+                except OSError:
+                    print 'Could not remove', file
+
def extractContent(self, entry):
content = ""
self.readItems[id] = False
def isEntryRead(self, id):
- return self.readItems[id]
+ # Check if an entry is read; return False if the read
+ # status of an entry is unknown (id not in readItems)
+ return self.readItems.get(id, False)
def getTitle(self, id):
return self.entries[id]["title"]
- def getLink(self, id):
+ def getContentLink(self, id):
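+        # Prefer the locally cached copy of the article when one was written.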
if self.entries[id].has_key("contentLink"):
return self.entries[id]["contentLink"]
return self.entries[id]["link"]
+ def getExternalLink(self, id):
+ return self.entries[id]["link"]
+
def getDate(self, id):
return self.entries[id]["date"]
return self.entries[id]["content"]
def removeEntry(self, id):
- entry = self.entries[id]
- for img in entry["images"]:
- self.imageHandler.removeImage(self.uniqueId, img)
- if entry.has_key["contentLink"]:
- os.remove(entry["contentLink"])
- self.entries.remove(id)
- self.ids.remove(id)
- if self.readItems[id]==False:
- self.countUnread = self.countUnread - 1
- self.readItems.remove(id)
+ #try:
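+        # Each piece of state (cached html, entry, id, read flag) is checked
+        # and removed independently, so a partially removed entry cannot
+        # break later purges.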
+ if self.entries.has_key(id):
+ entry = self.entries[id]
+
+ if entry.has_key("contentLink"):
+ try:
+ remove(entry["contentLink"]) #os.remove
+ except:
+ print "File not found for deletion: %s" % entry["contentLink"]
+ del self.entries[id]
+ else:
+ print "Entries has no %s key" % id
+ if id in self.ids:
+ self.ids.remove(id)
+ else:
+ print "Ids has no %s key" % id
+ if self.readItems.has_key(id):
+ if self.readItems[id]==False:
+ self.countUnread = self.countUnread - 1
+ del self.readItems[id]
+ else:
+ print "ReadItems has no %s key" % id
+ #except:
+ # print "Error removing entry %s" %id
- def getArticle(self, id):
- self.setEntryRead(id)
- entry = self.entries[id]
+ def getArticle(self, entry):
+ #self.setEntryRead(id)
+ #entry = self.entries[id]
title = entry['title']
#content = entry.get('content', entry.get('summary_detail', {}))
content = entry["content"]
self.saveFeed(configdir)
self.saveUnread(configdir)
- def updateFeed(self, configdir, expiryTime=24, proxy=None):
+ def updateFeed(self, configdir, expiryTime=24, proxy=None, imageCache=False):
for id in self.getIds():
entry = self.entries[id]
if not entry["downloaded"]:
html = f.read()
f.close()
soup = BeautifulSoup(html)
- images = soup.body('img')
- baseurl = ''.join(urlparse(entry["link"])[:-1])
+ images = soup('img')
+ baseurl = entry["link"]
for img in images:
- filename = self.imageHandler.addImage(self.uniqueId, baseurl, img['src'])
- #filename = configdir+self.uniqueId+".d/"+getId(img['src'])
- #if not isfile(filename):
- # try:
- # if img['src'].startswith("http"):
- # f = urllib2.urlopen(img['src'])
- # else:
- # f = urllib2.urlopen(baseurl+"/"+img['src'])
- # #print baseurl+"/"+img['src']
- # print filename
- # outf = open(filename, "w")
- # outf.write(f.read())
- # f.close()
- # outf.close()
- # except:
- # print "Could not download" + img['src']
+ filename = self.addImage(configdir, self.uniqueId, baseurl, img['src'])
img['src']=filename
- entry["images"].append(filename)
entry["contentLink"] = configdir+self.uniqueId+".d/"+id+".html"
file = open(entry["contentLink"], "w")
file.write(soup.prettify())
self.setEntryUnread(id)
#except:
# pass
- currentTime = time.time()
- expiry = float(expiryTime) * 3600
- if currentTime - entry["time"] > expiry:
- if self.isEntryRead(id):
- self.removeEntry(id)
- else:
- if currentTime - entry["time"] > 2*expiry:
- self.removeEntry(id)
+ #currentTime = time.time()
+ #expiry = float(expiryTime) * 3600
+ #if currentTime - entry["time"] > expiry:
+ # if self.isEntryRead(id):
+ # self.removeEntry(id)
+ # else:
+ # if currentTime - entry["time"] > 2*expiry:
+ # self.removeEntry(id)
self.updateTime = time.asctime()
self.saveFeed(configdir)
+
+ def purgeReadArticles(self):
+        ids = self.getIds()[:]    # iterate a copy; removeEntry() mutates the id list
+ for id in ids:
+ entry = self.entries[id]
+ if self.isEntryRead(id):
+ self.removeEntry(id)
+
+ def removeArticle(self, id):
+ self.removeEntry(id)
def getArticle(self, index):
self.setEntryRead(index)
self.listOfFeeds = pickle.load(file)
file.close()
else:
- self.listOfFeeds = {getId("Slashdot"):{"title":"Slashdot", "url":"http://rss.slashdot.org/Slashdot/slashdot", "unread":0, "updateTime":"Never"}, }
- if isfile(self.configdir+"images.pickle"):
- file = open(self.configdir+"images.pickle")
- self.imageHandler = pickle.load(file)
- file.close()
- else:
- self.imageHandler = ImageHandler(self.configdir)
+ self.listOfFeeds = {getId("Maemo News"):{"title":"Maemo News", "url":"http://maemo.org/news/items.xml", "unread":0, "updateTime":"Never"}, }
if self.listOfFeeds.has_key("font"):
del self.listOfFeeds["font"]
if self.listOfFeeds.has_key("feedingit-order"):
self.sortedKeys.remove("font")
self.sortedKeys.sort(key=lambda obj: self.getFeedTitle(obj))
list = self.sortedKeys[:]
- #for key in list:
- # try:
- # self.loadFeed(key)
- # except:
- #import traceback
- #if key.startswith('d8'):
- #traceback.print_exc()
- # self.sortedKeys.remove(key)
- #print key
- #print key in self.sortedKeys
- #print "d8eb3f07572892a7b5ed9c81c5bb21a2" in self.sortedKeys
- #print self.listOfFeeds["d8eb3f07572892a7b5ed9c81c5bb21a2"]
- self.closeCurrentlyDisplayedFeed()
- #self.saveConfig()
+ #self.closeCurrentlyDisplayedFeed()
def addArchivedArticle(self, key, index):
feed = self.getFeed(key)
title = feed.getTitle(index)
- link = feed.getLink(index)
+ link = feed.getExternalLink(index)
date = feed.getDateTuple(index)
if not self.listOfFeeds.has_key("ArchivedArticles"):
self.listOfFeeds["ArchivedArticles"] = {"title":"Archived Articles", "url":"", "unread":0, "updateTime":"Never"}
file.close()
try:
feed.uniqueId
- feed.imageHandler
except AttributeError:
feed.uniqueId = getId(feed.name)
- feed.imageHandler = self.imageHandler
+ try:
+ del feed.imageHandler
+        except AttributeError:
+ pass
+ try:
+ feed.etag
+ except AttributeError:
+ feed.etag = None
+ try:
+ feed.modified
+ except AttributeError:
+ feed.modified = None
#feed.reloadUnread(self.configdir)
else:
#print key
title = self.listOfFeeds[key]["title"]
url = self.listOfFeeds[key]["url"]
if key == "ArchivedArticles":
- feed = ArchivedArticles("ArchivedArticles", title, url, self.imageHandler)
+ feed = ArchivedArticles("ArchivedArticles", title, url)
else:
- feed = Feed(getId(title), title, url, self.imageHandler)
+ feed = Feed(getId(title), title, url)
return feed
- def updateFeeds(self, expiryTime=24, proxy=None):
+ def updateFeeds(self, expiryTime=24, proxy=None, imageCache=False):
for key in self.getListOfFeeds():
feed = self.loadFeed(key)
- feed.updateFeed(self.configdir, expiryTime, proxy)
+ feed.updateFeed(self.configdir, expiryTime, proxy, imageCache)
self.listOfFeeds[key]["unread"] = feed.getNumberOfUnreadItems()
self.listOfFeeds[key]["updateTime"] = feed.getUpdateTime()
- def updateFeed(self, key, expiryTime=24, proxy=None):
+ def updateFeed(self, key, expiryTime=24, proxy=None, imageCache=False):
feed = self.getFeed(key)
- feed.updateFeed(self.configdir, expiryTime, proxy)
+ feed.updateFeed(self.configdir, expiryTime, proxy, imageCache)
self.listOfFeeds[key]["unread"] = feed.getNumberOfUnreadItems()
self.listOfFeeds[key]["updateTime"] = feed.getUpdateTime()
feed.editFeed(url)
def getFeed(self, key):
- feed = self.loadFeed(key)
- feed.reloadUnread(self.configdir)
+ try:
+ feed = self.loadFeed(key)
+ feed.reloadUnread(self.configdir)
+ except:
+ # If the feed file gets corrupted, we need to reset the feed.
+ import traceback
+ traceback.print_exc()
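+            # Tell the user via a system banner; SystemNoteInfoprint is
+            # Maemo's extension to org.freedesktop.Notifications.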
+ import dbus
+ bus = dbus.SessionBus()
+ remote_object = bus.get_object("org.freedesktop.Notifications", # Connection name
+ "/org/freedesktop/Notifications" # Object's path
+ )
+ iface = dbus.Interface(remote_object, 'org.freedesktop.Notifications')
+ iface.SystemNoteInfoprint("Error opening feed %s, it has been reset." % self.getFeedTitle(key))
+ if isdir(self.configdir+key+".d/"):
+ rmtree(self.configdir+key+".d/")
+ feed = self.loadFeed(key)
return feed
def getFeedUpdateTime(self, key):
def getListOfFeeds(self):
return self.sortedKeys
- #def getNumberOfUnreadItems(self, key):
- # if self.listOfFeeds.has_key("unread"):
- # return self.listOfFeeds[key]["unread"]
- # else:
- # return 0
+ def getFavicon(self, key):
+ filename = self.configdir+key+".d/favicon.ico"
+ if isfile(filename):
+ return filename
+ else:
+ return False
def addFeed(self, title, url):
if not self.listOfFeeds.has_key(getId(title)):
file = open(self.configdir+"feeds.pickle", "w")
pickle.dump(self.listOfFeeds, file)
file.close()
- file = open(self.configdir+"images.pickle", "w")
- pickle.dump(self.imageHandler, file)
- file.close()
def moveUp(self, key):
index = self.sortedKeys.index(key)
index2 = (index+1)%len(self.sortedKeys)
self.sortedKeys[index] = self.sortedKeys[index2]
self.sortedKeys[index2] = key
-
- def setCurrentlyDisplayedFeed(self, key):
- self.currentlyDisplayedFeed = key
- def closeCurrentlyDisplayedFeed(self):
- self.currentlyDisplayedFeed = False
- def getCurrentlyDisplayedFeed(self):
- return self.currentlyDisplayedFeed
if __name__ == "__main__":
listing = Listing('/home/user/.feedingit/')
    for key in listing.getListOfFeeds():
if key.startswith('d8'):
- print listing.getFeedUpdateTime(key)
\ No newline at end of file
+ print listing.getFeedUpdateTime(key)