Refactoring feed
[feedingit] / src / rss.py
#!/usr/bin/env python2.5


# Copyright (c) 2007-2008 INdT.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU Lesser General Public License for more details.
#
#  You should have received a copy of the GNU Lesser General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

# ============================================================================
# Name        : FeedingIt.py
# Author      : Yves Marcoz
# Version     : 0.5.0
# Description : Simple RSS Reader
# ============================================================================

from os import mkdir
from os.path import isfile
from os.path import isdir
from shutil import rmtree
import pickle
import md5
import feedparser
import time
import urllib2

#CONFIGDIR="/home/user/.feedingit/"

def getId(string):
    return md5.new(string).hexdigest()

class Entry:
    def __init__(self, title, content, date, link):
        self.title = title
        self.content = content
        self.date = date
        self.link = link

# Entry = {"title":XXX, "content":XXX, "date":XXX, "link":XXX, "images":[] }

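# The refactored Feed below keys each entry dictionary by the MD5 id returned
# by generateUniqueId(); self.ids preserves the feed order and self.readItems
# maps each id to its read/unread state.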
class Feed:
    def __init__(self, name, url):
        self.titles = []
        self.entries = {}
        self.ids = []
        self.readItems = {}
        self.countUnread = 0
        self.name = name
        self.url = url
        self.updateTime = "Never"

    def editFeed(self, url):
        self.url = url

    def saveFeed(self, configdir):
        if not isdir(configdir+getId(self.name)+".d"):
            mkdir(configdir+getId(self.name)+".d")
        file = open(configdir+getId(self.name)+".d/feed", "w")
        pickle.dump(self, file)
        file.close()

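    # updateFeed() merges the freshly parsed entries with the existing entries
    # that have not yet expired, then rebuilds the read/unread map so that
    # previously read articles stay read and new articles count as unread.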
    def updateFeed(self, configdir, expiryTime=24):
        # Expiry time is in hours
        tmp = feedparser.parse(self.url)
        # Check if the parse was successful (number of entries > 0, else do nothing)
        if len(tmp["entries"]) > 0:
            tmpEntries = {}
            tmpIds = []
            for entry in tmp["entries"]:
                (dateTuple, date) = self.extractDate(entry)
                tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
                            "date":date, "dateTuple":dateTuple, "link":entry["link"], "images":[] }
                id = self.generateUniqueId(tmpEntry)
                tmpEntries[id] = tmpEntry
                tmpIds.append(id)
            for entryId in self.ids:
                currentTime = time.time()
                expiry = float(expiryTime) * 3600.
                articleTime = time.mktime(self.entries[entryId]["dateTuple"])
                if currentTime - articleTime < expiry:
                    if not entryId in tmpIds:
                        tmpEntries[entryId] = self.entries[entryId]
                        tmpIds.append(entryId)

            self.entries = tmpEntries
            self.ids = tmpIds
            self.countUnread = 0
            # Initialize the new articles to unread
            tmpReadItems = self.readItems
            self.readItems = {}
            for index in range(self.getNumberOfEntries()):
                if not tmpReadItems.has_key(self.getUniqueId(index)):
                    self.readItems[self.getUniqueId(index)] = False
                else:
                    self.readItems[self.getUniqueId(index)] = tmpReadItems[self.getUniqueId(index)]
                if self.readItems[self.getUniqueId(index)] == False:
                    self.countUnread = self.countUnread + 1
            del tmp
            self.updateTime = time.asctime()
            self.saveFeed(configdir)

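    # extractContent() prefers the longer of the summary and the first content
    # element, and falls back to the description field when both are empty.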
    def extractContent(self, entry):
        content = ""
        if entry.has_key('summary'):
            content = entry.get('summary', '')
        if entry.has_key('content'):
            if len(entry.content[0].value) > len(content):
                content = entry.content[0].value
        if content == "":
            content = entry.get('description', '')
        return content

    def extractDate(self, entry):
        if entry.has_key("updated_parsed"):
            dateTuple = entry["updated_parsed"]
            date = time.strftime("%a, %d %b %Y %H:%M:%S", entry["updated_parsed"])
        elif entry.has_key("published_parsed"):
            dateTuple = entry["published_parsed"]
            date = time.strftime("%a, %d %b %Y %H:%M:%S", entry["published_parsed"])
        else:
            dateTuple = ""
            date = ""
        return (dateTuple, date)

    def setEntryRead(self, id):
        if self.readItems[id] == False:
            self.countUnread = self.countUnread - 1
            self.readItems[id] = True

    def setEntryUnread(self, id):
        if self.readItems[id] == True:
            self.countUnread = self.countUnread + 1
            self.readItems[id] = False

    def isEntryRead(self, id):
        return self.readItems[id]

    def getTitle(self, id):
        return self.entries[id]["title"]

    def getLink(self, id):
        return self.entries[id]["link"]

    def getDate(self, id):
        return self.entries[id]["date"]

    def getUniqueId(self, index):
        return self.ids[index]

    def generateUniqueId(self, entry):
        return getId(entry["date"] + entry["title"])

    def getUpdateTime(self):
        return self.updateTime

    def getEntries(self):
        return self.entries

    def getNumberOfUnreadItems(self):
        return self.countUnread

    def getNumberOfEntries(self):
        return len(self.ids)

    def getItem(self, id):
        try:
            return self.entries[id]
        except KeyError:
            return []

    def getContent(self, id):
        return self.entries[id]["content"]

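    # getArticle() marks the entry as read and wraps its content in a minimal
    # XHTML page (title, link and date header followed by the article body).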
    def getArticle(self, id):
        self.setEntryRead(id)
        entry = self.entries[id]
        title = entry['title']
        #content = entry.get('content', entry.get('summary_detail', {}))
        content = entry["content"]

        link = entry['link']
        date = entry["date"]

        #text = '''<div style="color: black; background-color: white;">'''
        text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
        text += "<html><head><title>" + title + "</title>"
        text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
        text += '<style> body {-webkit-user-select: none;} </style></head>'
        text += '<body><div><a href=\"' + link + '\">' + title + "</a>"
        text += "<BR /><small><i>Date: " + date + "</i></small></div>"
        text += "<BR /><BR />"
        text += content
        text += "</body></html>"
        return text


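# FeedX is the previous, index-based feed implementation: it keeps the raw
# feedparser entries in a list and addresses them by position. It is retained
# here while the id-based Feed class above is being refactored.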
class FeedX:
    # Contains all the info about a single feed (articles, ...) and exposes the data
    def __init__(self, name, url):
        self.entries = []
        self.readItems = {}
        self.countUnread = 0
        self.name = name
        self.url = url
        self.updateTime = "Never"

    def editFeed(self, url):
        self.url = url

    def saveFeed(self, configdir):
        file = open(configdir+getId(self.name), "w")
        pickle.dump(self, file)
        file.close()

    def updateFeed(self, configdir, expiryTime=24):
        # Expiry time is in hours
        tmp = feedparser.parse(self.url)
        # Check if the parse was successful (number of entries > 0, else do nothing)
        if len(tmp["entries"]) > 0:
            tmpIds = []
            for entry in tmp["entries"]:
                tmpIds.append(self.getUniqueId(-1, entry))
            for entry in self.getEntries():
                currentTime = time.time()
                expiry = float(expiryTime) * 3600.
                if entry.has_key("updated_parsed"):
                    articleTime = time.mktime(entry["updated_parsed"])
                    if currentTime - articleTime < expiry:
                        id = self.getUniqueId(-1, entry)
                        if not id in tmpIds:
                            tmp["entries"].append(entry)

            self.entries = tmp["entries"]
            self.countUnread = 0
            # Initialize the new articles to unread
            tmpReadItems = self.readItems
            self.readItems = {}
            for index in range(self.getNumberOfEntries()):
                if not tmpReadItems.has_key(self.getUniqueId(index)):
                    self.readItems[self.getUniqueId(index)] = False
                else:
                    self.readItems[self.getUniqueId(index)] = tmpReadItems[self.getUniqueId(index)]
                if self.readItems[self.getUniqueId(index)] == False:
                    self.countUnread = self.countUnread + 1
            del tmp
            self.updateTime = time.asctime()
            self.saveFeed(configdir)

    def setEntryRead(self, index):
        if self.readItems[self.getUniqueId(index)] == False:
            self.countUnread = self.countUnread - 1
            self.readItems[self.getUniqueId(index)] = True

    def setEntryUnread(self, index):
        if self.readItems[self.getUniqueId(index)] == True:
            self.countUnread = self.countUnread + 1
            self.readItems[self.getUniqueId(index)] = False

    def isEntryRead(self, index):
        return self.readItems[self.getUniqueId(index)]

    def getTitle(self, index):
        return self.entries[index]["title"]

    def getLink(self, index):
        return self.entries[index]["link"]

    def getDate(self, index):
        try:
            return self.entries[index]["updated_parsed"]
        except:
            return time.localtime()

    def getUniqueId(self, index, entry=None):
        if index >= 0:
            entry = self.entries[index]
        if entry.has_key("updated_parsed"):
            return getId(time.strftime("%a, %d %b %Y %H:%M:%S", entry["updated_parsed"]) + entry["title"])
        elif entry.has_key("link"):
            return getId(entry["link"] + entry["title"])
        else:
            return getId(entry["title"])

    def getUpdateTime(self):
        return self.updateTime

    def getEntries(self):
        try:
            return self.entries
        except:
            return []

    def getNumberOfUnreadItems(self):
        return self.countUnread

    def getNumberOfEntries(self):
        return len(self.entries)

    def getItem(self, index):
        try:
            return self.entries[index]
        except:
            return []

    def getContent(self, index):
        content = ""
        entry = self.entries[index]
        if entry.has_key('summary'):
            content = entry.get('summary', '')
        if entry.has_key('content'):
            if len(entry.content[0].value) > len(content):
                content = entry.content[0].value
        if content == "":
            content = entry.get('description', '')
        return content

    def getArticle(self, index):
        self.setEntryRead(index)
        entry = self.entries[index]
        title = entry.get('title', 'No title')
        #content = entry.get('content', entry.get('summary_detail', {}))
        content = self.getContent(index)

        link = entry.get('link', 'NoLink')
        if entry.has_key("updated_parsed"):
            date = time.strftime("%a, %d %b %Y %H:%M:%S", entry["updated_parsed"])
        elif entry.has_key("published_parsed"):
            date = time.strftime("%a, %d %b %Y %H:%M:%S", entry["published_parsed"])
        else:
            date = ""
        #text = '''<div style="color: black; background-color: white;">'''
        text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
        text += "<html><head><title>" + title + "</title>"
        text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
        text += '<style> body {-webkit-user-select: none;} </style></head>'
        text += '<body><div><a href=\"' + link + '\">' + title + "</a>"
        text += "<BR /><small><i>Date: " + date + "</i></small></div>"
        text += "<BR /><BR />"
        text += content
        text += "</body></html>"
        return text

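# ArchivedArticles stores links the user chose to keep: updateFeed() downloads
# the full page for each saved link and discards articles once they expire.
# Note that it still addresses its entries by list index, following the FeedX
# API rather than the id-based Feed above.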
class ArchivedArticles(Feed):
    def addArchivedArticle(self, title, link, updated_parsed, configdir):
        entry = {}
        entry["title"] = title
        entry["link"] = link
        entry["downloaded"] = False
        entry["summary"] = '<a href=\"' + link + '\">' + title + "</a>"
        entry["updated_parsed"] = updated_parsed
        entry["time"] = time.time()
        self.entries.append(entry)
        self.readItems[self.getUniqueId(len(self.entries)-1)] = False
        self.countUnread = self.countUnread + 1
        self.saveFeed(configdir)
        #print entry

    def updateFeed(self, configdir, expiryTime=24):
        index = 0
        for entry in self.getEntries()[:]:
            if not entry["downloaded"]:
                try:
                    f = urllib2.urlopen(entry["link"])
                    entry["summary"] = f.read()
                    f.close()
                    if len(entry["summary"]) > 0:
                        entry["downloaded"] = True
                        entry["time"] = time.time()
                        self.setEntryUnread(index)
                except:
                    pass
            currentTime = time.time()
            expiry = float(expiryTime) * 3600
            if currentTime - entry["time"] > expiry:
                self.entries.remove(entry)
            index += 1
        self.updateTime = time.asctime()
        self.saveFeed(configdir)

    def getArticle(self, index):
        self.setEntryRead(index)
        content = self.getContent(index)
        return content


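# Listing keeps the feed metadata (title, url, unread count, update time and
# display order) in configdir/feeds.pickle; each Feed object itself is pickled
# separately under configdir/<md5-of-title>.d/feed by Feed.saveFeed().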
class Listing:
    # Lists all the feeds in a dictionary and exposes the data
    def __init__(self, configdir):
        self.configdir = configdir
        self.feeds = {}
        if isfile(self.configdir+"feeds.pickle"):
            file = open(self.configdir+"feeds.pickle")
            self.listOfFeeds = pickle.load(file)
            file.close()
        else:
            self.listOfFeeds = {getId("Slashdot"):{"title":"Slashdot", "url":"http://rss.slashdot.org/Slashdot/slashdot", "unread":0, "updateTime":"Never"}, }
        if self.listOfFeeds.has_key("font"):
            del self.listOfFeeds["font"]
        if self.listOfFeeds.has_key("feedingit-order"):
            self.sortedKeys = self.listOfFeeds["feedingit-order"]
        else:
            self.sortedKeys = self.listOfFeeds.keys()
            if "font" in self.sortedKeys:
                self.sortedKeys.remove("font")
            self.sortedKeys.sort(key=lambda obj: self.getFeedTitle(obj))
        self.closeCurrentlyDisplayedFeed()
        #self.saveConfig()

    def addArchivedArticle(self, key, index):
        title = self.getFeed(key).getTitle(index)
        link = self.getFeed(key).getLink(index)
        date = self.getFeed(key).getDate(index)
        if not self.listOfFeeds.has_key(getId("Archived Articles")):
            self.listOfFeeds[getId("Archived Articles")] = {"title":"Archived Articles", "url":""}
            self.sortedKeys.append(getId("Archived Articles"))
            self.feeds[getId("Archived Articles")] = ArchivedArticles("Archived Articles", "")
            self.saveConfig()

        self.getFeed(getId("Archived Articles")).addArchivedArticle(title, link, date, self.configdir)

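    # loadFeed() unpickles a feed from configdir/<key>.d/feed, or builds a
    # fresh Feed object from the stored title and url when no pickle exists yet.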
    def loadFeed(self, key):
        if isfile(self.configdir+key+".d/feed"):
            file = open(self.configdir+key+".d/feed")
            feed = pickle.load(file)
            file.close()
        else:
            title = self.listOfFeeds[key]["title"]
            url = self.listOfFeeds[key]["url"]
            feed = Feed(title, url)
        return feed

    def updateFeeds(self, expiryTime=24):
        for key in self.getListOfFeeds():
            feed = self.loadFeed(key)
            feed.updateFeed(self.configdir, expiryTime)

    def updateFeed(self, key, expiryTime=24):
        feed = self.loadFeed(key)
        feed.updateFeed(self.configdir, expiryTime)
        self.listOfFeeds[key]["unread"] = feed.getNumberOfUnreadItems()
        self.listOfFeeds[key]["updateTime"] = feed.getUpdateTime()

    def editFeed(self, key, title, url):
        self.listOfFeeds[key]["title"] = title
        self.listOfFeeds[key]["url"] = url
        self.getFeed(key).editFeed(url)

    def getFeed(self, key):
        if not self.feeds.has_key(key):
            self.feeds[key] = self.loadFeed(key)
        return self.feeds[key]

    def getFeedUpdateTime(self, key):
        return self.getFeed(key).getUpdateTime()

    def getFeedNumberOfUnreadItems(self, key):
        return self.listOfFeeds[key]["unread"]

    def getFeedTitle(self, key):
        return self.listOfFeeds[key]["title"]

    def getFeedUrl(self, key):
        return self.listOfFeeds[key]["url"]

    def getListOfFeeds(self):
        return self.sortedKeys

    def getNumberOfUnreadItems(self, key):
        if self.listOfFeeds[key].has_key("unread"):
            return self.listOfFeeds[key]["unread"]
        else:
            return 0

    def addFeed(self, title, url):
        if not self.listOfFeeds.has_key(getId(title)):
            self.listOfFeeds[getId(title)] = {"title":title, "url":url, "unread":0, "updateTime":"Never"}
            self.sortedKeys.append(getId(title))
            self.saveConfig()
            return True
        else:
            return False

    def removeFeed(self, key):
        del self.listOfFeeds[key]
        self.sortedKeys.remove(key)
        if self.feeds.has_key(key):
            del self.feeds[key]
        if isdir(self.configdir+key+".d/"):
            rmtree(self.configdir+key+".d/")
        self.saveConfig()

    def saveConfig(self):
        self.listOfFeeds["feedingit-order"] = self.sortedKeys
        file = open(self.configdir+"feeds.pickle", "w")
        pickle.dump(self.listOfFeeds, file)
        file.close()

    def moveUp(self, key):
        index = self.sortedKeys.index(key)
        self.sortedKeys[index] = self.sortedKeys[index-1]
        self.sortedKeys[index-1] = key

    def moveDown(self, key):
        index = self.sortedKeys.index(key)
        index2 = (index+1) % len(self.sortedKeys)
        self.sortedKeys[index] = self.sortedKeys[index2]
        self.sortedKeys[index2] = key

    def setCurrentlyDisplayedFeed(self, key):
        self.currentlyDisplayedFeed = key

    def closeCurrentlyDisplayedFeed(self):
        self.currentlyDisplayedFeed = False

    def getCurrentlyDisplayedFeed(self):
        return self.currentlyDisplayedFeed

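# A minimal usage sketch (the config directory matches the default used in the
# test block below; adjust it for other setups):
#
#   listing = Listing('/home/user/.feedingit/')
#   listing.addFeed("Slashdot", "http://rss.slashdot.org/Slashdot/slashdot")
#   for key in listing.getListOfFeeds():
#       listing.updateFeed(key)
#       print listing.getFeedTitle(key), listing.getNumberOfUnreadItems(key)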
if __name__ == "__main__":
    listing = Listing('/home/user/.feedingit/')
    for key in listing.getListOfFeeds():
        print listing.getFeedTitle(key), listing.getFeedUpdateTime(key)