When creating a unique id for an article, use all available information.
[feedingit] / src / rss_sqlite.py
1 #!/usr/bin/env python2.5
2
3
4 # Copyright (c) 2007-2008 INdT.
5 # Copyright (c) 2011 Neal H. Walfield
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Lesser General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
10 #
11 #  This program is distributed in the hope that it will be useful,
12 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
13 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 #  GNU Lesser General Public License for more details.
15 #
16 #  You should have received a copy of the GNU Lesser General Public License
17 #  along with this program.  If not, see <http://www.gnu.org/licenses/>.
18 #
19
20 # ============================================================================
21 # Name        : FeedingIt.py
22 # Author      : Yves Marcoz
23 # Version     : 0.5.4
24 # Description : Simple RSS Reader
25 # ============================================================================
26
# Standard library.
import hashlib
import md5  # deprecated since Python 2.5; retained for compatibility -- prefer hashlib
import os
import random
import sqlite3
import sys
import threading
import time
import traceback
import urllib2
from calendar import timegm
from os import mkdir, remove, utime
from os.path import isfile, isdir
from shutil import rmtree
from urlparse import urljoin

# Third party.
import feedparser
import woodchuck
from BeautifulSoup import BeautifulSoup

# Application modules.
import mainthread
from httpprogresshandler import HTTPProgressHandler
from jobmanager import JobManager
from updatedbus import get_lock, release_lock
from wc import wc, wc_init
49
50 def getId(string):
51     return md5.new(string).hexdigest()
52
def download_callback(connection):
    """Progress callback for downloads: abort the transfer by raising
    KeyboardInterrupt once the job manager wants to shut down."""
    manager = JobManager()
    if manager.do_quit:
        raise KeyboardInterrupt
56
def downloader(progress_handler=None, proxy=None):
    """Build a urllib2 opener that reports download progress and,
    optionally, routes requests through *proxy*.

    When no *progress_handler* is supplied, a default one is created
    that aborts the transfer on application shutdown.
    """
    handlers = [progress_handler or HTTPProgressHandler(download_callback)]
    if proxy:
        handlers.append(proxy)
    return urllib2.build_opener(*handlers)
69
70 class Feed:
71     serial_execution_lock = threading.Lock()
72
73     def _getdb(self):
74         try:
75             db = self.tls.db
76         except AttributeError:
77             db = sqlite3.connect("%s/%s.db" % (self.dir, self.key), timeout=120)
78             self.tls.db = db
79         return db
80     db = property(_getdb)
81
82     def __init__(self, configdir, key):
83         self.key = key
84         self.configdir = configdir
85         self.dir = "%s/%s.d" %(self.configdir, self.key)
86         self.tls = threading.local ()
87
88         if not isdir(self.dir):
89             mkdir(self.dir)
90         if not isfile("%s/%s.db" %(self.dir, self.key)):
91             self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
92             self.db.execute("CREATE TABLE images (id text, imagePath text);")
93             self.db.commit()
94
95     def addImage(self, configdir, key, baseurl, url, proxy=None, opener=None):
96         filename = configdir+key+".d/"+getId(url)
97         if not isfile(filename):
98             try:
99                 if not opener:
100                     opener = downloader(proxy=proxy)
101
102                 abs_url = urljoin(baseurl,url)
103                 f = opener.open(abs_url)
104                 outf = open(filename, "w")
105                 outf.write(f.read())
106                 f.close()
107                 outf.close()
108             except (urllib2.HTTPError, urllib2.URLError, IOError), exception:
109                 print ("Could not download image %s: %s"
110                        % (abs_url, str (exception)))
111                 return None
112             except:
113                 exception = sys.exc_info()[0]
114
115                 print "Downloading image: %s" % abs_url
116                 traceback.print_exc()
117
118                 try:
119                     remove(filename)
120                 except OSError:
121                     pass
122
123                 raise exception
124         else:
125             #open(filename,"a").close()  # "Touch" the file
126             file = open(filename,"a")
127             utime(filename, None)
128             file.close()
129         return filename
130
131     def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
132         def doit():
133             def it():
134                 self._updateFeed(configdir, url, etag, modified, expiryTime, proxy, imageCache, postFeedUpdateFunc, *postFeedUpdateFuncArgs)
135             return it
136         JobManager().execute(doit(), self.key, priority=priority)
137
    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        """Synchronously download and process the feed at *url*.

        New articles are written to the database and rendered to HTML
        files (optionally with cached images); stale articles and
        cache files are expired.  *etag* and *modified* are the HTTP
        cache validators from the previous update; *expiryTime* is in
        hours.  Progress is reported to Woodchuck when available.
        Whatever happens, *postFeedUpdateFunc* (if given) is finally
        invoked with (key, updateTime, etag, modified, title, *args).
        """
        success = False
        have_serial_execution_lock = False
        try:
            update_lock = None
            # NOTE(review): the lock is acquired under the literal
            # string "key", not self.key -- presumably this should be
            # the feed's key so different feeds can update
            # concurrently; confirm against get_lock's semantics.
            update_lock = get_lock("key")
            if not update_lock:
                # Someone else is doing an update.
                return

            download_start = time.time ()

            progress_handler = HTTPProgressHandler(download_callback)

            openers = [progress_handler]
            if proxy:
                openers.append (proxy)
            kwargs = {'handlers':openers}

            tmp=feedparser.parse(url, etag=etag, modified=modified, **kwargs)
            download_duration = time.time () - download_start

            opener = downloader(progress_handler, proxy)

            if JobManager().do_quit:
                raise KeyboardInterrupt

            process_start = time.time()

            # Expiry time is in hours
            expiry = float(expiryTime) * 3600.

            currentTime = 0

            have_woodchuck = mainthread.execute (wc().available)

            def wc_success():
                # Report a successful stream update to Woodchuck.
                try:
                    wc().stream_register (self.key, "", 6 * 60 * 60)
                except woodchuck.ObjectExistsError:
                    pass
                try:
                    wc()[self.key].updated (
                        indicator=(woodchuck.Indicator.ApplicationVisual
                                   |woodchuck.Indicator.StreamWide),
                        transferred_down=progress_handler.stats['received'],
                        transferred_up=progress_handler.stats['sent'],
                        transfer_time=download_start,
                        transfer_duration=download_duration,
                        new_objects=len (tmp.entries),
                        objects_inline=len (tmp.entries))
                except KeyError:
                    print "Failed to register update with woodchuck!"
                    pass

            http_status = tmp.get ('status', 200)

            # Check if the parse was succesful.  If the http status code
            # is 304, then the download was successful, but there is
            # nothing new.  Indeed, no content is returned.  This make a
            # 304 look like an error because there are no entries and the
            # parse fails.  But really, everything went great!  Check for
            # this first.
            if http_status == 304:
                print "%s: No changes to feed." % (self.key,)
                mainthread.execute (wc_success, async=True)
                success = True
            elif len(tmp["entries"])==0 and not tmp.version:
                # An error occured fetching or parsing the feed.  (Version
                # will be either None if e.g. the connection timed our or
                # '' if the data is not a proper feed)
                print ("Error fetching %s: version is: %s: error: %s"
                       % (url, str (tmp.version),
                          str (tmp.get ('bozo_exception', 'Unknown error'))))
                print tmp
                if have_woodchuck:
                    def e():
                        print "%s: stream update failed!" % self.key

                        try:
                            # It's not easy to get the feed's title from here.
                            # At the latest, the next time the application is
                            # started, we'll fix up the human readable name.
                            wc().stream_register (self.key, "", 6 * 60 * 60)
                        except woodchuck.ObjectExistsError:
                            pass
                        # Map the HTTP status class to a Woodchuck
                        # failure category.
                        ec = woodchuck.TransferStatus.TransientOther
                        if 300 <= http_status and http_status < 400:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        if 400 <= http_status and http_status < 500:
                            ec = woodchuck.TransferStatus.FailureGone
                        if 500 <= http_status and http_status < 600:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        wc()[self.key].update_failed(ec)
                    mainthread.execute (e, async=True)
            else:
                currentTime = time.time()
                # The etag and modified value should only be updated if the content was not null
                try:
                    etag = tmp["etag"]
                except KeyError:
                    etag = None
                try:
                    modified = tmp["modified"]
                except KeyError:
                    modified = None
                # Best effort: also refresh the feed's favicon.
                try:
                    abs_url = urljoin(tmp["feed"]["link"],"/favicon.ico")
                    f = opener.open(abs_url)
                    data = f.read()
                    f.close()
                    outf = open(self.dir+"/favicon.ico", "w")
                    outf.write(data)
                    outf.close()
                    del data
                except (urllib2.HTTPError, urllib2.URLError), exception:
                    print ("Could not download favicon %s: %s"
                           % (abs_url, str (exception)))

                self.serial_execution_lock.acquire ()
                have_serial_execution_lock = True

                #reversedEntries = self.getEntries()
                #reversedEntries.reverse()

                ids = self.getIds()

                # Process oldest entries first.
                tmp["entries"].reverse()
                for entry in tmp["entries"]:
                    # Yield so as to make the main thread a bit more
                    # responsive.
                    time.sleep(0)

                    if JobManager().do_quit:
                        raise KeyboardInterrupt

                    received_base = progress_handler.stats['received']
                    sent_base = progress_handler.stats['sent']
                    object_size = 0

                    # Normalize the entry: every field the rest of the
                    # loop relies on gets a default.
                    date = self.extractDate(entry)
                    try:
                        entry["title"]
                    except KeyError:
                        entry["title"] = "No Title"
                    try :
                        entry["link"]
                    except KeyError:
                        entry["link"] = ""
                    try:
                        entry["author"]
                    except KeyError:
                        entry["author"] = None
                    if(not(entry.has_key("id"))):
                        entry["id"] = None
                    content = self.extractContent(entry)
                    object_size = len (content)
                    received_base -= len (content)
                    tmpEntry = {"title":entry["title"], "content":content,
                                "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
                    id = self.generateUniqueId(tmpEntry)

                    #articleTime = time.mktime(self.entries[id]["dateTuple"])
                    soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                    images = soup('img')
                    baseurl = tmpEntry["link"]
                    #if not id in ids:
                    if imageCache and len(images) > 0:
                        # Image downloads don't need the serial lock;
                        # release it so other feeds can progress.
                        self.serial_execution_lock.release ()
                        have_serial_execution_lock = False
                        for img in images:
                            filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
                            if filename:
                                img['src']="file://%s" %filename
                                count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
                                if count == 0:
                                    self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                                    self.db.commit()

                                try:
                                    object_size += os.path.getsize (filename)
                                except os.error, exception:
                                    print ("Error getting size of %s: %s"
                                           % (filename, exception))
                                    pass
                        self.serial_execution_lock.acquire ()
                        have_serial_execution_lock = True

                    # Render the article to its content file.
                    tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
                    file = open(tmpEntry["contentLink"], "w")
                    file.write(soup.prettify())
                    file.close()
                    if id in ids:
                        self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
                        self.db.commit()
                    else:
                        values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
                        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
                        self.db.commit()
#                   else:
#                       try:
#                           self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
#                           self.db.commit()
#                           filename = configdir+self.key+".d/"+id+".html"
#                           file = open(filename,"a")
#                           utime(filename, None)
#                           file.close()
#                           images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
#                           for image in images:
#                                file = open(image[0],"a")
#                                utime(image[0], None)
#                                file.close()
#                       except:
#                           pass

                    # Register the object with Woodchuck and mark it as
                    # downloaded.
                    if have_woodchuck:
                        def e():
                            try:
                                obj = wc()[self.key].object_register(
                                    object_identifier=id,
                                    human_readable_name=tmpEntry["title"])
                            except woodchuck.ObjectExistsError:
                                obj = wc()[self.key][id]
                            else:
                                # If the entry does not contain a publication
                                # time, the attribute won't exist.
                                pubtime = entry.get ('date_parsed', None)
                                if pubtime:
                                    obj.publication_time = time.mktime (pubtime)

                                received = (progress_handler.stats['received']
                                            - received_base)
                                sent = progress_handler.stats['sent'] - sent_base
                                obj.transferred (
                                    indicator=(woodchuck.Indicator.ApplicationVisual
                                               |woodchuck.Indicator.StreamWide),
                                    transferred_down=received,
                                    transferred_up=sent,
                                    object_size=object_size)
                        mainthread.execute(e, async=True)
                self.db.commit()

                print ("%s: Update successful: transferred: %d/%d; objects: %d)"
                       % (self.key,
                          progress_handler.stats['sent'],
                          progress_handler.stats['received'],
                          len (tmp.entries)))
                mainthread.execute (wc_success, async=True)
                success = True

            # Expire old articles: unread ones get twice the grace
            # period of read ones.
            rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
            for row in rows:
                self.removeEntry(row[0])

            from glob import glob
            from os import stat
            for file in glob(configdir+self.key+".d/*"):
                #
                stats = stat(file)
                #
                # put the two dates into matching format
                #
                lastmodDate = stats[8]
                #
                expDate = time.time()-expiry*3
                # check if image-last-modified-date is outdated
                #
                if expDate > lastmodDate:
                    #
                    try:
                        #
                        #print 'Removing', file
                        #
                        # XXX: Tell woodchuck.
                        remove(file) # commented out for testing
                        #
                    except OSError, exception:
                        #
                        print 'Could not remove %s: %s' % (file, str (exception))
            print ("updated %s: %fs in download, %fs in processing"
                   % (self.key, download_duration,
                      time.time () - process_start))
        except:
            print "Updating %s: %s" % (self.key, sys.exc_info()[0])
            traceback.print_exc()
        finally:
            self.db.commit ()

            if have_serial_execution_lock:
                self.serial_execution_lock.release ()

            if update_lock is not None:
                release_lock (update_lock)

            updateTime = 0
            try:
                rows = self.db.execute("SELECT MAX(date) FROM feed;")
                for row in rows:
                    updateTime=row[0]
            except:
                print "Fetching update time."
                traceback.print_exc()
            finally:
                # Don't persist validators from a failed update.
                if not success:
                    etag = None
                    modified = None
                title = None
                try:
                    title = tmp.feed.title
                except (AttributeError, UnboundLocalError), exception:
                    pass
                if postFeedUpdateFunc is not None:
                    postFeedUpdateFunc (self.key, updateTime, etag, modified,
                                        title, *postFeedUpdateFuncArgs)
454
455     def setEntryRead(self, id):
456         self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
457         self.db.commit()
458
459         def e():
460             if wc().available():
461                 try:
462                     wc()[self.key][id].used()
463                 except KeyError:
464                     pass
465
466     def setEntryUnread(self, id):
467         self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
468         self.db.commit()     
469         
470     def markAllAsRead(self):
471         self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
472         self.db.commit()
473
474     def isEntryRead(self, id):
475         read_status = self.db.execute("SELECT read FROM feed WHERE id=?;", (id,) ).fetchone()[0]
476         return read_status==1  # Returns True if read==1, and False if read==0
477     
478     def getTitle(self, id):
479         return self.db.execute("SELECT title FROM feed WHERE id=?;", (id,) ).fetchone()[0]
480     
481     def getContentLink(self, id):
482         return self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) ).fetchone()[0]
483     
484     def getExternalLink(self, id):
485         return self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) ).fetchone()[0]
486     
487     def getDate(self, id):
488         dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
489         return time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(dateStamp))
490
491     def getDateTuple(self, id):
492         dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
493         return time.localtime(dateStamp)
494     
495     def getDateStamp(self, id):
496         return self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
497     
498     def generateUniqueId(self, entry):
499         """
500         Generate a stable identifier for the article.  For the same
501         entry, this should result in the same identifier.  If
502         possible, the identifier should remain the same even if the
503         article is updated.
504         """
505         # Prefer the entry's id, which is supposed to be globally
506         # unique.
507         key = entry.get('id', None)
508         if not key:
509             # Next, try the link to the content.
510             key = entry.get('link', None)
511         if not key:
512             # Ok, the title and the date concatenated are likely to be
513             # relatively stable.
514             key = entry.get('title', None) + entry.get('date', None)
515         if not key:
516             # Hmm, the article's content will at least guarantee no
517             # false negatives (i.e., missing articles)
518             key = entry.get('content', None)
519         if not key:
520             # If all else fails, just use a random number.
521             key = str (random.random ())
522         return getId (key)
523     
524     def getIds(self, onlyUnread=False):
525         if onlyUnread:
526             rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
527         else:
528             rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
529         ids = []
530         for row in rows:
531             ids.append(row[0])
532         #ids.reverse()
533         return ids
534     
535     def getNextId(self, id):
536         ids = self.getIds()
537         index = ids.index(id)
538         return ids[(index+1)%len(ids)]
539         
540     def getPreviousId(self, id):
541         ids = self.getIds()
542         index = ids.index(id)
543         return ids[(index-1)%len(ids)]
544     
545     def getNumberOfUnreadItems(self):
546         return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]
547     
548     def getNumberOfEntries(self):
549         return self.db.execute("SELECT count(*) FROM feed;").fetchone()[0]
550
551     def getArticle(self, entry):
552         #self.setEntryRead(id)
553         #entry = self.entries[id]
554         title = entry['title']
555         #content = entry.get('content', entry.get('summary_detail', {}))
556         content = entry["content"]
557
558         link = entry['link']
559         author = entry['author']
560         date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )
561
562         #text = '''<div style="color: black; background-color: white;">'''
563         text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
564         text += "<html><head><title>" + title + "</title>"
565         text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
566         #text += '<style> body {-webkit-user-select: none;} </style>'
567         text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
568         if author != None:
569             text += "<BR /><small><i>Author: " + author + "</i></small>"
570         text += "<BR /><small><i>Date: " + date + "</i></small></div>"
571         text += "<BR /><BR />"
572         text += content
573         text += "</body></html>"
574         return text
575    
576     def getContent(self, id):
577         contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
578         try:
579             file = open(self.entries[id]["contentLink"])
580             content = file.read()
581             file.close()
582         except:
583             content = "Content unavailable"
584         return content
585     
586     def extractDate(self, entry):
587         if entry.has_key("updated_parsed"):
588             return timegm(entry["updated_parsed"])
589         elif entry.has_key("published_parsed"):
590             return timegm(entry["published_parsed"])
591         else:
592             return time.time()
593         
594     def extractContent(self, entry):
595         content = ""
596         if entry.has_key('summary'):
597             content = entry.get('summary', '')
598         if entry.has_key('content'):
599             if len(entry.content[0].value) > len(content):
600                 content = entry.content[0].value
601         if content == "":
602             content = entry.get('description', '')
603         return content
604     
605     def removeEntry(self, id):
606         contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
607         if contentLink:
608             try:
609                 remove(contentLink)
610             except OSError, exception:
611                 print "Deleting %s: %s" % (contentLink, str (exception))
612         self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
613         self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
614         self.db.commit()
615
616         def e():
617             if wc().available():
618                 try:
619                     wc()[self.key][id].files_deleted (
620                         woodchuck.DeletionResponse.Deleted)
621                     del wc()[self.key][id]
622                 except KeyError:
623                     pass
624         mainthread.execute (e, async=True)
625  
class ArchivedArticles(Feed):
    """A pseudo-feed holding articles the user explicitly archived.

    Articles are added by URL and their content is fetched lazily by
    updateFeed (entries with updated=0 are pending download).
    """
    def addArchivedArticle(self, title, link, date, configdir):
        # An archived article has no feed-supplied id; derive one from
        # the date and title.
        id = self.generateUniqueId({"date":date, "title":title})
        # updated=0 marks the article as not yet downloaded (see
        # updateFeed below).
        values = (id, title, link, date, 0, link, 0)
        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
        self.db.commit()

    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
        # Download the content of every article that has not been
        # fetched yet (updated=0), caching its images alongside.
        # NOTE(review): unlike Feed, img['src'] is set to the bare
        # filename without a file:// prefix here -- confirm whether
        # that is intentional.
        currentTime = 0
        rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
        for row in rows:
            currentTime = time.time()
            id = row[0]
            link = row[1]
            f = urllib2.urlopen(link)
            #entry["content"] = f.read()
            html = f.read()
            f.close()
            soup = BeautifulSoup(html)
            images = soup('img')
            baseurl = link
            for img in images:
                filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
                img['src']=filename
                self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                self.db.commit()
            contentLink = configdir+self.key+".d/"+id+".html"
            file = open(contentLink, "w")
            file.write(soup.prettify())
            file.close()

            self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
            self.db.commit()
        return (currentTime, None, None)

    def purgeReadArticles(self):
        # Remove every archived article the user has already read.
        rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
        #ids = self.getIds()
        for row in rows:
            self.removeArticle(row[0])

    def removeArticle(self, id):
        # Delete the article's cached images -- unless another article
        # still references the same file -- then the article itself.
        rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
        for row in rows:
            try:
                count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
                if count == 0:
                    os.remove(row[0])
            except:
                pass
        self.removeEntry(id)
677
678 class Listing:
679     def _getdb(self):
680         try:
681             db = self.tls.db
682         except AttributeError:
683             db = sqlite3.connect("%s/feeds.db" % self.configdir, timeout=120)
684             self.tls.db = db
685         return db
686     db = property(_getdb)
687
688     # Lists all the feeds in a dictionary, and expose the data
689     def __init__(self, config, configdir):
690         self.config = config
691         self.configdir = configdir
692
693         self.tls = threading.local ()
694         
695         try:
696             table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
697             if table == None:
698                 self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
699                 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
700                 self.addCategory("Default Category")
701                 if isfile(self.configdir+"feeds.pickle"):
702                     self.importOldFormatFeeds()
703                 else:
704                     self.addFeed("Maemo News", "http://maemo.org/news/items.xml")    
705             else:
706                 from string import find, upper
707                 if find(upper(table[0]), "WIDGET")<0:
708                     self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
709                     self.db.execute("UPDATE feeds SET widget=1;")
710                     self.db.commit()
711                 if find(upper(table[0]), "CATEGORY")<0:
712                     self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
713                     self.addCategory("Default Category")
714                     self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
715                     self.db.execute("UPDATE feeds SET category=1;")
716             self.db.commit()
717         except:
718             pass
719
720         # Check that Woodchuck's state is up to date with respect our
721         # state.
722         wc_init (self)
723         if wc().available():
724             # The list of known streams.
725             streams = wc().streams_list ()
726             stream_ids = [s.identifier for s in streams]
727
728             # Register any unknown streams.  Remove known streams from
729             # STREAMS_IDS.
730             for key in self.getListOfFeeds():
731                 title = self.getFeedTitle(key)
732                 # XXX: We should also check whether the list of
733                 # articles/objects in each feed/stream is up to date.
734                 if key not in stream_ids:
735                     print ("Registering previously unknown channel: %s (%s)"
736                            % (key, title,))
737                     # Use a default refresh interval of 6 hours.
738                     wc().stream_register (key, title, 6 * 60 * 60)
739                 else:
740                     # Make sure the human readable name is up to date.
741                     if wc()[key].human_readable_name != title:
742                         wc()[key].human_readable_name = title
743                     stream_ids.remove (key)
744                     
745
746             # Unregister any streams that are no longer subscribed to.
747             for id in stream_ids:
748                 print ("Unregistering %s" % (id,))
749                 w.stream_unregister (id)
750
751     def importOldFormatFeeds(self):
752         """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
753         import rss
754         listing = rss.Listing(self.configdir)
755         rank = 0
756         for id in listing.getListOfFeeds():
757             try:
758                 rank += 1
759                 values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
760                 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)
761                 self.db.commit()
762                 
763                 feed = listing.getFeed(id)
764                 new_feed = self.getFeed(id)
765                 
766                 items = feed.getIds()[:]
767                 items.reverse()
768                 for item in items:
769                         if feed.isEntryRead(item):
770                             read_status = 1
771                         else:
772                             read_status = 0 
773                         date = timegm(feed.getDateTuple(item))
774                         title = feed.getTitle(item)
775                         newId = new_feed.generateUniqueId({"date":date, "title":title})
776                         values = (newId, title , feed.getContentLink(item), date, tuple(time.time()), feed.getExternalLink(item), read_status)
777                         new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
778                         new_feed.db.commit()
779                         try:
780                             images = feed.getImages(item)
781                             for image in images:
782                                 new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
783                                 new_feed.db.commit()
784                         except:
785                             pass
786                 self.updateUnread(id)
787             except:
788                 traceback.print_exc()
789         remove(self.configdir+"feeds.pickle")
790                 
791         
792     def addArchivedArticle(self, key, index):
793         feed = self.getFeed(key)
794         title = feed.getTitle(index)
795         link = feed.getExternalLink(index)
796         date = feed.getDate(index)
797         count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
798         if count == 0:
799             self.addFeed("Archived Articles", "", id="ArchivedArticles")
800
801         archFeed = self.getFeed("ArchivedArticles")
802         archFeed.addArchivedArticle(title, link, date, self.configdir)
803         self.updateUnread("ArchivedArticles")
804         
805     def updateFeed(self, key, expiryTime=None, proxy=None, imageCache=None,
806                    priority=0):
807         if expiryTime is None:
808             expiryTime = self.config.getExpiry()
809         if not expiryTime:
810             # Default to 24 hours
811             expriyTime = 24
812         if proxy is None:
813             (use_proxy, proxy) = self.config.getProxy()
814             if not use_proxy:
815                 proxy = None
816         if imageCache is None:
817             imageCache = self.config.getImageCache()
818
819         feed = self.getFeed(key)
820         (url, etag, modified) = self.db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
821         try:
822             modified = time.struct_time(eval(modified))
823         except:
824             modified = None
825         feed.updateFeed(
826             self.configdir, url, etag, modified, expiryTime, proxy, imageCache,
827             priority, postFeedUpdateFunc=self._queuePostFeedUpdate)
828
829     def _queuePostFeedUpdate(self, *args, **kwargs):
830         mainthread.execute (self._postFeedUpdate, async=True, *args, **kwargs)
831
832     def _postFeedUpdate(self, key, updateTime, etag, modified, title):
833         if modified==None:
834             modified="None"
835         else:
836             modified=str(tuple(modified))
837         if updateTime > 0:
838             self.db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
839         else:
840             self.db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )
841
842         if title is not None:
843             self.db.execute("UPDATE feeds SET title=(case WHEN title=='' THEN ? ELSE title END) where id=?;",
844                             (title, key))
845         self.db.commit()
846         self.updateUnread(key)
847         
848     def getFeed(self, key):
849         if key == "ArchivedArticles":
850             return ArchivedArticles(self.configdir, key)
851         return Feed(self.configdir, key)
852         
853     def editFeed(self, key, title, url, category=None):
854         if category:
855             self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
856         else:
857             self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
858         self.db.commit()
859
860         if wc().available():
861             try:
862                 wc()[key].human_readable_name = title
863             except KeyError:
864                 print "Feed %s (%s) unknown." % (key, title)
865                 pass
866         
867     def getFeedUpdateTime(self, key):
868         return time.ctime(self.db.execute("SELECT updateTime FROM feeds WHERE id=?;", (key,)).fetchone()[0])
869         
870     def getFeedNumberOfUnreadItems(self, key):
871         return self.db.execute("SELECT unread FROM feeds WHERE id=?;", (key,)).fetchone()[0]
872         
873     def getFeedTitle(self, key):
874         (title, url) = self.db.execute("SELECT title, url FROM feeds WHERE id=?;", (key,)).fetchone()
875         if title:
876             return title
877         return url
878         
879     def getFeedUrl(self, key):
880         return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]
881     
882     def getFeedCategory(self, key):
883         return self.db.execute("SELECT category FROM feeds WHERE id=?;", (key,)).fetchone()[0]
884         
885     def getListOfFeeds(self, category=None):
886         if category:
887             rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
888         else:
889             rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
890         keys = []
891         for row in rows:
892             if row[0]:
893                 keys.append(row[0])
894         return keys
895     
896     def getListOfCategories(self):
897         rows = self.db.execute("SELECT id FROM categories ORDER BY rank;" )
898         keys = []
899         for row in rows:
900             if row[0]:
901                 keys.append(row[0])
902         return keys
903     
904     def getCategoryTitle(self, id):
905         row = self.db.execute("SELECT title FROM categories WHERE id=?;", (id, )).fetchone()
906         return row[0]
907     
908     def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
909         if   order == "Most unread":
910             tmp = "ORDER BY unread DESC"
911             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1], reverse=True)
912         elif order == "Least unread":
913             tmp = "ORDER BY unread"
914             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1])
915         elif order == "Most recent":
916             tmp = "ORDER BY updateTime DESC"
917             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2], reverse=True)
918         elif order == "Least recent":
919             tmp = "ORDER BY updateTime"
920             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2])
921         else: # order == "Manual" or invalid value...
922             tmp = "ORDER BY rank"
923             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][0])
924         if onlyUnread:
925             sql = "SELECT id FROM feeds WHERE unread>0 AND category=%s " %category + tmp 
926         else:
927             sql = "SELECT id FROM feeds WHERE category=%s " %category + tmp
928         rows = self.db.execute(sql)
929         keys = []
930         for row in rows:
931             if row[0]:
932                 keys.append(row[0])
933         return keys
934     
935     def getFavicon(self, key):
936         filename = "%s%s.d/favicon.ico" % (self.configdir, key)
937         if isfile(filename):
938             return filename
939         else:
940             return False
941         
942     def updateUnread(self, key):
943         feed = self.getFeed(key)
944         self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
945         self.db.commit()
946
947     def addFeed(self, title, url, id=None, category=1):
948         if not id:
949             id = getId(url)
950         count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
951         if count == 0:
952             max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
953             if max_rank == None:
954                 max_rank = 0
955             values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
956             self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?,?);", values)
957             self.db.commit()
958             # Ask for the feed object, it will create the necessary tables
959             self.getFeed(id)
960
961             if wc().available():
962                 # Register the stream with Woodchuck.  Update approximately
963                 # every 6 hours.
964                 wc().stream_register(stream_identifier=id,
965                                      human_readable_name=title,
966                                      freshness=6*60*60)
967
968             return True
969         else:
970             return False
971         
972     def addCategory(self, title):
973         rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
974         if rank==None:
975             rank=1
976         id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
977         if id==None:
978             id=1
979         self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
980         self.db.commit()
981     
982     def removeFeed(self, key):
983         if wc().available ():
984             try:
985                 del wc()[key]
986             except KeyError:
987                 print "Removing unregistered feed %s failed" % (key,)
988
989         rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
990         self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
991         self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
992         self.db.commit()
993
994         if isdir(self.configdir+key+".d/"):
995            rmtree(self.configdir+key+".d/")
996            
997     def removeCategory(self, key):
998         if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
999             rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,) ).fetchone()[0]
1000             self.db.execute("DELETE FROM categories WHERE id=?;", (key, ))
1001             self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
1002             self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
1003             self.db.commit()
1004         
1005     #def saveConfig(self):
1006     #    self.listOfFeeds["feedingit-order"] = self.sortedKeys
1007     #    file = open(self.configdir+"feeds.pickle", "w")
1008     #    pickle.dump(self.listOfFeeds, file)
1009     #    file.close()
1010         
1011     def moveUp(self, key):
1012         rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
1013         if rank>0:
1014             self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
1015             self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
1016             self.db.commit()
1017             
1018     def moveCategoryUp(self, key):
1019         rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
1020         if rank>0:
1021             self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
1022             self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
1023             self.db.commit()
1024         
1025     def moveDown(self, key):
1026         rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
1027         max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
1028         if rank<max_rank:
1029             self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
1030             self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
1031             self.db.commit()
1032             
1033     def moveCategoryDown(self, key):
1034         rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
1035         max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
1036         if rank<max_rank:
1037             self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
1038             self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )
1039             self.db.commit()
1040             
1041