When creating a closure, correctly capture any required local state.
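
A closure defined inside a loop looks up its free variables when it runs, not when it is defined, so every deferred callback may see the values from the last iteration. Binding the needed state as a function parameter (or through a factory function, as rss_sqlite.py does below with register_stream_update_failed and register_object_transferred) freezes each value at creation time. A minimal sketch of the pattern — the names make_callback and value are illustrative, not from the file:

    callbacks = []
    for value in range(3):
        def make_callback(value):
            # `value` is now a parameter, so each callback is bound to
            # the value from its own iteration.
            def callback():
                print value
            return callback
        callbacks.append(make_callback(value))

    for cb in callbacks:
        cb()   # prints 0, 1, 2 -- not 2, 2, 2
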
[feedingit] / src / rss_sqlite.py
#!/usr/bin/env python2.5


# Copyright (c) 2007-2008 INdT.
# Copyright (c) 2011 Neal H. Walfield
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU Lesser General Public License for more details.
#
#  You should have received a copy of the GNU Lesser General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

# ============================================================================
# Name        : FeedingIt.py
# Author      : Yves Marcoz
# Version     : 0.5.4
# Description : Simple RSS Reader
# ============================================================================

from __future__ import with_statement

import sqlite3
from os.path import isfile, isdir
from shutil import rmtree
from os import mkdir, remove, utime
import os
import md5
import feedparser
import time
import urllib2
from BeautifulSoup import BeautifulSoup
from urlparse import urljoin
from calendar import timegm
import threading
import traceback
from wc import wc, wc_init, woodchuck
import subprocess
import dbus
from updatedbus import update_server_object

from jobmanager import JobManager
import mainthread
from httpprogresshandler import HTTPProgressHandler
import random
import sys
import logging
logger = logging.getLogger(__name__)

def getId(string):
    return md5.new(string).hexdigest()

def download_callback(connection):
    if JobManager().do_quit:
        raise KeyboardInterrupt

def downloader(progress_handler=None, proxy=None):
    openers = []

    if progress_handler is not None:
        openers.append(progress_handler)
    else:
        openers.append(HTTPProgressHandler(download_callback))

    if proxy:
        openers.append(proxy)

    return urllib2.build_opener(*openers)

# If not None, a subprocess.Popen object corresponding to an
# update_feeds.py process.
update_feed_process = None

update_feeds_iface = None

jobs_at_start = 0

class Feed:
    serial_execution_lock = threading.Lock()

    def _getdb(self):
        try:
            db = self.tls.db
        except AttributeError:
            db = sqlite3.connect("%s/%s.db" % (self.dir, self.key), timeout=120)
            self.tls.db = db
        return db
    db = property(_getdb)

    def __init__(self, configdir, key):
        self.key = key
        self.configdir = configdir
        self.dir = "%s/%s.d" %(self.configdir, self.key)
        self.tls = threading.local ()

        if not isdir(self.dir):
            mkdir(self.dir)
        if not isfile("%s/%s.db" %(self.dir, self.key)):
            self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
            self.db.execute("CREATE TABLE images (id text, imagePath text);")
            self.db.commit()

    def addImage(self, configdir, key, baseurl, url, proxy=None, opener=None):
        filename = configdir+key+".d/"+getId(url)
        if not isfile(filename):
            try:
                if not opener:
                    opener = downloader(proxy=proxy)

                abs_url = urljoin(baseurl,url)
                f = opener.open(abs_url)
                try:
                    with open(filename, "w") as outf:
                        for data in f:
                            outf.write(data)
                finally:
                    f.close()
            except (urllib2.HTTPError, urllib2.URLError, IOError), exception:
                logger.info("Could not download image %s: %s"
                            % (abs_url, str (exception)))
                return None
            except:
                logger.info("Downloading image %s: %s" %
                            (abs_url, traceback.format_exc()))
                try:
                    remove(filename)
                except OSError:
                    pass

                # Re-raise the original exception with its traceback
                # intact (re-raising sys.exc_info()[0] would raise the
                # exception class, losing the instance and traceback).
                raise
        else:
            #open(filename,"a").close()  # "Touch" the file
            file = open(filename,"a")
            utime(filename, None)
            file.close()
        return filename

    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        if (os.path.basename(sys.argv[0]) == 'update_feeds.py'):
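            # doit() is a closure factory: it binds the current values
            # of url, etag, modified, etc. into `it` now, so the job
            # still sees the right state when JobManager runs it later.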
            def doit():
                def it():
                    self._updateFeed(configdir, url, etag, modified, expiryTime, proxy, imageCache, postFeedUpdateFunc, *postFeedUpdateFuncArgs)
                return it
            JobManager().execute(doit(), self.key, priority=priority)
        else:
            def send_update_request():
                global update_feeds_iface
                if update_feeds_iface is None:
                    bus=dbus.SessionBus()
                    remote_object = bus.get_object(
                        "org.marcoz.feedingit", # Connection name
                        "/org/marcoz/feedingit/update" # Object's path
                        )
                    update_feeds_iface = dbus.Interface(
                        remote_object, 'org.marcoz.feedingit')

                try:
                    update_feeds_iface.Update(self.key)
                except Exception, e:
                    logger.error("Invoking org.marcoz.feedingit.Update: %s"
                                 % str(e))
                    update_feeds_iface = None
                else:
                    return True

            if send_update_request():
                # Success!  It seems we were able to start the update
                # daemon via dbus (or, it was already running).
                return

            global update_feed_process
            if (update_feed_process is None
                or update_feed_process.poll() is not None):
                # The update_feeds process is not running.  Start it.
                update_feeds = os.path.join(os.path.dirname(__file__),
                                            'update_feeds.py')
                argv = ['/usr/bin/env', 'python', update_feeds, '--daemon' ]
                logger.debug("Starting update_feeds: running %s"
                             % (str(argv),))
                update_feed_process = subprocess.Popen(argv)
                # Make sure the dbus calls go to the right process:
                # rebind.
                update_feeds_iface = None

            for _ in xrange(5):
                if send_update_request():
                    break
                time.sleep(1)

    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        success = False
        have_serial_execution_lock = False
        try:
            download_start = time.time ()

            progress_handler = HTTPProgressHandler(download_callback)

            openers = [progress_handler]
            if proxy:
                openers.append (proxy)
            kwargs = {'handlers':openers}

            tmp=feedparser.parse(url, etag=etag, modified=modified, **kwargs)
            download_duration = time.time () - download_start

            opener = downloader(progress_handler, proxy)

            if JobManager().do_quit:
                raise KeyboardInterrupt

            process_start = time.time()

            # Expiry time is in hours
            expiry = float(expiryTime) * 3600.

            currentTime = 0

            def wc_success():
                try:
                    wc().stream_register (self.key, "", 6 * 60 * 60)
                except woodchuck.ObjectExistsError:
                    pass
                try:
                    wc()[self.key].updated (
                        indicator=(woodchuck.Indicator.ApplicationVisual
                                   |woodchuck.Indicator.StreamWide),
                        transferred_down=progress_handler.stats['received'],
                        transferred_up=progress_handler.stats['sent'],
                        transfer_time=download_start,
                        transfer_duration=download_duration,
                        new_objects=len (tmp.entries),
                        objects_inline=len (tmp.entries))
                except KeyError:
                    logger.warn(
                        "Failed to register update of %s with woodchuck!"
                        % (self.key))

            http_status = tmp.get ('status', 200)

            # Check if the parse was successful.  If the http status code
            # is 304, then the download was successful, but there is
            # nothing new.  Indeed, no content is returned.  This makes a
            # 304 look like an error because there are no entries and the
            # parse fails.  But really, everything went great!  Check for
            # this first.
            if http_status == 304:
                logger.debug("%s: No changes to feed." % (self.key,))
                mainthread.execute(wc_success, async=True)
                success = True
            elif len(tmp["entries"])==0 and not tmp.version:
                # An error occurred fetching or parsing the feed.  (Version
                # will be either None if e.g. the connection timed out or
                # '' if the data is not a proper feed)
                logger.error(
                    "Error fetching %s: version is: %s: error: %s"
                    % (url, str (tmp.version),
                       str (tmp.get ('bozo_exception', 'Unknown error'))))
                logger.debug(tmp)
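                # http_status is passed as a parameter so that the
                # nested doit() closure is bound to the value from this
                # update, not to whatever the variable holds when the
                # main thread finally runs it.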
                def register_stream_update_failed(http_status):
                    def doit():
                        logger.debug("%s: stream update failed!" % self.key)

                        try:
                            # It's not easy to get the feed's title from here.
                            # At the latest, the next time the application is
                            # started, we'll fix up the human readable name.
                            wc().stream_register (self.key, "", 6 * 60 * 60)
                        except woodchuck.ObjectExistsError:
                            pass
                        ec = woodchuck.TransferStatus.TransientOther
                        if 300 <= http_status and http_status < 400:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        if 400 <= http_status and http_status < 500:
                            ec = woodchuck.TransferStatus.FailureGone
                        if 500 <= http_status and http_status < 600:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        wc()[self.key].update_failed(ec)
                    return doit
                if wc().available():
                    mainthread.execute(
                        register_stream_update_failed(
                            http_status=http_status),
                        async=True)
            else:
                currentTime = time.time()
                # The etag and modified value should only be updated if the content was not null
                try:
                    etag = tmp["etag"]
                except KeyError:
                    etag = None
                try:
                    modified = tmp["modified"]
                except KeyError:
                    modified = None
                try:
                    abs_url = urljoin(tmp["feed"]["link"],"/favicon.ico")
                    f = opener.open(abs_url)
                    data = f.read()
                    f.close()
                    outf = open(self.dir+"/favicon.ico", "w")
                    outf.write(data)
                    outf.close()
                    del data
                except (urllib2.HTTPError, urllib2.URLError), exception:
                    logger.debug("Could not download favicon %s: %s"
                                 % (abs_url, str (exception)))

                self.serial_execution_lock.acquire ()
                have_serial_execution_lock = True

                #reversedEntries = self.getEntries()
                #reversedEntries.reverse()

                ids = self.getIds()

                tmp["entries"].reverse()
                for entry in tmp["entries"]:
                    # Yield so as to make the main thread a bit more
                    # responsive.
                    time.sleep(0)

                    if JobManager().do_quit:
                        raise KeyboardInterrupt

                    received_base = progress_handler.stats['received']
                    sent_base = progress_handler.stats['sent']
                    object_size = 0

                    date = self.extractDate(entry)
                    try:
                        entry["title"]
                    except KeyError:
                        entry["title"] = "No Title"
                    try:
                        entry["link"]
                    except KeyError:
                        entry["link"] = ""
                    try:
                        entry["author"]
                    except KeyError:
                        entry["author"] = None
                    if not entry.has_key("id"):
                        entry["id"] = None
                    content = self.extractContent(entry)
                    object_size = len (content)
                    received_base -= len (content)
                    tmpEntry = {"title":entry["title"], "content":content,
                                "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
                    id = self.generateUniqueId(tmpEntry)

                    #articleTime = time.mktime(self.entries[id]["dateTuple"])
                    soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                    images = soup('img')
                    baseurl = tmpEntry["link"]
                    #if not id in ids:
                    if imageCache and len(images) > 0:
                        self.serial_execution_lock.release ()
                        have_serial_execution_lock = False
                        for img in images:
                            filename = self.addImage(
                                configdir, self.key, baseurl, img['src'],
                                opener=opener)
                            if filename:
                                img['src']="file://%s" %filename
                                count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
                                if count == 0:
                                    self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                                    self.db.commit()

                                try:
                                    object_size += os.path.getsize (filename)
                                except os.error, exception:
                                    logger.error ("Error getting size of %s: %s"
                                                  % (filename, exception))
                        self.serial_execution_lock.acquire ()
                        have_serial_execution_lock = True

                    tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
                    file = open(tmpEntry["contentLink"], "w")
                    file.write(soup.prettify())
                    file.close()
                    if id in ids:
                        self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
                        self.db.commit()
                    else:
                        values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
                        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
                        self.db.commit()
#                   else:
#                       try:
#                           self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
#                           self.db.commit()
#                           filename = configdir+self.key+".d/"+id+".html"
#                           file = open(filename,"a")
#                           utime(filename, None)
#                           file.close()
#                           images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
#                           for image in images:
#                                file = open(image[0],"a")
#                                utime(image[0], None)
#                                file.close()
#                       except:
#                           pass

                    # Register the object with Woodchuck and mark it as
                    # downloaded.
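                    # The per-article state (id, title, byte counts) is
                    # passed as parameters so each queued closure keeps
                    # the values from this loop iteration.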
                    def register_object_transferred(
                            id, title, publication_time,
                            sent, received, object_size):
                        def doit():
                            logger.debug("Registering transfer of object %s"
                                         % title)
                            try:
                                obj = wc()[self.key].object_register(
                                    object_identifier=id,
                                    human_readable_name=title)
                            except woodchuck.ObjectExistsError:
                                obj = wc()[self.key][id]
                            else:
                                obj.publication_time = publication_time
                                obj.transferred(
                                    indicator=(
                                        woodchuck.Indicator.ApplicationVisual
                                        |woodchuck.Indicator.StreamWide),
                                    transferred_down=received,
                                    transferred_up=sent,
                                    object_size=object_size)
                        return doit
                    if wc().available():
                        # If the entry does not contain a publication
                        # time, the attribute won't exist.
                        pubtime = entry.get('date_parsed', None)
                        if pubtime:
                            publication_time = time.mktime (pubtime)
                        else:
                            publication_time = None

                        sent = progress_handler.stats['sent'] - sent_base
                        received = (progress_handler.stats['received']
                                    - received_base)

                        mainthread.execute(
                            register_object_transferred(
                                id=id,
                                title=tmpEntry["title"],
                                publication_time=publication_time,
                                sent=sent, received=received,
                                object_size=object_size),
                            async=True)
                self.db.commit()

                logger.debug(
                    "%s: Update successful: transferred: %d/%d; objects: %d"
                    % (self.key,
                       progress_handler.stats['sent'],
                       progress_handler.stats['received'],
                       len (tmp.entries)))
                mainthread.execute (wc_success, async=True)
                success = True

            rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
            for row in rows:
                self.removeEntry(row[0])

            from glob import glob
            from os import stat
            for file in glob(configdir+self.key+".d/*"):
                stats = stat(file)
                # Put the two dates into matching format.
                lastmodDate = stats[8]
                expDate = time.time()-expiry*3
                # Check whether the file's last-modified date is outdated.
                if expDate > lastmodDate:
                    try:
                        # XXX: Tell woodchuck.
                        remove(file)
                    except OSError, exception:
                        logger.error('Could not remove %s: %s'
                                     % (file, str (exception)))
            logger.debug("updated %s: %fs in download, %fs in processing"
                         % (self.key, download_duration,
                            time.time () - process_start))
        except:
            logger.error("Updating %s: %s" % (self.key, traceback.format_exc()))
        finally:
            self.db.commit ()

            if have_serial_execution_lock:
                self.serial_execution_lock.release ()

            updateTime = 0
            try:
                rows = self.db.execute("SELECT MAX(date) FROM feed;")
                for row in rows:
                    updateTime=row[0]
            except Exception, e:
                logger.error("Fetching update time: %s: %s"
                             % (str(e), traceback.format_exc()))
            finally:
                if not success:
                    etag = None
                    modified = None
                title = None
                try:
                    title = tmp.feed.title
                except (AttributeError, UnboundLocalError), exception:
                    pass
                if postFeedUpdateFunc is not None:
                    postFeedUpdateFunc (self.key, updateTime, etag, modified,
                                        title, *postFeedUpdateFuncArgs)

    def setEntryRead(self, id):
        self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
        self.db.commit()

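        # doit() here closes over the method-local id, which is fixed
        # for this call, so deferring it to the main thread is safe.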
        def doit():
            try:
                wc()[self.key][id].used()
            except KeyError:
                pass
        if wc().available():
            mainthread.execute(doit, async=True)

    def setEntryUnread(self, id):
        self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
        self.db.commit()

    def markAllAsRead(self):
        self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
        self.db.commit()

    def isEntryRead(self, id):
        read_status = self.db.execute("SELECT read FROM feed WHERE id=?;", (id,) ).fetchone()[0]
        return read_status==1  # Returns True if read==1, and False if read==0

    def getTitle(self, id):
        return self.db.execute("SELECT title FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def getContentLink(self, id):
        return self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def getExternalLink(self, id):
        return self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def getDate(self, id):
        dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
        return time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(dateStamp))

    def getDateTuple(self, id):
        dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
        return time.localtime(dateStamp)

    def getDateStamp(self, id):
        return self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def generateUniqueId(self, entry):
        """
        Generate a stable identifier for the article.  For the same
        entry, this should result in the same identifier.  If
        possible, the identifier should remain the same even if the
        article is updated.
        """
        # Prefer the entry's id, which is supposed to be globally
        # unique.
        key = entry.get('id', None)
        if not key:
            # Next, try the link to the content.
            key = entry.get('link', None)
        if not key:
            # Ok, the title and the date concatenated are likely to be
            # relatively stable.  (Use empty strings as fallbacks so a
            # missing field does not raise a TypeError.)
            key = entry.get('title', '') + entry.get('date', '')
        if not key:
            # Hmm, the article's content will at least guarantee no
            # false negatives (i.e., missing articles)
            key = entry.get('content', None)
        if not key:
            # If all else fails, just use a random number.
            key = str (random.random ())
        return getId (key)

    def getIds(self, onlyUnread=False):
        if onlyUnread:
            rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
        else:
            rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
        ids = []
        for row in rows:
            ids.append(row[0])
        #ids.reverse()
        return ids

    def getNextId(self, id, forward=True):
        if forward:
            delta = 1
        else:
            delta = -1
        ids = self.getIds()
        index = ids.index(id)
        return ids[(index + delta) % len(ids)]

    def getPreviousId(self, id):
        return self.getNextId(id, forward=False)

    def getNumberOfUnreadItems(self):
        return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]

    def getNumberOfEntries(self):
        return self.db.execute("SELECT count(*) FROM feed;").fetchone()[0]

    def getArticle(self, entry):
        #self.setEntryRead(id)
        #entry = self.entries[id]
        title = entry['title']
        #content = entry.get('content', entry.get('summary_detail', {}))
        content = entry["content"]

        link = entry['link']
        author = entry['author']
        date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )

        #text = '''<div style="color: black; background-color: white;">'''
        text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
        text += "<html><head><title>" + title + "</title>"
        text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
        #text += '<style> body {-webkit-user-select: none;} </style>'
        text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
        if author != None:
            text += "<BR /><small><i>Author: " + author + "</i></small>"
        text += "<BR /><small><i>Date: " + date + "</i></small></div>"
        text += "<BR /><BR />"
        text += content
        text += "</body></html>"
        return text

    def getContent(self, id):
        contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
        try:
            # Use the contentLink just fetched from the database
            # (self.entries no longer exists in this class).
            file = open(contentLink)
            content = file.read()
            file.close()
        except:
            content = "Content unavailable"
        return content

    def extractDate(self, entry):
        if entry.has_key("updated_parsed"):
            return timegm(entry["updated_parsed"])
        elif entry.has_key("published_parsed"):
            return timegm(entry["published_parsed"])
        else:
            return time.time()

    def extractContent(self, entry):
        content = ""
        if entry.has_key('summary'):
            content = entry.get('summary', '')
        if entry.has_key('content'):
            if len(entry.content[0].value) > len(content):
                content = entry.content[0].value
        if content == "":
            content = entry.get('description', '')
        return content

    def removeEntry(self, id):
        contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
        if contentLink:
            try:
                remove(contentLink)
            except OSError, exception:
                logger.error("Deleting %s: %s" % (contentLink, str (exception)))
        self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
        self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
        self.db.commit()

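        # As in setEntryRead, doit() captures the method-local id for
        # the deferred Woodchuck notification.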
        def doit():
            try:
                wc()[self.key][id].files_deleted (
                    woodchuck.DeletionResponse.Deleted)
                del wc()[self.key][id]
            except KeyError:
                pass
        if wc().available():
            mainthread.execute (doit, async=True)

class ArchivedArticles(Feed):
    def addArchivedArticle(self, title, link, date, configdir):
        id = self.generateUniqueId({"date":date, "title":title})
        values = (id, title, link, date, 0, link, 0)
        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
        self.db.commit()

    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
        currentTime = 0
        rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
        for row in rows:
            currentTime = time.time()
            id = row[0]
            link = row[1]
            f = urllib2.urlopen(link)
            #entry["content"] = f.read()
            html = f.read()
            f.close()
            soup = BeautifulSoup(html)
            images = soup('img')
            baseurl = link
            for img in images:
                filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
                img['src']=filename
                self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                self.db.commit()
            contentLink = configdir+self.key+".d/"+id+".html"
            file = open(contentLink, "w")
            file.write(soup.prettify())
            file.close()

            self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
            self.db.commit()
        return (currentTime, None, None)

    def purgeReadArticles(self):
        rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
        #ids = self.getIds()
        for row in rows:
            self.removeArticle(row[0])

    def removeArticle(self, id):
        rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
        for row in rows:
            try:
                count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
                if count == 0:
                    os.remove(row[0])
            except:
                pass
        self.removeEntry(id)

class Listing:
    def _getdb(self):
        try:
            db = self.tls.db
        except AttributeError:
            db = sqlite3.connect("%s/feeds.db" % self.configdir, timeout=120)
            self.tls.db = db
        return db
    db = property(_getdb)

    # Lists all the feeds in a dictionary and exposes the data.
    def __init__(self, config, configdir):
        self.config = config
        self.configdir = configdir

        self.tls = threading.local ()

        try:
            table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
            if table == None:
                self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
                self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
                self.addCategory("Default Category")
                if isfile(self.configdir+"feeds.pickle"):
                    self.importOldFormatFeeds()
                else:
                    self.addFeed("Maemo News", "http://maemo.org/news/items.xml")
            else:
                from string import find, upper
                if find(upper(table[0]), "WIDGET")<0:
                    self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
                    self.db.execute("UPDATE feeds SET widget=1;")
                    self.db.commit()
                if find(upper(table[0]), "CATEGORY")<0:
                    self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
                    self.addCategory("Default Category")
                    self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
                    self.db.execute("UPDATE feeds SET category=1;")
            self.db.commit()
        except:
            pass

        # Check that Woodchuck's state is up to date with respect to
        # our state.
        updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
        wc_init (self, updater)
        if wc().available() and updater:
            # The list of known streams.
            streams = wc().streams_list ()
            stream_ids = [s.identifier for s in streams]

            # Register any unknown streams.  Remove known streams from
            # STREAM_IDS.
            for key in self.getListOfFeeds():
                title = self.getFeedTitle(key)
                # XXX: We should also check whether the list of
                # articles/objects in each feed/stream is up to date.
                if key not in stream_ids:
                    logger.debug(
                        "Registering previously unknown channel: %s (%s)"
                        % (key, title,))
                    # Use a default refresh interval of 6 hours.
                    wc().stream_register (key, title, 6 * 60 * 60)
                else:
                    # Make sure the human readable name is up to date.
                    if wc()[key].human_readable_name != title:
                        wc()[key].human_readable_name = title
                    stream_ids.remove (key)

            # Unregister any streams that are no longer subscribed to.
            for id in stream_ids:
                logger.debug("Unregistering %s" % (id,))
                wc().stream_unregister (id)

    def importOldFormatFeeds(self):
        """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
        import rss
        listing = rss.Listing(self.configdir)
        rank = 0
        for id in listing.getListOfFeeds():
            try:
                rank += 1
                values = (id, listing.getFeedTitle(id), listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
                self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)
                self.db.commit()

                feed = listing.getFeed(id)
                new_feed = self.getFeed(id)

                items = feed.getIds()[:]
                items.reverse()
                for item in items:
                    if feed.isEntryRead(item):
                        read_status = 1
                    else:
                        read_status = 0
                    date = timegm(feed.getDateTuple(item))
                    title = feed.getTitle(item)
                    newId = new_feed.generateUniqueId({"date":date, "title":title})
                    # The updated column is a float timestamp;
                    # tuple(time.time()) would raise a TypeError.
                    values = (newId, title, feed.getContentLink(item), date, time.time(), feed.getExternalLink(item), read_status)
                    new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
                    new_feed.db.commit()
                    try:
                        images = feed.getImages(item)
                        for image in images:
                            # Key the images by the new entry id so
                            # removeEntry() can find them later.
                            new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (newId, image) )
                            new_feed.db.commit()
                    except:
                        pass
                self.updateUnread(id)
            except:
                logger.error("importOldFormatFeeds: %s"
                             % (traceback.format_exc(),))
        remove(self.configdir+"feeds.pickle")

    def addArchivedArticle(self, key, index):
        feed = self.getFeed(key)
        title = feed.getTitle(index)
        link = feed.getExternalLink(index)
        date = feed.getDate(index)
        count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
        if count == 0:
            self.addFeed("Archived Articles", "", id="ArchivedArticles")

        archFeed = self.getFeed("ArchivedArticles")
        archFeed.addArchivedArticle(title, link, date, self.configdir)
        self.updateUnread("ArchivedArticles")

    def updateFeed(self, key, expiryTime=None, proxy=None, imageCache=None,
                   priority=0):
        if expiryTime is None:
            expiryTime = self.config.getExpiry()
        if not expiryTime:
            # Default to 24 hours
            expiryTime = 24
        if proxy is None:
            (use_proxy, proxy) = self.config.getProxy()
            if not use_proxy:
                proxy = None
        if imageCache is None:
            imageCache = self.config.getImageCache()

        feed = self.getFeed(key)
        (url, etag, modified) = self.db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
        try:
            modified = time.struct_time(eval(modified))
        except:
            modified = None
        feed.updateFeed(
            self.configdir, url, etag, modified, expiryTime, proxy, imageCache,
            priority, postFeedUpdateFunc=self._queuePostFeedUpdate)

    def _queuePostFeedUpdate(self, *args, **kwargs):
        mainthread.execute (self._postFeedUpdate, async=True, *args, **kwargs)

    def _postFeedUpdate(self, key, updateTime, etag, modified, title):
        if modified==None:
            modified="None"
        else:
            modified=str(tuple(modified))
        if updateTime > 0:
            self.db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
        else:
            self.db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )

        if title is not None:
            self.db.execute("UPDATE feeds SET title=(case WHEN title=='' THEN ? ELSE title END) where id=?;",
                            (title, key))
        self.db.commit()
        self.updateUnread(key)

        update_server_object().ArticleCountUpdated()

        stats = JobManager().stats()
        global jobs_at_start
        completed = stats['jobs-completed'] - jobs_at_start
        in_progress = stats['jobs-in-progress']
        queued = stats['jobs-queued']

        percent = (100 * ((completed + in_progress / 2.))
                   / (completed + in_progress + queued))

        update_server_object().UpdateProgress(
            percent, completed, in_progress, queued, 0, 0, 0, key)

        if in_progress == 0 and queued == 0:
            jobs_at_start = stats['jobs-completed']

    def getFeed(self, key):
        if key == "ArchivedArticles":
            return ArchivedArticles(self.configdir, key)
        return Feed(self.configdir, key)

    def editFeed(self, key, title, url, category=None):
        if category:
            self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
        else:
            self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
        self.db.commit()

        if wc().available():
            try:
                wc()[key].human_readable_name = title
            except KeyError:
                logger.debug("Feed %s (%s) unknown." % (key, title))

    def getFeedUpdateTime(self, key):
        return time.ctime(self.db.execute("SELECT updateTime FROM feeds WHERE id=?;", (key,)).fetchone()[0])

    def getFeedNumberOfUnreadItems(self, key):
        return self.db.execute("SELECT unread FROM feeds WHERE id=?;", (key,)).fetchone()[0]

    def getFeedTitle(self, key):
        (title, url) = self.db.execute("SELECT title, url FROM feeds WHERE id=?;", (key,)).fetchone()
        if title:
            return title
        return url

    def getFeedUrl(self, key):
        return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]

    def getFeedCategory(self, key):
        return self.db.execute("SELECT category FROM feeds WHERE id=?;", (key,)).fetchone()[0]

    def getListOfFeeds(self, category=None):
        if category:
            rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
        else:
            rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
        keys = []
        for row in rows:
            if row[0]:
                keys.append(row[0])
        return keys

    def getListOfCategories(self):
        rows = self.db.execute("SELECT id FROM categories ORDER BY rank;" )
        keys = []
        for row in rows:
            if row[0]:
                keys.append(row[0])
        return keys

    def getCategoryTitle(self, id):
        row = self.db.execute("SELECT title FROM categories WHERE id=?;", (id, )).fetchone()
        return row[0]

    def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
        if   order == "Most unread":
            tmp = "ORDER BY unread DESC"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1], reverse=True)
        elif order == "Least unread":
            tmp = "ORDER BY unread"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1])
        elif order == "Most recent":
            tmp = "ORDER BY updateTime DESC"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2], reverse=True)
        elif order == "Least recent":
            tmp = "ORDER BY updateTime"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2])
        else: # order == "Manual" or invalid value...
            tmp = "ORDER BY rank"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][0])
        if onlyUnread:
            sql = "SELECT id FROM feeds WHERE unread>0 AND category=%s " %category + tmp
        else:
            sql = "SELECT id FROM feeds WHERE category=%s " %category + tmp
        rows = self.db.execute(sql)
        keys = []
        for row in rows:
            if row[0]:
                keys.append(row[0])
        return keys

    def getFavicon(self, key):
        filename = "%s%s.d/favicon.ico" % (self.configdir, key)
        if isfile(filename):
            return filename
        else:
            return False

    def updateUnread(self, key):
        feed = self.getFeed(key)
        self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
        self.db.commit()

    def addFeed(self, title, url, id=None, category=1):
        if not id:
            id = getId(url)
        count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
        if count == 0:
            max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
            if max_rank == None:
                max_rank = 0
            values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
            self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?,?);", values)
            self.db.commit()
            # Ask for the feed object, it will create the necessary tables
            self.getFeed(id)

            if wc().available():
                # Register the stream with Woodchuck.  Update approximately
                # every 6 hours.
                wc().stream_register(stream_identifier=id,
                                     human_readable_name=title,
                                     freshness=6*60*60)

            return True
        else:
            return False

    def addCategory(self, title):
        rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
        if rank==None:
            rank=1
        id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
        if id==None:
            id=1
        self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
        self.db.commit()

    def removeFeed(self, key):
        if wc().available ():
            try:
                del wc()[key]
            except KeyError:
                logger.debug("Removing unregistered feed %s failed" % (key,))

        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
        self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
        self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
        self.db.commit()

        if isdir(self.configdir+key+".d/"):
            rmtree(self.configdir+key+".d/")

    def removeCategory(self, key):
        if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
            rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,) ).fetchone()[0]
            self.db.execute("DELETE FROM categories WHERE id=?;", (key, ))
            self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
            self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
            self.db.commit()

    #def saveConfig(self):
    #    self.listOfFeeds["feedingit-order"] = self.sortedKeys
    #    file = open(self.configdir+"feeds.pickle", "w")
    #    pickle.dump(self.listOfFeeds, file)
    #    file.close()

    def moveUp(self, key):
        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
        if rank>0:
            self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
            self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
            self.db.commit()

    def moveCategoryUp(self, key):
        rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
        if rank>0:
            self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
            self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
            self.db.commit()

    def moveDown(self, key):
        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
        max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
        if rank<max_rank:
            self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
            self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
            self.db.commit()

    def moveCategoryDown(self, key):
        rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
        max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
        if rank<max_rank:
            self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
            self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )
            self.db.commit()