Correctly set update time to never.

[feedingit] / src / rss_sqlite.py
diff --git a/src/rss_sqlite.py b/src/rss_sqlite.py

index 64aef0a..75c0f34 100644 (file)
--- a/src/rss_sqlite.py
+++ b/src/rss_sqlite.py
@@ -24,10 +24,13 @@
  # Description : Simple RSS Reader
  # ============================================================================
  
  # Description : Simple RSS Reader
  # ============================================================================
  
+from __future__ import with_statement
+
  import sqlite3
  from os.path import isfile, isdir
  from shutil import rmtree
  from os import mkdir, remove, utime
  import sqlite3
  from os.path import isfile, isdir
  from shutil import rmtree
  from os import mkdir, remove, utime
+import os
  import md5
  import feedparser
  import time
  import md5
  import feedparser
  import time
@@ -36,11 +39,51 @@ from BeautifulSoup import BeautifulSoup
  from urlparse import urljoin
  from calendar import timegm
  import threading
  from urlparse import urljoin
  from calendar import timegm
  import threading
+import traceback
+from wc import wc, wc_init, woodchuck
+import subprocess
+import dbus
+from updatedbus import update_server_object
+
+from jobmanager import JobManager
+import mainthread
+from httpprogresshandler import HTTPProgressHandler
+import random
+import sys
+import logging
+logger = logging.getLogger(__name__)
  
  def getId(string):
      return md5.new(string).hexdigest()
  
  
  def getId(string):
      return md5.new(string).hexdigest()
  
+def download_callback(connection):
+    if JobManager().do_quit:
+        raise KeyboardInterrupt
+
+def downloader(progress_handler=None, proxy=None):
+    openers = []
+
+    if progress_handler is not None:
+        openers.append(progress_handler)
+    else:
+        openers.append(HTTPProgressHandler(download_callback))
+
+    if proxy:
+        openers.append(proxy)
+
+    return urllib2.build_opener(*openers)
+
+# If not None, a subprocess.Popen object corresponding to a
+# update_feeds.py process.
+update_feed_process = None
+
+update_feeds_iface = None
+
+jobs_at_start = 0
+
  class Feed:
  class Feed:
+    serial_execution_lock = threading.Lock()
+
      def _getdb(self):
          try:
              db = self.tls.db
      def _getdb(self):
          try:
              db = self.tls.db
@@ -63,17 +106,36 @@ class Feed:
              self.db.execute("CREATE TABLE images (id text, imagePath text);")
              self.db.commit()
  
              self.db.execute("CREATE TABLE images (id text, imagePath text);")
              self.db.commit()
  
-    def addImage(self, configdir, key, baseurl, url):
+    def addImage(self, configdir, key, baseurl, url, proxy=None, opener=None):
          filename = configdir+key+".d/"+getId(url)
          if not isfile(filename):
              try:
          filename = configdir+key+".d/"+getId(url)
          if not isfile(filename):
              try:
-                f = urllib2.urlopen(urljoin(baseurl,url))
-                outf = open(filename, "w")
-                outf.write(f.read())
-                f.close()
-                outf.close()
+                if not opener:
+                    opener = downloader(proxy=proxy)
+
+                abs_url = urljoin(baseurl,url)
+                f = opener.open(abs_url)
+                try:
+                    with open(filename, "w") as outf:
+                        for data in f:
+                            outf.write(data)
+                finally:
+                    f.close()
+            except (urllib2.HTTPError, urllib2.URLError, IOError), exception:
+                logger.info("Could not download image %s: %s"
+                            % (abs_url, str (exception)))
+                return None
              except:
              except:
-                print "Could not download " + url
+                exception = sys.exc_info()[0]
+
+                logger.info("Downloading image %s: %s" %
+                            (abs_url, traceback.format_exc()))
+                try:
+                    remove(filename)
+                except OSError:
+                    pass
+
+                raise exception
          else:
              #open(filename,"a").close()  # "Touch" the file
              file = open(filename,"a")
          else:
              #open(filename,"a").close()  # "Touch" the file
              file = open(filename,"a")
@@ -81,151 +143,403 @@ class Feed:
              file.close()
          return filename
  
              file.close()
          return filename
  
-    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
-        # Expiry time is in hours
-        if proxy == None:
-            tmp=feedparser.parse(url, etag = etag, modified = modified)
+    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
+        if (os.path.basename(sys.argv[0]) == 'update_feeds.py'):
+            def doit():
+                def it():
+                    self._updateFeed(configdir, url, etag, modified, expiryTime, proxy, imageCache, postFeedUpdateFunc, *postFeedUpdateFuncArgs)
+                return it
+            JobManager().execute(doit(), self.key, priority=priority)
          else:
          else:
-            tmp=feedparser.parse(url, etag = etag, modified = modified, handlers = [proxy])
-        expiry = float(expiryTime) * 3600.
+            def send_update_request():
+                global update_feeds_iface
+                if update_feeds_iface is None:
+                    bus=dbus.SessionBus()
+                    remote_object = bus.get_object(
+                        "org.marcoz.feedingit", # Connection name
+                        "/org/marcoz/feedingit/update" # Object's path
+                        )
+                    update_feeds_iface = dbus.Interface(
+                        remote_object, 'org.marcoz.feedingit')
  
  
-        currentTime = 0
-        # Check if the parse was succesful (number of entries > 0, else do nothing)
-        if len(tmp["entries"])>0:
-           currentTime = time.time()
-           # The etag and modified value should only be updated if the content was not null
-           try:
-               etag = tmp["etag"]
-           except KeyError:
-               etag = None
-           try:
-               modified = tmp["modified"]
-           except KeyError:
-               modified = None
-           try:
-               f = urllib2.urlopen(urljoin(tmp["feed"]["link"],"/favicon.ico"))
-               data = f.read()
-               f.close()
-               outf = open(self.dir+"/favicon.ico", "w")
-               outf.write(data)
-               outf.close()
-               del data
-           except:
-               #import traceback
-               #traceback.print_exc()
-                pass
+                try:
+                    update_feeds_iface.Update(self.key)
+                except Exception, e:
+                    logger.error("Invoking org.marcoz.feedingit.Update: %s"
+                                 % str(e))
+                    update_feeds_iface = None
+                else:
+                    return True
+
+            if send_update_request():
+                # Success!  It seems we were able to start the update
+                # daemon via dbus (or, it was already running).
+                return
  
  
+            global update_feed_process
+            if (update_feed_process is None
+                or update_feed_process.poll() is not None):
+                # The update_feeds process is not running.  Start it.
+                update_feeds = os.path.join(os.path.dirname(__file__),
+                                            'update_feeds.py')
+                argv = ['/usr/bin/env', 'python', update_feeds, '--daemon' ]
+                logger.debug("Starting update_feeds: running %s"
+                             % (str(argv),))
+                update_feed_process = subprocess.Popen(argv)
+                # Make sure the dbus calls go to the right process:
+                # rebind.
+                update_feeds_iface = None
  
  
-           #reversedEntries = self.getEntries()
-           #reversedEntries.reverse()
+            for _ in xrange(5):
+                if send_update_request():
+                    break
+                time.sleep(1)
+
+    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
+        success = False
+        have_serial_execution_lock = False
+        try:
+            download_start = time.time ()
  
  
-           ids = self.getIds()
+            progress_handler = HTTPProgressHandler(download_callback)
+
+            openers = [progress_handler]
+            if proxy:
+                openers.append (proxy)
+            kwargs = {'handlers':openers}
+            
+            tmp=feedparser.parse(url, etag=etag, modified=modified, **kwargs)
+            download_duration = time.time () - download_start
+    
+            opener = downloader(progress_handler, proxy)
  
  
-           tmp["entries"].reverse()
-           for entry in tmp["entries"]:
-               date = self.extractDate(entry)
+            if JobManager().do_quit:
+                raise KeyboardInterrupt
+
+            process_start = time.time()
+
+            # Expiry time is in hours
+            expiry = float(expiryTime) * 3600.
+    
+            currentTime = 0
+    
+            def wc_success():
+                try:
+                    wc().stream_register (self.key, "", 6 * 60 * 60)
+                except woodchuck.ObjectExistsError:
+                    pass
+                try:
+                    wc()[self.key].updated (
+                        indicator=(woodchuck.Indicator.ApplicationVisual
+                                   |woodchuck.Indicator.StreamWide),
+                        transferred_down=progress_handler.stats['received'],
+                        transferred_up=progress_handler.stats['sent'],
+                        transfer_time=download_start,
+                        transfer_duration=download_duration,
+                        new_objects=len (tmp.entries),
+                        objects_inline=len (tmp.entries))
+                except KeyError:
+                    logger.warn(
+                        "Failed to register update of %s with woodchuck!"
+                        % (self.key))
+    
+            http_status = tmp.get ('status', 200)
+    
+            # Check if the parse was succesful.  If the http status code
+            # is 304, then the download was successful, but there is
+            # nothing new.  Indeed, no content is returned.  This make a
+            # 304 look like an error because there are no entries and the
+            # parse fails.  But really, everything went great!  Check for
+            # this first.
+            if http_status == 304:
+                logger.debug("%s: No changes to feed." % (self.key,))
+                mainthread.execute(wc_success, async=True)
+                success = True
+            elif len(tmp["entries"])==0 and not tmp.version:
+                # An error occured fetching or parsing the feed.  (Version
+                # will be either None if e.g. the connection timed our or
+                # '' if the data is not a proper feed)
+                logger.error(
+                    "Error fetching %s: version is: %s: error: %s"
+                    % (url, str (tmp.version),
+                       str (tmp.get ('bozo_exception', 'Unknown error'))))
+                logger.debug(tmp)
+                def register_stream_update_failed(http_status):
+                    def doit():
+                        logger.debug("%s: stream update failed!" % self.key)
+    
+                        try:
+                            # It's not easy to get the feed's title from here.
+                            # At the latest, the next time the application is
+                            # started, we'll fix up the human readable name.
+                            wc().stream_register (self.key, "", 6 * 60 * 60)
+                        except woodchuck.ObjectExistsError:
+                            pass
+                        ec = woodchuck.TransferStatus.TransientOther
+                        if 300 <= http_status and http_status < 400:
+                            ec = woodchuck.TransferStatus.TransientNetwork
+                        if 400 <= http_status and http_status < 500:
+                            ec = woodchuck.TransferStatus.FailureGone
+                        if 500 <= http_status and http_status < 600:
+                            ec = woodchuck.TransferStatus.TransientNetwork
+                        wc()[self.key].update_failed(ec)
+                    return doit
+                if wc().available:
+                    mainthread.execute(
+                        register_stream_update_failed(
+                            http_status=http_status),
+                        async=True)
+            else:
+               currentTime = time.time()
+               # The etag and modified value should only be updated if the content was not null
                 try:
                 try:
-                   entry["title"]
-               except:
-                   entry["title"] = "No Title"
+                   etag = tmp["etag"]
+               except KeyError:
+                   etag = None
                 try:
                 try:
-                   entry["link"]
-               except:
-                   entry["link"] = ""
+                   modified = tmp["modified"]
+               except KeyError:
+                   modified = None
                 try:
                 try:
-                   entry["author"]
-               except:
-                   entry["author"] = None
-               if(not(entry.has_key("id"))):
-                   entry["id"] = None 
-               tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
-                            "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
-               id = self.generateUniqueId(tmpEntry)
-               
-               #articleTime = time.mktime(self.entries[id]["dateTuple"])
-               soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
-               images = soup('img')
-               baseurl = tmpEntry["link"]
-               #if not id in ids:
-               if imageCache:
-                   for img in images:
-                      try:
-                        filename = self.addImage(configdir, self.key, baseurl, img['src'])
-                        img['src']="file://%s" %filename
-                        count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
-                        if count == 0:
-                            self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
-                      except:
-                          import traceback
-                          traceback.print_exc()
-                          print "Error downloading image %s" % img
-               tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
-               file = open(tmpEntry["contentLink"], "w")
-               file.write(soup.prettify())
-               file.close()
-               if id in ids:
-                   self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
-                   self.db.commit()
-               else:
-                   values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
-                   self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
-                   self.db.commit()
-#               else:
-#                   try:
-#                       self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
-#                       self.db.commit()
-#                       filename = configdir+self.key+".d/"+id+".html"
-#                       file = open(filename,"a")
-#                       utime(filename, None)
-#                       file.close()
-#                       images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
-#                       for image in images:
-#                            file = open(image[0],"a")
-#                            utime(image[0], None)
-#                            file.close()
-#                   except:
-#                       pass
-           self.db.commit()
+                   abs_url = urljoin(tmp["feed"]["link"],"/favicon.ico")
+                   f = opener.open(abs_url)
+                   data = f.read()
+                   f.close()
+                   outf = open(self.dir+"/favicon.ico", "w")
+                   outf.write(data)
+                   outf.close()
+                   del data
+               except (urllib2.HTTPError, urllib2.URLError), exception:
+                   logger.debug("Could not download favicon %s: %s"
+                                % (abs_url, str (exception)))
+    
+               self.serial_execution_lock.acquire ()
+               have_serial_execution_lock = True
+
+               #reversedEntries = self.getEntries()
+               #reversedEntries.reverse()
+    
+               ids = self.getIds()
+    
+               tmp["entries"].reverse()
+               for entry in tmp["entries"]:
+                   # Yield so as to make the main thread a bit more
+                   # responsive.
+                   time.sleep(0)
+    
+                   if JobManager().do_quit:
+                       raise KeyboardInterrupt
+
+                   received_base = progress_handler.stats['received']
+                   sent_base = progress_handler.stats['sent']
+                   object_size = 0
+
+                   date = self.extractDate(entry)
+                   try:
+                       entry["title"]
+                   except KeyError:
+                       entry["title"] = "No Title"
+                   try :
+                       entry["link"]
+                   except KeyError:
+                       entry["link"] = ""
+                   try:
+                       entry["author"]
+                   except KeyError:
+                       entry["author"] = None
+                   if(not(entry.has_key("id"))):
+                       entry["id"] = None
+                   content = self.extractContent(entry)
+                   object_size = len (content)
+                   received_base -= len (content)
+                   tmpEntry = {"title":entry["title"], "content":content,
+                                "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
+                   id = self.generateUniqueId(tmpEntry)
+                   
+                   #articleTime = time.mktime(self.entries[id]["dateTuple"])
+                   soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
+                   images = soup('img')
+                   baseurl = tmpEntry["link"]
+                   #if not id in ids:
+                   if imageCache and len(images) > 0:
+                       self.serial_execution_lock.release ()
+                       have_serial_execution_lock = False
+                       for img in images:
+                           filename = self.addImage(
+                               configdir, self.key, baseurl, img['src'],
+                               opener=opener)
+                           if filename:
+                                img['src']="file://%s" %filename
+                                count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
+                                if count == 0:
+                                    self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
+                                    self.db.commit()
+    
+                                try:
+                                    object_size += os.path.getsize (filename)
+                                except os.error, exception:
+                                    logger.error ("Error getting size of %s: %s"
+                                                  % (filename, exception))
+                       self.serial_execution_lock.acquire ()
+                       have_serial_execution_lock = True
+    
+                   tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
+                   file = open(tmpEntry["contentLink"], "w")
+                   file.write(soup.prettify())
+                   file.close()
+                   if id in ids:
+                       self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
+                       self.db.commit()
+                   else:
+                       values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
+                       self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
+                       self.db.commit()
+#                   else:
+#                       try:
+#                           self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
+#                           self.db.commit()
+#                           filename = configdir+self.key+".d/"+id+".html"
+#                           file = open(filename,"a")
+#                           utime(filename, None)
+#                           file.close()
+#                           images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
+#                           for image in images:
+#                                file = open(image[0],"a")
+#                                utime(image[0], None)
+#                                file.close()
+#                       except:
+#                           pass
+    
+                   # Register the object with Woodchuck and mark it as
+                   # downloaded.
+                   def register_object_transferred(
+                           id, title, publication_time,
+                           sent, received, object_size):
+                       def doit():
+                           logger.debug("Registering transfer of object %s"
+                                        % title)
+                           try:
+                               obj = wc()[self.key].object_register(
+                                   object_identifier=id,
+                                   human_readable_name=title)
+                           except woodchuck.ObjectExistsError:
+                               obj = wc()[self.key][id]
+                           else:
+                               obj.publication_time = publication_time
+                               obj.transferred(
+                                   indicator=(
+                                       woodchuck.Indicator.ApplicationVisual
+                                       |woodchuck.Indicator.StreamWide),
+                                   transferred_down=received,
+                                   transferred_up=sent,
+                                   object_size=object_size)
+                       return doit
+                   if wc().available:
+                       # If the entry does not contain a publication
+                       # time, the attribute won't exist.
+                       pubtime = entry.get('date_parsed', None)
+                       if pubtime:
+                           publication_time = time.mktime (pubtime)
+                       else:
+                           publication_time = None
+
+                       sent = progress_handler.stats['sent'] - sent_base
+                       received = (progress_handler.stats['received']
+                                   - received_base)
+
+                       mainthread.execute(
+                           register_object_transferred(
+                               id=id,
+                               title=tmpEntry["title"],
+                               publication_time=publication_time,
+                               sent=sent, received=received,
+                               object_size=object_size),
+                           async=True)
+               self.db.commit()
+
+               logger.debug (
+                   "%s: Update successful: transferred: %d/%d; objects: %d)"
+                   % (self.key,
+                      progress_handler.stats['sent'],
+                      progress_handler.stats['received'],
+                      len (tmp.entries)))
+               mainthread.execute (wc_success, async=True)
+               success = True
+
+            rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
+            for row in rows:
+               self.removeEntry(row[0])
              
              
-           
-        rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
-        for row in rows:
-           self.removeEntry(row[0])
-        
-        from glob import glob
-        from os import stat
-        for file in glob(configdir+self.key+".d/*"):
-            #
-            stats = stat(file)
-            #
-            # put the two dates into matching format
-            #
-            lastmodDate = stats[8]
-            #
-            expDate = time.time()-expiry*3
-            # check if image-last-modified-date is outdated
-            #
-            if expDate > lastmodDate:
+            from glob import glob
+            from os import stat
+            for file in glob(configdir+self.key+".d/*"):
                  #
                  #
-                try:
-                    #
-                    #print 'Removing', file
-                    #
-                    remove(file) # commented out for testing
-                    #
-                except OSError:
+                stats = stat(file)
+                #
+                # put the two dates into matching format
+                #
+                lastmodDate = stats[8]
+                #
+                expDate = time.time()-expiry*3
+                # check if image-last-modified-date is outdated
+                #
+                if expDate > lastmodDate:
                      #
                      #
-                    print 'Could not remove', file
-        updateTime = 0
-        rows = self.db.execute("SELECT MAX(date) FROM feed;")
-        for row in rows:
-            updateTime=row[0]
-        return (updateTime, etag, modified)
-    
+                    try:
+                        #
+                        #print 'Removing', file
+                        #
+                        # XXX: Tell woodchuck.
+                        remove(file) # commented out for testing
+                        #
+                    except OSError, exception:
+                        #
+                        logger.error('Could not remove %s: %s'
+                                     % (file, str (exception)))
+            logger.debug("updated %s: %fs in download, %fs in processing"
+                         % (self.key, download_duration,
+                            time.time () - process_start))
+        except:
+            logger.error("Updating %s: %s" % (self.key, traceback.format_exc()))
+        finally:
+            self.db.commit ()
+
+            if have_serial_execution_lock:
+                self.serial_execution_lock.release ()
+
+            updateTime = 0
+            try:
+                rows = self.db.execute("SELECT MAX(date) FROM feed;")
+                for row in rows:
+                    updateTime=row[0]
+            except Exception, e:
+                logger.error("Fetching update time: %s: %s"
+                             % (str(e), traceback.format_exc()))
+            finally:
+                if not success:
+                    etag = None
+                    modified = None
+                title = None
+                try:
+                    title = tmp.feed.title
+                except (AttributeError, UnboundLocalError), exception:
+                    pass
+                if postFeedUpdateFunc is not None:
+                    postFeedUpdateFunc (self.key, updateTime, etag, modified,
+                                        title, *postFeedUpdateFuncArgs)
+
      def setEntryRead(self, id):
          self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
          self.db.commit()
      def setEntryRead(self, id):
          self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
          self.db.commit()
-        
+
+        def doit():
+            try:
+                wc()[self.key][id].used()
+            except KeyError:
+                pass
+        if wc().available():
+            mainthread.execute(doit, async=True)
+
      def setEntryUnread(self, id):
          self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
          self.db.commit()     
      def setEntryUnread(self, id):
          self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
          self.db.commit()     
@@ -259,14 +573,30 @@ class Feed:
          return self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
      
      def generateUniqueId(self, entry):
          return self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
      
      def generateUniqueId(self, entry):
-        if(entry["id"] != None):
-            return getId(str(entry["id"]))
-        else:
-            try:
-                return getId(str(entry["date"]) + str(entry["title"]))
-            except:
-                #print entry["title"]
-                return getId(str(entry["date"]))
+        """
+        Generate a stable identifier for the article.  For the same
+        entry, this should result in the same identifier.  If
+        possible, the identifier should remain the same even if the
+        article is updated.
+        """
+        # Prefer the entry's id, which is supposed to be globally
+        # unique.
+        key = entry.get('id', None)
+        if not key:
+            # Next, try the link to the content.
+            key = entry.get('link', None)
+        if not key:
+            # Ok, the title and the date concatenated are likely to be
+            # relatively stable.
+            key = entry.get('title', None) + entry.get('date', None)
+        if not key:
+            # Hmm, the article's content will at least guarantee no
+            # false negatives (i.e., missing articles)
+            key = entry.get('content', None)
+        if not key:
+            # If all else fails, just use a random number.
+            key = str (random.random ())
+        return getId (key)
      
      def getIds(self, onlyUnread=False):
          if onlyUnread:
      
      def getIds(self, onlyUnread=False):
          if onlyUnread:
@@ -279,15 +609,17 @@ class Feed:
          #ids.reverse()
          return ids
      
          #ids.reverse()
          return ids
      
-    def getNextId(self, id):
+    def getNextId(self, id, forward=True):
+        if forward:
+            delta = 1
+        else:
+            delta = -1
          ids = self.getIds()
          index = ids.index(id)
          ids = self.getIds()
          index = ids.index(id)
-        return ids[(index+1)%len(ids)]
+        return ids[(index + delta) % len(ids)]
          
      def getPreviousId(self, id):
          
      def getPreviousId(self, id):
-        ids = self.getIds()
-        index = ids.index(id)
-        return ids[(index-1)%len(ids)]
+        return self.getNextId(id, forward=False)
      
      def getNumberOfUnreadItems(self):
          return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]
      
      def getNumberOfUnreadItems(self):
          return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]
@@ -355,10 +687,20 @@ class Feed:
              try:
                  remove(contentLink)
              except OSError, exception:
              try:
                  remove(contentLink)
              except OSError, exception:
-                print "Deleting %s: %s" % (contentLink, str (exception))
+                logger.error("Deleting %s: %s" % (contentLink, str (exception)))
          self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
          self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
          self.db.commit()
          self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
          self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
          self.db.commit()
+
+        def doit():
+            try:
+                wc()[self.key][id].files_deleted (
+                    woodchuck.DeletionResponse.Deleted)
+                del wc()[self.key][id]
+            except KeyError:
+                pass
+        if wc().available():
+            mainthread.execute (doit, async=True)
   
  class ArchivedArticles(Feed):    
      def addArchivedArticle(self, title, link, date, configdir):
   
  class ArchivedArticles(Feed):    
      def addArchivedArticle(self, title, link, date, configdir):
@@ -382,9 +724,10 @@ class ArchivedArticles(Feed):
              images = soup('img')
              baseurl = link
              for img in images:
              images = soup('img')
              baseurl = link
              for img in images:
-                filename = self.addImage(configdir, self.key, baseurl, img['src'])
+                filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
                  img['src']=filename
                  self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                  img['src']=filename
                  self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
+                self.db.commit()
              contentLink = configdir+self.key+".d/"+id+".html"
              file = open(contentLink, "w")
              file.write(soup.prettify())
              contentLink = configdir+self.key+".d/"+id+".html"
              file = open(contentLink, "w")
              file.write(soup.prettify())
@@ -422,7 +765,8 @@ class Listing:
      db = property(_getdb)
  
      # Lists all the feeds in a dictionary, and expose the data
      db = property(_getdb)
  
      # Lists all the feeds in a dictionary, and expose the data
-    def __init__(self, configdir):
+    def __init__(self, config, configdir):
+        self.config = config
          self.configdir = configdir
  
          self.tls = threading.local ()
          self.configdir = configdir
  
          self.tls = threading.local ()
@@ -448,10 +792,43 @@ class Listing:
                      self.addCategory("Default Category")
                      self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
                      self.db.execute("UPDATE feeds SET category=1;")
                      self.addCategory("Default Category")
                      self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
                      self.db.execute("UPDATE feeds SET category=1;")
-                    self.db.commit()
+            self.db.commit()
          except:
              pass
  
          except:
              pass
  
+        # Check that Woodchuck's state is up to date with respect our
+        # state.
+        updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
+        wc_init (self, True if updater else False)
+        if wc().available() and updater:
+            # The list of known streams.
+            streams = wc().streams_list ()
+            stream_ids = [s.identifier for s in streams]
+
+            # Register any unknown streams.  Remove known streams from
+            # STREAMS_IDS.
+            for key in self.getListOfFeeds():
+                title = self.getFeedTitle(key)
+                # XXX: We should also check whether the list of
+                # articles/objects in each feed/stream is up to date.
+                if key not in stream_ids:
+                    logger.debug(
+                        "Registering previously unknown channel: %s (%s)"
+                        % (key, title,))
+                    # Use a default refresh interval of 6 hours.
+                    wc().stream_register (key, title, 6 * 60 * 60)
+                else:
+                    # Make sure the human readable name is up to date.
+                    if wc()[key].human_readable_name != title:
+                        wc()[key].human_readable_name = title
+                    stream_ids.remove (key)
+                    
+
+            # Unregister any streams that are no longer subscribed to.
+            for id in stream_ids:
+                logger.debug("Unregistering %s" % (id,))
+                w.stream_unregister (id)
+
      def importOldFormatFeeds(self):
          """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
          import rss
      def importOldFormatFeeds(self):
          """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
          import rss
@@ -489,8 +866,8 @@ class Listing:
                              pass
                  self.updateUnread(id)
              except:
                              pass
                  self.updateUnread(id)
              except:
-                import traceback
-                traceback.print_exc()
+                logger.error("importOldFormatFeeds: %s"
+                             % (traceback.format_exc(),))
          remove(self.configdir+"feeds.pickle")
                  
          
          remove(self.configdir+"feeds.pickle")
                  
          
@@ -507,14 +884,34 @@ class Listing:
          archFeed.addArchivedArticle(title, link, date, self.configdir)
          self.updateUnread("ArchivedArticles")
          
          archFeed.addArchivedArticle(title, link, date, self.configdir)
          self.updateUnread("ArchivedArticles")
          
-    def updateFeed(self, key, expiryTime=24, proxy=None, imageCache=False):
+    def updateFeed(self, key, expiryTime=None, proxy=None, imageCache=None,
+                   priority=0):
+        if expiryTime is None:
+            expiryTime = self.config.getExpiry()
+        if not expiryTime:
+            # Default to 24 hours
+            expriyTime = 24
+        if proxy is None:
+            (use_proxy, proxy) = self.config.getProxy()
+            if not use_proxy:
+                proxy = None
+        if imageCache is None:
+            imageCache = self.config.getImageCache()
+
          feed = self.getFeed(key)
          (url, etag, modified) = self.db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
          try:
              modified = time.struct_time(eval(modified))
          except:
              modified = None
          feed = self.getFeed(key)
          (url, etag, modified) = self.db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
          try:
              modified = time.struct_time(eval(modified))
          except:
              modified = None
-        (updateTime, etag, modified) = feed.updateFeed(self.configdir, url, etag, modified, expiryTime, proxy, imageCache)
+        feed.updateFeed(
+            self.configdir, url, etag, modified, expiryTime, proxy, imageCache,
+            priority, postFeedUpdateFunc=self._queuePostFeedUpdate)
+
+    def _queuePostFeedUpdate(self, *args, **kwargs):
+        mainthread.execute (self._postFeedUpdate, async=True, *args, **kwargs)
+
+    def _postFeedUpdate(self, key, updateTime, etag, modified, title):
          if modified==None:
              modified="None"
          else:
          if modified==None:
              modified="None"
          else:
@@ -523,8 +920,29 @@ class Listing:
              self.db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
          else:
              self.db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )
              self.db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
          else:
              self.db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )
+
+        if title is not None:
+            self.db.execute("UPDATE feeds SET title=(case WHEN title=='' THEN ? ELSE title END) where id=?;",
+                            (title, key))
          self.db.commit()
          self.updateUnread(key)
          self.db.commit()
          self.updateUnread(key)
+
+        update_server_object().ArticleCountUpdated()
+
+        stats = JobManager().stats()
+        global jobs_at_start
+        completed = stats['jobs-completed'] - jobs_at_start
+        in_progress = stats['jobs-in-progress']
+        queued = stats['jobs-queued']
+
+        percent = (100 * ((completed + in_progress / 2.))
+                   / (completed + in_progress + queued))
+
+        update_server_object().UpdateProgress(
+            percent, completed, in_progress, queued, 0, 0, 0, key)
+
+        if in_progress == 0 and queued == 0:
+            jobs_at_start = stats['jobs-completed']
          
      def getFeed(self, key):
          if key == "ArchivedArticles":
          
      def getFeed(self, key):
          if key == "ArchivedArticles":
@@ -537,15 +955,28 @@ class Listing:
          else:
              self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
          self.db.commit()
          else:
              self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
          self.db.commit()
+
+        if wc().available():
+            try:
+                wc()[key].human_readable_name = title
+            except KeyError:
+                logger.debug("Feed %s (%s) unknown." % (key, title))
          
      def getFeedUpdateTime(self, key):
          
      def getFeedUpdateTime(self, key):
-        return time.ctime(self.db.execute("SELECT updateTime FROM feeds WHERE id=?;", (key,)).fetchone()[0])
+        update_time = self.db.execute(
+            "SELECT updateTime FROM feeds WHERE id=?;", (key,)).fetchone()[0]
+        if not update_time:
+            return "Never"
+        return time.ctime(update_time)
          
      def getFeedNumberOfUnreadItems(self, key):
          return self.db.execute("SELECT unread FROM feeds WHERE id=?;", (key,)).fetchone()[0]
          
      def getFeedTitle(self, key):
          
      def getFeedNumberOfUnreadItems(self, key):
          return self.db.execute("SELECT unread FROM feeds WHERE id=?;", (key,)).fetchone()[0]
          
      def getFeedTitle(self, key):
-        return self.db.execute("SELECT title FROM feeds WHERE id=?;", (key,)).fetchone()[0]
+        (title, url) = self.db.execute("SELECT title, url FROM feeds WHERE id=?;", (key,)).fetchone()
+        if title:
+            return title
+        return url
          
      def getFeedUrl(self, key):
          return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]
          
      def getFeedUrl(self, key):
          return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]
@@ -617,7 +1048,7 @@ class Listing:
  
      def addFeed(self, title, url, id=None, category=1):
          if not id:
  
      def addFeed(self, title, url, id=None, category=1):
          if not id:
-            id = getId(title)
+            id = getId(url)
          count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
          if count == 0:
              max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
          count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
          if count == 0:
              max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
@@ -628,6 +1059,14 @@ class Listing:
              self.db.commit()
              # Ask for the feed object, it will create the necessary tables
              self.getFeed(id)
              self.db.commit()
              # Ask for the feed object, it will create the necessary tables
              self.getFeed(id)
+
+            if wc().available():
+                # Register the stream with Woodchuck.  Update approximately
+                # every 6 hours.
+                wc().stream_register(stream_identifier=id,
+                                     human_readable_name=title,
+                                     freshness=6*60*60)
+
              return True
          else:
              return False
              return True
          else:
              return False
@@ -643,6 +1082,12 @@ class Listing:
          self.db.commit()
      
      def removeFeed(self, key):
          self.db.commit()
      
      def removeFeed(self, key):
+        if wc().available ():
+            try:
+                del wc()[key]
+            except KeyError:
+                logger.debug("Removing unregistered feed %s failed" % (key,))
+
          rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
          self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
          self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
          rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
          self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
          self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )