rss_sqlite: When update a feed: improve statistics reporting.
author Neal H. Walfield <neal@walfield.org>
Tue, 6 Sep 2011 22:16:45 +0000 (00:16 +0200)
committer Neal H. Walfield <neal@walfield.org>
Thu, 8 Sep 2011 16:37:31 +0000 (18:37 +0200)
src/rss_sqlite.py

index 5735de2..0319999 100644 (file)
@@ -73,6 +73,27 @@ def downloader(progress_handler=None, proxy=None):
 
     return urllib2.build_opener(*openers)
 
+def transfer_stats(sent, received, **kwargs):
+    """
+    This function takes two arguments: sent is the number of bytes
+    sent so far, received is the number of bytes received.  The
+    function returns a continuation that you can call later.
+
+    The continuation takes the same two arguments.  It returns a tuple
+    of the number of bytes sent, the number of bytes received and the
+    time since the original function was invoked.
+    """
+    start_time = time.time()
+    start_sent = sent
+    start_received = received
+
+    def e(sent, received, **kwargs):
+        return (sent - start_sent,
+                received - start_received,
+                time.time() - start_time)
+
+    return e
+
 # If not None, a subprocess.Popen object corresponding to a
 # update_feeds.py process.
 update_feed_process = None
@@ -297,10 +318,12 @@ class Feed(BaseObject):
                 time.sleep(1)
 
     def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
+        logger.debug("Updating %s" % url)
+
         success = False
         have_serial_execution_lock = False
         try:
-            download_start = time.time ()
+            update_start = time.time ()
 
             progress_handler = HTTPProgressHandler(download_callback)
 
@@ -309,9 +332,11 @@ class Feed(BaseObject):
                 openers.append (proxy)
             kwargs = {'handlers':openers}
             
+            feed_transfer_stats = transfer_stats(0, 0)
+
             tmp=feedparser.parse(url, etag=etag, modified=modified, **kwargs)
-            download_duration = time.time () - download_start
-    
+            download_duration = time.time () - update_start
+
             opener = downloader(progress_handler, proxy)
 
             if JobManager().do_quit:
@@ -335,7 +360,7 @@ class Feed(BaseObject):
                                    |woodchuck.Indicator.StreamWide),
                         transferred_down=progress_handler.stats['received'],
                         transferred_up=progress_handler.stats['sent'],
-                        transfer_time=download_start,
+                        transfer_time=update_start,
                         transfer_duration=download_duration,
                         new_objects=len (tmp.entries),
                         objects_inline=len (tmp.entries))
@@ -428,11 +453,12 @@ class Feed(BaseObject):
                    # responsive.
                    time.sleep(0)
     
+                   entry_transfer_stats = transfer_stats(
+                       *feed_transfer_stats(**progress_handler.stats)[0:2])
+
                    if JobManager().do_quit:
                        raise KeyboardInterrupt
 
-                   received_base = progress_handler.stats['received']
-                   sent_base = progress_handler.stats['sent']
                    object_size = 0
 
                    date = self.extractDate(entry)
@@ -452,7 +478,6 @@ class Feed(BaseObject):
                        entry["id"] = None
                    content = self.extractContent(entry)
                    object_size = len (content)
-                   received_base -= len (content)
                    tmpEntry = {"title":entry["title"], "content":content,
                                 "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
                    id = self.generateUniqueId(tmpEntry)
@@ -544,9 +569,14 @@ class Feed(BaseObject):
                        else:
                            publication_time = None
 
-                       sent = progress_handler.stats['sent'] - sent_base
-                       received = (progress_handler.stats['received']
-                                   - received_base)
+                       sent, received, _ \
+                           = entry_transfer_stats(**progress_handler.stats)
+                       # sent and received are for objects (in
+                       # particular, images) associated with this
+                       # item.  We also want to attribute the data
+                       # transferred for the item's content.  This is
+                       # a good first approximation.
+                       received += len(content)
 
                        mainthread.execute(
                            register_object_transferred(
@@ -558,12 +588,11 @@ class Feed(BaseObject):
                            async=True)
                self.db.commit()
 
+               sent, received, _ \
+                   = feed_transfer_stats(**progress_handler.stats)
                logger.debug (
                    "%s: Update successful: transferred: %d/%d; objects: %d)"
-                   % (self.key,
-                      progress_handler.stats['sent'],
-                      progress_handler.stats['received'],
-                      len (tmp.entries)))
+                   % (url, sent, received, len (tmp.entries)))
                mainthread.execute (wc_success, async=True)
                success = True