Don't reprocess downloaded articles that are already up to date.
author Neal H. Walfield <neal@walfield.org>
Tue, 6 Sep 2011 23:03:32 +0000 (01:03 +0200)
committer Neal H. Walfield <neal@walfield.org>
Thu, 8 Sep 2011 16:37:31 +0000 (18:37 +0200)
src/rss_sqlite.py

index 0319999..077fce0 100644 (file)
@@ -348,7 +348,10 @@ class Feed(BaseObject):
             expiry = float(expiryTime) * 3600.
     
             currentTime = 0
-    
+            
+            updated_objects = 0
+            new_objects = 0
+
             def wc_success():
                 try:
                     wc().stream_register (self.key, "", 6 * 60 * 60)
@@ -362,8 +365,9 @@ class Feed(BaseObject):
                         transferred_up=progress_handler.stats['sent'],
                         transfer_time=update_start,
                         transfer_duration=download_duration,
-                        new_objects=len (tmp.entries),
-                        objects_inline=len (tmp.entries))
+                        new_objects=new_objects,
+                        updated_objects=updated_objects,
+                        objects_inline=new_objects + updated_objects)
                 except KeyError:
                     logger.warn(
                         "Failed to register update of %s with woodchuck!"
@@ -482,6 +486,26 @@ class Feed(BaseObject):
                                 "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
                    id = self.generateUniqueId(tmpEntry)
                    
+                   current_version \
+                       = self.db.execute('select date from feed where id=?',
+                                         (id,)).fetchone()
+                   if (current_version is not None
+                       and current_version[0] == date):
+                       logger.debug("ALREADY DOWNLOADED %s (%s)"
+                                    % (entry["title"], entry["link"]))
+                       continue                       
+
+                   if current_version is not None:
+                       # The version was updated.  Mark it as unread.
+                       logger.debug("UPDATED: %s (%s)"
+                                    % (entry["title"], entry["link"]))
+                       self.setEntryUnread(id)
+                       updated_objects += 1
+                   else:
+                       logger.debug("NEW: %s (%s)"
+                                    % (entry["title"], entry["link"]))
+                       new_objects += 1
+
                    #articleTime = time.mktime(self.entries[id]["dateTuple"])
                    soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                    images = soup('img')