- entry["author"]
- except:
- entry["author"] = None
- if(not(entry.has_key("id"))):
- entry["id"] = None
- tmpEntry = {"title":entry["title"], "content":self.extractContent(entry),
- "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
- id = self.generateUniqueId(tmpEntry)
-
- #articleTime = time.mktime(self.entries[id]["dateTuple"])
- soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
- images = soup('img')
- baseurl = tmpEntry["link"]
- #if not id in ids:
- if imageCache:
- for img in images:
- try:
- filename = self.addImage(configdir, self.key, baseurl, img['src'])
- img['src']="file://%s" %filename
- count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
- if count == 0:
- self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
- except:
- import traceback
- traceback.print_exc()
- print "Error downloading image %s" % img
- tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
- file = open(tmpEntry["contentLink"], "w")
- file.write(soup.prettify())
- file.close()
- if id in ids:
- self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
- self.db.commit()
- else:
- values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
- self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
- self.db.commit()
-# else:
-# try:
-# self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
-# self.db.commit()
-# filename = configdir+self.key+".d/"+id+".html"
-# file = open(filename,"a")
-# utime(filename, None)
-# file.close()
-# images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
-# for image in images:
-# file = open(image[0],"a")
-# utime(image[0], None)
-# file.close()
-# except:
-# pass
- self.db.commit()
+ abs_url = urljoin(tmp["feed"]["link"],"/favicon.ico")
+ f = opener.open(abs_url)
+ data = f.read()
+ f.close()
+ outf = open(self.dir+"/favicon.ico", "w")
+ outf.write(data)
+ outf.close()
+ del data
+ except (urllib2.HTTPError, urllib2.URLError), exception:
+ logger.debug("Could not download favicon %s: %s"
+ % (abs_url, str (exception)))
+
+ self.serial_execution_lock.acquire ()
+ have_serial_execution_lock = True
+
+ #reversedEntries = self.getEntries()
+ #reversedEntries.reverse()
+
+ ids = self.getIds()
+
+ tmp["entries"].reverse()
+ for entry in tmp["entries"]:
+ # Yield so as to make the main thread a bit more
+ # responsive.
+ time.sleep(0)
+
+ if JobManager().do_quit:
+ raise KeyboardInterrupt
+
+ received_base = progress_handler.stats['received']
+ sent_base = progress_handler.stats['sent']
+ object_size = 0
+
+ date = self.extractDate(entry)
+ try:
+ entry["title"]
+ except KeyError:
+ entry["title"] = "No Title"
+ try :
+ entry["link"]
+ except KeyError:
+ entry["link"] = ""
+ try:
+ entry["author"]
+ except KeyError:
+ entry["author"] = None
+ if(not(entry.has_key("id"))):
+ entry["id"] = None
+ content = self.extractContent(entry)
+ object_size = len (content)
+ received_base -= len (content)
+ tmpEntry = {"title":entry["title"], "content":content,
+ "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
+ id = self.generateUniqueId(tmpEntry)
+
+ #articleTime = time.mktime(self.entries[id]["dateTuple"])
+ soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
+ images = soup('img')
+ baseurl = tmpEntry["link"]
+ #if not id in ids:
+ if imageCache and len(images) > 0:
+ self.serial_execution_lock.release ()
+ have_serial_execution_lock = False
+ for img in images:
+ filename = self.addImage(
+ configdir, self.key, baseurl, img['src'],
+ opener=opener)
+ if filename:
+ img['src']="file://%s" %filename
+ count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
+ if count == 0:
+ self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
+ self.db.commit()
+
+ try:
+ object_size += os.path.getsize (filename)
+ except os.error, exception:
+ logger.error ("Error getting size of %s: %s"
+ % (filename, exception))
+ self.serial_execution_lock.acquire ()
+ have_serial_execution_lock = True
+
+ tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
+ file = open(tmpEntry["contentLink"], "w")
+ file.write(soup.prettify())
+ file.close()
+ if id in ids:
+ self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
+ self.db.commit()
+ else:
+ values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
+ self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
+ self.db.commit()
+# else:
+# try:
+# self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
+# self.db.commit()
+# filename = configdir+self.key+".d/"+id+".html"
+# file = open(filename,"a")
+# utime(filename, None)
+# file.close()
+# images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
+# for image in images:
+# file = open(image[0],"a")
+# utime(image[0], None)
+# file.close()
+# except:
+# pass
+
+ # Build a callback that registers this entry with Woodchuck (under wc()[self.key]) and marks it as
+ # transferred. The per-entry values are passed as arguments so the returned closure keeps its own copies.
+ def register_object_transferred(
+ id, title, publication_time,
+ sent, received, object_size):
+ def doit():  # returned below; the caller hands it to mainthread.execute(..., async=True)
+ logger.debug("Registering transfer of object %s"
+ % title)
+ try:
+ obj = wc()[self.key].object_register(
+ object_identifier=id,
+ human_readable_name=title)
+ except woodchuck.ObjectExistsError:  # object already registered: look it up by id instead
+ obj = wc()[self.key][id]
+ else:  # NOTE(review): indentation is lost in this view; confirm which statements belong to this branch
+ obj.publication_time = publication_time
+ obj.transferred(
+ indicator=(  # the two Indicator flags are combined with bitwise OR
+ woodchuck.Indicator.ApplicationVisual
+ |woodchuck.Indicator.StreamWide),
+ transferred_down=received,  # 'received' counter delta computed by the caller for this entry
+ transferred_up=sent,  # 'sent' counter delta computed by the caller for this entry
+ object_size=object_size)  # content length plus sizes of cached image files, as summed by the caller
+ return doit  # the factory returns the closure without calling it
+ if wc().available:
+ # If the entry does not contain a publication
+ # time, the attribute won't exist.
+ pubtime = entry.get('date_parsed', None)
+ if pubtime:
+ publication_time = time.mktime (pubtime)
+ else:
+ publication_time = None
+
+ sent = progress_handler.stats['sent'] - sent_base
+ received = (progress_handler.stats['received']
+ - received_base)
+
+ mainthread.execute(
+ register_object_transferred(
+ id=id,
+ title=tmpEntry["title"],
+ publication_time=publication_time,
+ sent=sent, received=received,
+ object_size=object_size),
+ async=True)
+ self.db.commit()
+
+ logger.debug (
+ "%s: Update successful: transferred: %d/%d; objects: %d)"
+ % (self.key,
+ progress_handler.stats['sent'],
+ progress_handler.stats['received'],
+ len (tmp.entries)))
+ mainthread.execute (wc_success, async=True)
+ success = True
+
+ rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
+ for row in rows:
+ self.removeEntry(row[0])