1 #!/usr/bin/env python2.5
4 # Copyright (c) 2007-2008 INdT.
5 # Copyright (c) 2011 Neal H. Walfield
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Lesser General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU Lesser General Public License for more details.
16 # You should have received a copy of the GNU Lesser General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
20 # ============================================================================
22 # Author : Yves Marcoz
24 # Description : Simple RSS Reader
25 # ============================================================================
27 from __future__ import with_statement
30 from os.path import isfile, isdir
31 from shutil import rmtree
32 from os import mkdir, remove, utime
38 from BeautifulSoup import BeautifulSoup
39 from urlparse import urljoin
40 from calendar import timegm
43 from wc import wc, wc_init, woodchuck
46 from updatedbus import update_server_object
48 from jobmanager import JobManager
50 from httpprogresshandler import HTTPProgressHandler
# Module-level logger for this file.
logger = logging.getLogger(__name__)

# NOTE(review): the enclosing "def getId(string):" line is not visible in
# this chunk; this is the tail of an md5-based id helper that hashes its
# argument and returns the hex digest.
    return md5.new(string).hexdigest()
def download_callback(connection):
    """Progress callback used during transfers.

    Aborts the current download by raising KeyboardInterrupt when the
    job manager has been asked to shut down.  The connection argument
    is accepted but not inspected.
    """
    manager = JobManager()
    if manager.do_quit:
        raise KeyboardInterrupt
def downloader(progress_handler=None, proxy=None):
    # Build a urllib2 opener wired with handlers.  NOTE(review): the
    # initialisation of `openers` (and the proxy wiring / else branch)
    # falls in lines elided from this chunk; as visible, a default
    # HTTPProgressHandler is appended when no explicit progress_handler
    # is supplied.
    if progress_handler is not None:
        openers.append(progress_handler)
    openers.append(HTTPProgressHandler(download_callback))
    return urllib2.build_opener(*openers)
def transfer_stats(sent, received, **kwargs):
    """
    This function takes two arguments: sent is the number of bytes
    sent so far, received is the number of bytes received.  The
    function returns a continuation that you can call later.

    The continuation takes the same two arguments.  It returns a tuple
    of the number of bytes sent, the number of bytes received and the
    time since the original function was invoked.
    """
    start_time = time.time()
    # NOTE(review): the `start_sent = sent` line is elided from this
    # chunk but is referenced by the closure below.
    start_received = received

    def e(sent, received, **kwargs):
        return (sent - start_sent,
                received - start_received,
                time.time() - start_time)
    # NOTE(review): the closing `return e` line is not visible here.
# If not None, a subprocess.Popen object corresponding to a
# update_feeds.py process.
update_feed_process = None

# Cached dbus.Interface proxy to the org.marcoz.feedingit update
# daemon; reset to None whenever a call through it fails so it is
# re-created on next use.
update_feeds_iface = None
class BaseObject(object):
    # Mixin providing a simple time-bounded (60 second) cache in front
    # of sqlite column lookups.  NOTE(review): several lines of this
    # class are elided from this chunk; the visible fragments are kept
    # verbatim and annotated only where the intent is clear.

    # Columns to cache. Classes that inherit from this and use the
    # cache mechanism should set this to a list of tuples, each of
    # which contains two entries: the table and the column. Note that
    # both are case sensitive.

    def cache_invalidate(self, table=None):
        """
        Invalidate the cache.

        If table is not None, invalidate only the specified table.
        Otherwise, drop the whole cache.
        """
        if not hasattr(self, 'cache'):
        if table in self.cache:
            del self.cache[table]

    def lookup(self, table, column, id=None):
        """
        Look up a column or value. Uses a cache for columns in
        cached_columns. Note: the column is returned unsorted.
        """
        if not hasattr(self, 'cache'):
        # Cache data for at most 60 seconds.
        cache = self.cache[table]
        if time.time() - cache[None] > 60:
            # logger.debug("%s: Cache too old: clearing" % (table,))
            del self.cache[table]
        or (table, column) not in self.cached_columns):
            # The cache is empty or the caller wants a column that we
            if (table, column) in self.cached_columns:
                # logger.debug("%s: Rebuilding cache" % (table,))
                self.cache[table] = cache = {}
                for t, c in self.cached_columns:
                # NOTE(review): id is interpolated directly into the SQL
                # text here rather than bound as a parameter — potential
                # SQL injection if id is attacker-controlled; confirm.
                where = "where id = '%s'" % id
                results = self.db.execute(
                    "SELECT %s FROM %s %s" % (','.join(columns), table, where))
                for index, value in enumerate(values):
                    cache[columns[index]][i] = value
                results.append(values[0])
        cache = self.cache[table]
        value = cache[column][id]
        # logger.debug("%s.%s:%s -> %s" % (table, column, id, value))
        return cache[column].values()
        # logger.debug("%s.%s:%s -> Not found" % (table, column, id))
class Feed(BaseObject):
    # Per-feed article store backed by an sqlite database kept in
    # <configdir>/<key>.d/<key>.db.  NOTE(review): many lines of this
    # class are elided from this chunk (missing try: lines, else:
    # branches and returns); the visible fragments are kept verbatim
    # and annotated only where the intent is clear.

    # (table, column) pairs cached by BaseObject.lookup().
    cached_columns = (('feed', 'read'),

    # Serializes database-heavy phases of feed updates across threads.
    serial_execution_lock = threading.Lock()

    # Tail of the thread-local db accessor (_getdb): connect lazily,
    # once per thread, with a generous lock timeout.
        except AttributeError:
            db = sqlite3.connect("%s/%s.db" % (self.dir, self.key), timeout=120)
    db = property(_getdb)

    def __init__(self, configdir, key):
        self.configdir = configdir
        self.dir = "%s/%s.d" %(self.configdir, self.key)
        self.tls = threading.local ()

        # Create the feed directory and schema on first use.
        if not isdir(self.dir):
        if not isfile("%s/%s.db" %(self.dir, self.key)):
            self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
            self.db.execute("CREATE TABLE images (id text, imagePath text);")

    def addImage(self, configdir, key, baseurl, url, proxy=None, opener=None):
        # Download an image referenced by an article and cache it under
        # the feed directory; failures are logged, not raised.
        filename = configdir+key+".d/"+getId(url)
        if not isfile(filename):
            opener = downloader(proxy=proxy)
            abs_url = urljoin(baseurl,url)
            f = opener.open(abs_url)
            with open(filename, "w") as outf:
            except (urllib2.HTTPError, urllib2.URLError, IOError), exception:
                logger.info("Could not download image %s: %s"
                            % (abs_url, str (exception)))
                exception = sys.exc_info()[0]
                logger.info("Downloading image %s: %s" %
                            (abs_url, traceback.format_exc()))
            #open(filename,"a").close() # "Touch" the file
            file = open(filename,"a")
            utime(filename, None)

    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        # When running inside the update_feeds.py daemon, perform the
        # update in-process via the job manager; otherwise ask the
        # daemon over D-Bus, starting it first if necessary.
        if (os.path.basename(sys.argv[0]) == 'update_feeds.py'):
                self._updateFeed(configdir, url, etag, modified, expiryTime, proxy, imageCache, postFeedUpdateFunc, *postFeedUpdateFuncArgs)
            JobManager().execute(doit(), self.key, priority=priority)

            def send_update_request():
                global update_feeds_iface
                if update_feeds_iface is None:
                    bus=dbus.SessionBus()
                    remote_object = bus.get_object(
                        "org.marcoz.feedingit", # Connection name
                        "/org/marcoz/feedingit/update" # Object's path
                    update_feeds_iface = dbus.Interface(
                        remote_object, 'org.marcoz.feedingit')
                    update_feeds_iface.Update(self.key)
                    logger.error("Invoking org.marcoz.feedingit.Update: %s"
                    # Drop the proxy so it is rebuilt on the next attempt.
                    update_feeds_iface = None

            if send_update_request():
                # Success! It seems we were able to start the update
                # daemon via dbus (or, it was already running).

            global update_feed_process
            if (update_feed_process is None
                or update_feed_process.poll() is not None):
                # The update_feeds process is not running. Start it.
                update_feeds = os.path.join(os.path.dirname(__file__),
                argv = ['/usr/bin/env', 'python', update_feeds, '--daemon' ]
                logger.debug("Starting update_feeds: running %s"
                update_feed_process = subprocess.Popen(argv)
                # Make sure the dbus calls go to the right process:
                update_feeds_iface = None

            if send_update_request():

    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        logger.debug("Updating %s" % url)

        have_serial_execution_lock = False
        update_start = time.time ()

        progress_handler = HTTPProgressHandler(download_callback)
        openers = [progress_handler]
        openers.append (proxy)
        kwargs = {'handlers':openers}

        feed_transfer_stats = transfer_stats(0, 0)
        tmp=feedparser.parse(url, etag=etag, modified=modified, **kwargs)
        download_duration = time.time () - update_start

        opener = downloader(progress_handler, proxy)

        if JobManager().do_quit:
            raise KeyboardInterrupt

        process_start = time.time()

        # Expiry time is in hours
        expiry = float(expiryTime) * 3600.

        # Fragment of a wc_success helper (def line elided): register the
        # stream and record the successful update with Woodchuck.
            wc().stream_register (self.key, "", 6 * 60 * 60)
            except woodchuck.ObjectExistsError:
            wc()[self.key].updated (
                indicator=(woodchuck.Indicator.ApplicationVisual
                           |woodchuck.Indicator.StreamWide),
                transferred_down=progress_handler.stats['received'],
                transferred_up=progress_handler.stats['sent'],
                transfer_time=update_start,
                transfer_duration=download_duration,
                new_objects=new_objects,
                updated_objects=updated_objects,
                objects_inline=new_objects + updated_objects)
                "Failed to register update of %s with woodchuck!"

        http_status = tmp.get ('status', 200)

        # Check if the parse was succesful. If the http status code
        # is 304, then the download was successful, but there is
        # nothing new. Indeed, no content is returned. This make a
        # 304 look like an error because there are no entries and the
        # parse fails. But really, everything went great! Check for
        if http_status == 304:
            logger.debug("%s: No changes to feed." % (self.key,))
            mainthread.execute(wc_success, async=True)
        elif len(tmp["entries"])==0 and not tmp.version:
            # An error occured fetching or parsing the feed. (Version
            # will be either None if e.g. the connection timed our or
            # '' if the data is not a proper feed)
                "Error fetching %s: version is: %s: error: %s"
                % (url, str (tmp.version),
                   str (tmp.get ('bozo_exception', 'Unknown error'))))

            def register_stream_update_failed(http_status):
                logger.debug("%s: stream update failed!" % self.key)

                # It's not easy to get the feed's title from here.
                # At the latest, the next time the application is
                # started, we'll fix up the human readable name.
                wc().stream_register (self.key, "", 6 * 60 * 60)
                except woodchuck.ObjectExistsError:
                # Map the HTTP status onto a Woodchuck failure code.
                ec = woodchuck.TransferStatus.TransientOther
                if 300 <= http_status and http_status < 400:
                    ec = woodchuck.TransferStatus.TransientNetwork
                if 400 <= http_status and http_status < 500:
                    ec = woodchuck.TransferStatus.FailureGone
                if 500 <= http_status and http_status < 600:
                    ec = woodchuck.TransferStatus.TransientNetwork
                wc()[self.key].update_failed(ec)
            register_stream_update_failed(
                http_status=http_status),

            currentTime = time.time()
            # The etag and modified value should only be updated if the content was not null
            modified = tmp["modified"]

            # Best-effort fetch of the feed's favicon.
            abs_url = urljoin(tmp["feed"]["link"],"/favicon.ico")
            f = opener.open(abs_url)
            outf = open(self.dir+"/favicon.ico", "w")
            except (urllib2.HTTPError, urllib2.URLError), exception:
                logger.debug("Could not download favicon %s: %s"
                             % (abs_url, str (exception)))

            self.serial_execution_lock.acquire ()
            have_serial_execution_lock = True

            #reversedEntries = self.getEntries()
            #reversedEntries.reverse()

            tmp["entries"].reverse()
            for entry in tmp["entries"]:
                # Yield so as to make the main thread a bit more
                entry_transfer_stats = transfer_stats(
                    *feed_transfer_stats(**progress_handler.stats)[0:2])

                if JobManager().do_quit:
                    raise KeyboardInterrupt

                date = self.extractDate(entry)
                entry["title"] = "No Title"
                entry["author"] = None
                if(not(entry.has_key("id"))):
                content = self.extractContent(entry)
                object_size = len (content)
                tmpEntry = {"title":entry["title"], "content":content,
                            "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
                id = self.generateUniqueId(tmpEntry)

                    = self.db.execute('select date from feed where id=?',
                if (current_version is not None
                    and current_version[0] == date):
                    logger.debug("ALREADY DOWNLOADED %s (%s)"
                                 % (entry["title"], entry["link"]))

                if current_version is not None:
                    # The version was updated. Mark it as unread.
                    logger.debug("UPDATED: %s (%s)"
                                 % (entry["title"], entry["link"]))
                    self.setEntryUnread(id)
                    logger.debug("NEW: %s (%s)"
                                 % (entry["title"], entry["link"]))

                #articleTime = time.mktime(self.entries[id]["dateTuple"])
                soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                baseurl = tmpEntry["link"]
                if imageCache and len(images) > 0:
                    # Image downloads can block for a while, so drop the
                    # serialization lock around them.
                    self.serial_execution_lock.release ()
                    have_serial_execution_lock = False
                    filename = self.addImage(
                        configdir, self.key, baseurl, img['src'],
                    img['src']="file://%s" %filename
                    count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
                    self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                    object_size += os.path.getsize (filename)
                    except os.error, exception:
                        logger.error ("Error getting size of %s: %s"
                                      % (filename, exception))
                    self.serial_execution_lock.acquire ()
                    have_serial_execution_lock = True

                tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
                file = open(tmpEntry["contentLink"], "w")
                file.write(soup.prettify())
                self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
                values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
                self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
#                        self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
#                            filename = configdir+self.key+".d/"+id+".html"
#                            file = open(filename,"a")
#                            utime(filename, None)
#                            images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
#                            for image in images:
#                                 file = open(image[0],"a")
#                                 utime(image[0], None)

                # Register the object with Woodchuck and mark it as
                def register_object_transferred(
                    id, title, publication_time,
                    sent, received, object_size):
                    logger.debug("Registering transfer of object %s"
                    obj = wc()[self.key].object_register(
                        object_identifier=id,
                        human_readable_name=title)
                    except woodchuck.ObjectExistsError:
                        obj = wc()[self.key][id]
                    obj.publication_time = publication_time
                        woodchuck.Indicator.ApplicationVisual
                        |woodchuck.Indicator.StreamWide),
                        transferred_down=received,
                        object_size=object_size)

                # If the entry does not contain a publication
                # time, the attribute won't exist.
                pubtime = entry.get('date_parsed', None)
                publication_time = time.mktime (pubtime)
                publication_time = None

                    = entry_transfer_stats(**progress_handler.stats)
                # sent and received are for objects (in
                # particular, images) associated with this
                # item. We also want to attribute the data
                # transferred for the item's content. This is
                # a good first approximation.
                received += len(content)

                register_object_transferred(
                    title=tmpEntry["title"],
                    publication_time=publication_time,
                    sent=sent, received=received,
                    object_size=object_size),

                = feed_transfer_stats(**progress_handler.stats)
                "%s: Update successful: transferred: %d/%d; objects: %d)"
                % (url, sent, received, len (tmp.entries)))
            mainthread.execute (wc_success, async=True)

            # Expire old articles (read articles sooner than unread).
            rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
                self.removeEntry(row[0])

            from glob import glob
            for file in glob(configdir+self.key+".d/*"):
                # put the two dates into matching format
                lastmodDate = stats[8]
                expDate = time.time()-expiry*3
                # check if image-last-modified-date is outdated
                if expDate > lastmodDate:
                    #print 'Removing', file
                    # XXX: Tell woodchuck.
                    remove(file) # commented out for testing
                    except OSError, exception:
                        logger.error('Could not remove %s: %s'
                                     % (file, str (exception)))
        logger.debug("updated %s: %fs in download, %fs in processing"
                     % (self.key, download_duration,
                        time.time () - process_start))
            logger.error("Updating %s: %s" % (self.key, traceback.format_exc()))
            if have_serial_execution_lock:
                self.serial_execution_lock.release ()

        rows = self.db.execute("SELECT MAX(date) FROM feed;")
            logger.error("Fetching update time: %s: %s"
                         % (str(e), traceback.format_exc()))
            title = tmp.feed.title
        except (AttributeError, UnboundLocalError), exception:
        if postFeedUpdateFunc is not None:
            postFeedUpdateFunc (self.key, updateTime, etag, modified,
                                title, *postFeedUpdateFuncArgs)

        self.cache_invalidate()

    def setEntryRead(self, id):
        # Mark the article as read and notify Woodchuck it was used.
        self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
            wc()[self.key][id].used()
        mainthread.execute(doit, async=True)
        self.cache_invalidate('feed')

    def setEntryUnread(self, id):
        self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
        self.cache_invalidate('feed')

    def markAllAsRead(self):
        self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
        self.cache_invalidate('feed')

    def isEntryRead(self, id):
        return self.lookup('feed', 'read', id) == 1

    def getTitle(self, id):
        return self.lookup('feed', 'title', id)

    def getContentLink(self, id):
        return self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def getExternalLink(self, id):
        return self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def getDate(self, id):
        # Human-readable local time rendering of the article's date.
        dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
        return time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(dateStamp))

    def getDateTuple(self, id):
        dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
        return time.localtime(dateStamp)

    def getDateStamp(self, id):
        return self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def generateUniqueId(self, entry):
        """
        Generate a stable identifier for the article. For the same
        entry, this should result in the same identifier. If
        possible, the identifier should remain the same even if the
        """
        # Prefer the entry's id, which is supposed to be globally
        key = entry.get('id', None)
        # Next, try the link to the content.
        key = entry.get('link', None)
        # Ok, the title and the date concatenated are likely to be
        key = entry.get('title', None) + entry.get('date', None)
        # Hmm, the article's content will at least guarantee no
        # false negatives (i.e., missing articles)
        key = entry.get('content', None)
        # If all else fails, just use a random number.
        key = str (random.random ())

    def getIds(self, onlyUnread=False):
        # NOTE(review): the onlyUnread branch structure is elided here;
        # presumably the first query runs when onlyUnread is true.
        rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
        rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()

    def getNextId(self, id, forward=True):
        # Cyclic navigation: step delta positions through the id list.
        index = ids.index(id)
        return ids[(index + delta) % len(ids)]

    def getPreviousId(self, id):
        return self.getNextId(id, forward=False)

    def getNumberOfUnreadItems(self):
        return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]

    def getNumberOfEntries(self):
        return self.db.execute("SELECT count(*) FROM feed;").fetchone()[0]

    def getArticle(self, entry):
        # Render an article entry as a standalone XHTML page.
        #self.setEntryRead(id)
        #entry = self.entries[id]
        title = entry['title']
        #content = entry.get('content', entry.get('summary_detail', {}))
        content = entry["content"]
        author = entry['author']
        date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )

        #text = '''<div style="color: black; background-color: white;">'''
        text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
        text += "<html><head><title>" + title + "</title>"
        text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
        #text += '<style> body {-webkit-user-select: none;} </style>'
        text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
        text += "<BR /><small><i>Author: " + author + "</i></small>"
        text += "<BR /><small><i>Date: " + date + "</i></small></div>"
        text += "<BR /><BR />"
        text += "</body></html>"

    def getContent(self, id):
        # Read the cached article HTML; falls back to a placeholder on
        # failure (error-handling lines elided from this chunk).
        contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
        file = open(self.entries[id]["contentLink"])
        content = file.read()
        content = "Content unavailable"

    def extractDate(self, entry):
        # Prefer the updated time, fall back to the published time;
        # feedparser supplies UTC struct_times, converted here to epoch
        # seconds with calendar.timegm.
        if entry.has_key("updated_parsed"):
            return timegm(entry["updated_parsed"])
        elif entry.has_key("published_parsed"):
            return timegm(entry["published_parsed"])

    def extractContent(self, entry):
        # Pick the longest of summary/content; description is used in
        # an elided fall-back branch.
        if entry.has_key('summary'):
            content = entry.get('summary', '')
        if entry.has_key('content'):
            if len(entry.content[0].value) > len(content):
                content = entry.content[0].value
        content = entry.get('description', '')

    def removeEntry(self, id):
        # Delete the article's cached HTML, its database rows, and its
        # Woodchuck object registration.
        contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
        except OSError, exception:
            logger.error("Deleting %s: %s" % (contentLink, str (exception)))
        self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
        self.db.execute("DELETE FROM images WHERE id=?;", (id,) )

            wc()[self.key][id].files_deleted (
                woodchuck.DeletionResponse.Deleted)
            del wc()[self.key][id]
        mainthread.execute (doit, async=True)
class ArchivedArticles(Feed):
    # Pseudo-feed holding articles the user explicitly archived.
    # NOTE(review): several lines of this class are elided from this
    # chunk; the visible fragments are kept verbatim.

    def addArchivedArticle(self, title, link, date, configdir):
        id = self.generateUniqueId({"date":date, "title":title})
        values = (id, title, link, date, 0, link, 0)
        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)

    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
        # For archived articles, "updating" means downloading the page
        # for each row whose updated flag is still 0 and caching it
        # (with its images) locally.
        rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
        currentTime = time.time()
        f = urllib2.urlopen(link)
        #entry["content"] = f.read()
        soup = BeautifulSoup(html)
        filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
        self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
        contentLink = configdir+self.key+".d/"+id+".html"
        file = open(contentLink, "w")
        file.write(soup.prettify())
        self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
        return (currentTime, None, None)

    def purgeReadArticles(self):
        rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
        self.removeArticle(row[0])

    def removeArticle(self, id):
        # Remove an archived article; an image file is only eligible
        # for deletion when no other article references the same path.
        rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
        count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
925 class Listing(BaseObject):
927 cached_columns = (('feeds', 'updateTime'),
930 ('categories', 'title'))
935 except AttributeError:
936 db = sqlite3.connect("%s/feeds.db" % self.configdir, timeout=120)
939 db = property(_getdb)
941 # Lists all the feeds in a dictionary, and expose the data
942 def __init__(self, config, configdir):
944 self.configdir = configdir
946 self.tls = threading.local ()
949 table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
951 self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
952 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
953 self.addCategory("Default Category")
954 if isfile(self.configdir+"feeds.pickle"):
955 self.importOldFormatFeeds()
957 self.addFeed("Maemo News", "http://maemo.org/news/items.xml")
959 from string import find, upper
960 if find(upper(table[0]), "WIDGET")<0:
961 self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
962 self.db.execute("UPDATE feeds SET widget=1;")
964 if find(upper(table[0]), "CATEGORY")<0:
965 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
966 self.addCategory("Default Category")
967 self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
968 self.db.execute("UPDATE feeds SET category=1;")
973 # Check that Woodchuck's state is up to date with respect our
976 updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
977 wc_init (self, True if updater else False)
978 if wc().available() and updater:
979 # The list of known streams.
980 streams = wc().streams_list ()
981 stream_ids = [s.identifier for s in streams]
983 # Register any unknown streams. Remove known streams from
985 for key in self.getListOfFeeds():
986 title = self.getFeedTitle(key)
987 # XXX: We should also check whether the list of
988 # articles/objects in each feed/stream is up to date.
989 if key not in stream_ids:
991 "Registering previously unknown channel: %s (%s)"
993 # Use a default refresh interval of 6 hours.
994 wc().stream_register (key, title, 6 * 60 * 60)
996 # Make sure the human readable name is up to date.
997 if wc()[key].human_readable_name != title:
998 wc()[key].human_readable_name = title
999 stream_ids.remove (key)
1002 # Unregister any streams that are no longer subscribed to.
1003 for id in stream_ids:
1004 logger.debug("Unregistering %s" % (id,))
1005 w.stream_unregister (id)
1007 logger.exception("Registering streams with Woodchuck")
1009 def importOldFormatFeeds(self):
1010 """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
1012 listing = rss.Listing(self.configdir)
1014 for id in listing.getListOfFeeds():
1017 values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
1018 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)
1021 feed = listing.getFeed(id)
1022 new_feed = self.getFeed(id)
1024 items = feed.getIds()[:]
1027 if feed.isEntryRead(item):
1031 date = timegm(feed.getDateTuple(item))
1032 title = feed.getTitle(item)
1033 newId = new_feed.generateUniqueId({"date":date, "title":title})
1034 values = (newId, title , feed.getContentLink(item), date, tuple(time.time()), feed.getExternalLink(item), read_status)
1035 new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
1036 new_feed.db.commit()
1038 images = feed.getImages(item)
1039 for image in images:
1040 new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
1041 new_feed.db.commit()
1044 self.updateUnread(id)
1046 logger.error("importOldFormatFeeds: %s"
1047 % (traceback.format_exc(),))
1048 remove(self.configdir+"feeds.pickle")
1051 def addArchivedArticle(self, key, index):
1052 feed = self.getFeed(key)
1053 title = feed.getTitle(index)
1054 link = feed.getExternalLink(index)
1055 date = feed.getDate(index)
1056 count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
1058 self.addFeed("Archived Articles", "", id="ArchivedArticles")
1060 archFeed = self.getFeed("ArchivedArticles")
1061 archFeed.addArchivedArticle(title, link, date, self.configdir)
1062 self.updateUnread("ArchivedArticles")
1064 def updateFeed(self, key, expiryTime=None, proxy=None, imageCache=None,
1066 if expiryTime is None:
1067 expiryTime = self.config.getExpiry()
1069 # Default to 24 hours
1072 (use_proxy, proxy) = self.config.getProxy()
1075 if imageCache is None:
1076 imageCache = self.config.getImageCache()
1078 feed = self.getFeed(key)
1079 (url, etag, modified) = self.db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
1081 modified = time.struct_time(eval(modified))
1085 self.configdir, url, etag, modified, expiryTime, proxy, imageCache,
1086 priority, postFeedUpdateFunc=self._queuePostFeedUpdate)
1088 def _queuePostFeedUpdate(self, *args, **kwargs):
1089 mainthread.execute (self._postFeedUpdate, async=True, *args, **kwargs)
1091 def _postFeedUpdate(self, key, updateTime, etag, modified, title):
1095 modified=str(tuple(modified))
1097 self.db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
1099 self.db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )
1101 if title is not None:
1102 self.db.execute("UPDATE feeds SET title=(case WHEN title=='' THEN ? ELSE title END) where id=?;",
1105 self.cache_invalidate('feeds')
1106 self.updateUnread(key)
1108 update_server_object().ArticleCountUpdated()
1110 stats = JobManager().stats()
1111 global jobs_at_start
1112 completed = stats['jobs-completed'] - jobs_at_start
1113 in_progress = stats['jobs-in-progress']
1114 queued = stats['jobs-queued']
1117 percent = (100 * ((completed + in_progress / 2.))
1118 / (completed + in_progress + queued))
1119 except ZeroDivisionError:
1122 update_server_object().UpdateProgress(
1123 percent, completed, in_progress, queued, 0, 0, 0, key)
1125 if in_progress == 0 and queued == 0:
1126 jobs_at_start = stats['jobs-completed']
1128 def getFeed(self, key):
1129 if key == "ArchivedArticles":
1130 return ArchivedArticles(self.configdir, key)
1131 return Feed(self.configdir, key)
1133 def editFeed(self, key, title, url, category=None):
1135 self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
1137 self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
1139 self.cache_invalidate('feeds')
1141 if wc().available():
1143 wc()[key].human_readable_name = title
1145 logger.debug("Feed %s (%s) unknown." % (key, title))
1147 def getFeedUpdateTime(self, key):
1148 update_time = self.lookup('feeds', 'updateTime', key)
1153 delta = time.time() - update_time
1155 delta_hours = delta / (60. * 60.)
1156 if delta_hours < .1:
1157 return "A few minutes ago"
1158 if delta_hours < .75:
1159 return "Less than an hour ago"
1160 if delta_hours < 1.5:
1161 return "About an hour ago"
1162 if delta_hours < 18:
1163 return "About %d hours ago" % (int(delta_hours + 0.5),)
1165 delta_days = delta_hours / 24.
1166 if delta_days < 1.5:
1167 return "About a day ago"
1169 return "%d days ago" % (int(delta_days + 0.5),)
1171 delta_weeks = delta_days / 7.
1172 if delta_weeks <= 8:
1173 return "%d weeks ago" % int(delta_weeks + 0.5)
1175 delta_months = delta_days / 30.
1176 if delta_months <= 30:
1177 return "%d months ago" % int(delta_months + 0.5)
1179 return time.strftime("%x", time.gmtime(update_time))
1181 def getFeedNumberOfUnreadItems(self, key):
1182 return self.lookup('feeds', 'unread', key)
1184 def getFeedTitle(self, key):
1185 title = self.lookup('feeds', 'title', key)
1189 return self.getFeedUrl(key)
1191 def getFeedUrl(self, key):
1192 return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]
1194 def getFeedCategory(self, key):
1195 return self.db.execute("SELECT category FROM feeds WHERE id=?;", (key,)).fetchone()[0]
1197 def getListOfFeeds(self, category=None):
1199 rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
1201 rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
1208 def getListOfCategories(self):
1209 return list(row[0] for row in self.db.execute(
1210 "SELECT id FROM categories ORDER BY rank;"))
1212 def getCategoryTitle(self, id):
1213 return self.lookup('categories', 'title', id)
def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
    """Return the feed ids in *category*, sorted according to *order*.

    order      -- one of "Most unread", "Least unread", "Most recent",
                  "Least recent"; any other value (e.g. "Manual") falls
                  back to the user-defined rank order.
    onlyUnread -- when True, list only feeds that have unread items.

    Bug fixes: *onlyUnread* was previously ignored (both SQL strings were
    built unconditionally, the second overwriting the first) and nothing
    was returned.  The category is now bound as a query parameter instead
    of being interpolated into the SQL string.
    """
    if order == "Most unread":
        tmp = "ORDER BY unread DESC"
    elif order == "Least unread":
        tmp = "ORDER BY unread"
    elif order == "Most recent":
        tmp = "ORDER BY updateTime DESC"
    elif order == "Least recent":
        tmp = "ORDER BY updateTime"
    else:  # order == "Manual" or invalid value...
        tmp = "ORDER BY rank"
    if onlyUnread:
        sql = "SELECT id FROM feeds WHERE unread>0 AND category=? " + tmp
    else:
        sql = "SELECT id FROM feeds WHERE category=? " + tmp
    rows = self.db.execute(sql, (category,))
    return [row[0] for row in rows]
def getFavicon(self, key):
    """Return the path of the feed's cached favicon, or None if absent.

    Bug fix: the existence check previously had no result -- the method
    fell off the end and returned None even when the icon file existed.
    """
    filename = "%s%s.d/favicon.ico" % (self.configdir, key)
    if isfile(filename):
        return filename
    return None
def updateUnread(self, key):
    """Recount the feed's unread items and persist the count for *key*."""
    unread = self.getFeed(key).getNumberOfUnreadItems()
    self.db.execute(
        "UPDATE feeds SET unread=? WHERE id=?;", (unread, key))
    # The cached 'feeds' rows are now stale.
    self.cache_invalidate('feeds')
def addFeed(self, title, url, id=None, category=1):
    # Insert a new feed row (ranked last) and register it as a Woodchuck
    # stream.
    #
    # NOTE(review): this method appears truncated.  `count` is computed
    # but never checked (presumably meant to skip duplicate ids), the
    # `if max_rank == None:` branch has no body (presumably
    # `max_rank = 0`), and the stream_register() call is cut off
    # mid-argument-list.  Recover the missing lines from version control
    # before relying on this code.
    count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
    # MAX(rank) is NULL when the feeds table is empty.
    max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
    if max_rank == None:
    # Row layout: id, title, url, unread, updateTime, rank, etag,
    # modified, widget, category.
    values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
    self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?,?);", values)
    # Ask for the feed object, it will create the necessary tables
    if wc().available():
        # Register the stream with Woodchuck. Update approximately
        wc().stream_register(stream_identifier=id,
                             human_readable_name=title,
def addCategory(self, title):
    """Create a new category named *title* at the end of the rank order.

    Bug fix: ``MAX(...)+1`` evaluates to NULL on an empty categories
    table, which previously inserted a row with a None id and rank; both
    now fall back to 1.
    """
    rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
    if rank is None:
        rank = 1
    id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
    if id is None:
        id = 1
    self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
def removeFeed(self, key):
    # Delete feed *key* from the database, close the rank gap it leaves,
    # and remove its on-disk cache directory.
    #
    # NOTE(review): the Woodchuck branch below only logs a failure
    # message; the actual stream deletion call appears to be missing
    # (the log text implies a try/except around an unregister attempt).
    # Confirm against version control.
    if wc().available ():
        logger.debug("Removing unregistered feed %s failed" % (key,))
    rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
    self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
    # Compact ranks so they stay contiguous after the deletion.
    self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
    # Remove the feed's cache directory (articles, favicon, ...).
    if isdir(self.configdir+key+".d/"):
        rmtree(self.configdir+key+".d/")
def removeCategory(self, key):
    """Delete category *key*, compact the remaining ranks, and move its
    feeds into the default category (id 1).  The last remaining category
    is never deleted."""
    count = self.db.execute("SELECT count(*) FROM categories;").fetchone()[0]
    if count <= 1:
        # Refuse to delete the only category left.
        return
    rank = self.db.execute(
        "SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
    self.db.execute("DELETE FROM categories WHERE id=?;", (key,))
    # Shift everything below the removed category up one slot.
    self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,))
    # Orphaned feeds fall back to the default category.
    self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,))
1313 #def saveConfig(self):
1314 # self.listOfFeeds["feedingit-order"] = self.sortedKeys
1315 # file = open(self.configdir+"feeds.pickle", "w")
1316 # pickle.dump(self.listOfFeeds, file)
def moveUp(self, key):
    """Swap feed *key* with the feed ranked immediately above it.

    Bug fix: without a boundary check, moving the top-ranked feed pushed
    it to a nonexistent rank of -1 (and swapped with no row); it is now
    a no-op.
    """
    rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
    if rank > 0:
        # Give the neighbour our rank, then take its rank ourselves.
        self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1))
        self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key))
def moveCategoryUp(self, key):
    """Swap category *key* with the category ranked immediately above it.

    Bug fix: without a boundary check, moving the top-ranked category
    pushed it to a nonexistent rank of -1; it is now a no-op.
    """
    rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
    if rank > 0:
        # Give the neighbour our rank, then take its rank ourselves.
        self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1))
        self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key))
def moveDown(self, key):
    """Swap feed *key* with the feed ranked immediately below it.

    Bug fix: max_rank was fetched but never used, so moving the
    bottom-ranked feed assigned it a rank past the end of the list;
    that case is now a no-op.
    """
    rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
    max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
    if rank < max_rank:
        # Give the neighbour our rank, then take its rank ourselves.
        self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1))
        self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key))
def moveCategoryDown(self, key):
    """Swap category *key* with the category ranked immediately below it.

    Bug fix: max_rank was fetched but never used, so moving the
    bottom-ranked category assigned it a rank past the end of the list;
    that case is now a no-op.
    """
    rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
    max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
    if rank < max_rank:
        # Give the neighbour our rank, then take its rank ourselves.
        self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1))
        self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key))