#!/usr/bin/env python2.5

# Copyright (c) 2007-2008 INdT.
# Copyright (c) 2011 Neal H. Walfield
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# ============================================================================
# Author      : Yves Marcoz
# Description : Simple RSS Reader
# ============================================================================
from __future__ import with_statement

import os
import sys
import md5
import time
import random
import logging
import sqlite3
import threading
import traceback
import subprocess
import urllib2
import dbus
import feedparser

from os.path import isfile, isdir
from shutil import rmtree
from os import mkdir, remove, utime
from BeautifulSoup import BeautifulSoup
from urlparse import urljoin
from calendar import timegm

import mainthread
from wc import wc, wc_init, woodchuck
from updatedbus import update_server_object
from jobmanager import JobManager
from httpprogresshandler import HTTPProgressHandler

logger = logging.getLogger(__name__)
def getId(string):
    if issubclass(string.__class__, unicode):
        string = string.encode('utf8', 'replace')

    return md5.new(string).hexdigest()
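# Usage sketch (illustrative, not part of the module's API surface):
# getId maps any string -- a URL, or an article's content -- to a stable
# 32-character hex digest, which the code below uses as a filename or
# database key:
#
#     getId("http://example.com/feed.xml")  # always the same digest
#     getId(article_content)                # changes iff the content changes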
def download_callback(connection):
    if JobManager().do_quit:
        raise KeyboardInterrupt
def downloader(progress_handler=None, proxy=None):
    openers = []

    if progress_handler is not None:
        openers.append(progress_handler)
    else:
        openers.append(HTTPProgressHandler(download_callback))

    if proxy:
        openers.append(proxy)

    return urllib2.build_opener(*openers)
def transfer_stats(sent, received, **kwargs):
    """
    This function takes two arguments: sent is the number of bytes
    sent so far, received is the number of bytes received.  The
    function returns a continuation that you can call later.

    The continuation takes the same two arguments.  It returns a tuple
    of the number of bytes sent, the number of bytes received and the
    time since the original function was invoked.
    """
    start_time = time.time()
    start_sent = sent
    start_received = received

    def e(sent, received, **kwargs):
        return (sent - start_sent,
                received - start_received,
                time.time() - start_time)

    return e
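# A minimal usage sketch of transfer_stats (illustrative only): the
# outer call snapshots the byte counters, and the returned continuation
# reports the deltas plus the elapsed time.
#
#     snapshot = transfer_stats(0, 0)
#     # ... perform some transfers ...
#     sent, received, elapsed = snapshot(sent=1024, received=65536)
#     # -> (1024, 65536, seconds since the snapshot was taken)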
# If not None, a subprocess.Popen object corresponding to an
# update_feeds.py process.
update_feed_process = None

update_feeds_iface = None

# Number of jobs that had completed when the current update run started.
jobs_at_start = 0
class BaseObject(object):
    # Columns to cache.  Classes that inherit from this and use the
    # cache mechanism should set this to a list of tuples, each of
    # which contains two entries: the table and the column.  Note that
    # both are case sensitive.
    cached_columns = ()
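    # For example (this mirrors what Feed and Listing do below), a
    # subclass that caches the 'read' and 'title' columns of its 'feed'
    # table would declare:
    #
    #     cached_columns = (('feed', 'read'),
    #                       ('feed', 'title'))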
    def cache_invalidate(self, table=None):
        """
        Invalidate the cache.

        If table is not None, invalidate only the specified table.
        Otherwise, drop the whole cache.
        """
        if not hasattr(self, 'cache'):
            return

        if table is None:
            del self.cache
        else:
            if table in self.cache:
                del self.cache[table]
    def lookup(self, table, column, id=None):
        """
        Look up a column or value.  Uses a cache for columns in
        cached_columns.  Note: the column is returned unsorted.
        """
        if not hasattr(self, 'cache'):
            self.cache = {}

        # Cache data for at most 60 seconds.
        try:
            cache = self.cache[table]
            if time.time() - cache[None] > 60:
                # logger.debug("%s: Cache too old: clearing" % (table,))
                del self.cache[table]
                cache = None
        except KeyError:
            cache = None

        if (cache is None
            or (table, column) not in self.cached_columns):
            # The cache is empty or the caller wants a column that we
            # don't cache.
            if (table, column) in self.cached_columns:
                # logger.debug("%s: Rebuilding cache" % (table,))
                do_cache = True
                self.cache[table] = cache = {}
                cache[None] = time.time()
                columns = []
                for t, c in self.cached_columns:
                    if t == table:
                        columns.append(c)
                        cache[c] = {}
                columns.append('id')
                where = ""
            else:
                do_cache = False
                columns = (column,)
                if id is not None:
                    where = "where id = '%s'" % id
                else:
                    where = ""

            results = self.db.execute(
                "SELECT %s FROM %s %s" % (','.join(columns), table, where))

            if do_cache:
                for row in results:
                    values = list(row)
                    i = values.pop()
                    for index, value in enumerate(values):
                        cache[columns[index]][i] = value
            else:
                values = [row[0] for row in results]
                if id is not None:
                    return values[0] if values else None
                return values

        cache = self.cache[table]
        if id is not None:
            if id in cache[column]:
                value = cache[column][id]
                # logger.debug("%s.%s:%s -> %s" % (table, column, id, value))
                return value
        else:
            return cache[column].values()

        # logger.debug("%s.%s:%s -> Not found" % (table, column, id))
        return None
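    # Usage sketch (illustrative): the first lookup of a cached column
    # loads the whole column into memory; later lookups within 60
    # seconds are answered without touching sqlite.
    #
    #     feed.lookup('feed', 'read', some_id)   # one SELECT, fills cache
    #     feed.lookup('feed', 'read', other_id)  # served from the cache
    #     feed.cache_invalidate('feed')          # force a reload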
class Feed(BaseObject):
    # Columns to cache.
    cached_columns = (('feed', 'read'),
                      ('feed', 'title'))

    serial_execution_lock = threading.Lock()

    def _getdb(self):
        try:
            db = self.tls.db
        except AttributeError:
            db = sqlite3.connect("%s/%s.db" % (self.dir, self.key), timeout=120)
            self.tls.db = db
        return db
    db = property(_getdb)
    def __init__(self, configdir, key):
        self.key = key
        self.configdir = configdir
        self.dir = "%s/%s.d" % (self.configdir, self.key)
        self.tls = threading.local()

        if not isdir(self.dir):
            mkdir(self.dir)
        filename = "%s/%s.db" % (self.dir, self.key)
        if not isfile(filename):
            self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, contentHash text, date float, updated float, link text, read int);")
            self.db.execute("CREATE TABLE images (id text, imagePath text);")
            self.db.commit()
        else:
            try:
                self.db.execute("ALTER TABLE feed ADD COLUMN contentHash text")
                self.db.commit()
            except sqlite3.OperationalError, e:
                if 'duplicate column name' in str(e):
                    # The column already exists: nothing to migrate.
                    pass
                else:
                    logger.exception("Add column contentHash to %s", filename)
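    # Note: the ALTER TABLE in __init__ is a common sqlite migration
    # idiom: unconditionally try to add the new column and treat the
    # "duplicate column name" error as "already migrated".  A minimal
    # standalone sketch of the pattern:
    #
    #     try:
    #         db.execute("ALTER TABLE feed ADD COLUMN contentHash text")
    #     except sqlite3.OperationalError, e:
    #         if 'duplicate column name' not in str(e):
    #             raise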
    def addImage(self, configdir, key, baseurl, url, proxy=None, opener=None):
        filename = configdir+key+".d/"+getId(url)
        if not isfile(filename):
            try:
                if not opener:
                    opener = downloader(proxy=proxy)

                abs_url = urljoin(baseurl, url)
                f = opener.open(abs_url)
                try:
                    with open(filename, "w") as outf:
                        for data in f:
                            outf.write(data)
                finally:
                    f.close()
            except (urllib2.HTTPError, urllib2.URLError, IOError), exception:
                logger.info("Could not download image %s: %s"
                            % (abs_url, str(exception)))
                return None
            except Exception:
                exception = sys.exc_info()[0]
                logger.info("Downloading image %s: %s"
                            % (abs_url, traceback.format_exc()))
                return None
        else:
            #open(filename,"a").close() # "Touch" the file
            file = open(filename, "a")
            file.close()
            utime(filename, None)
        return filename
    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        if (os.path.basename(sys.argv[0]) == 'update_feeds.py'):
            # We are the update daemon: do the update directly, on a
            # job thread.
            def doit():
                def it():
                    self._updateFeed(configdir, url, etag, modified, expiryTime, proxy, imageCache, postFeedUpdateFunc, *postFeedUpdateFuncArgs)
                return it
            JobManager().execute(doit(), self.key, priority=priority)
        else:
            def send_update_request():
                global update_feeds_iface
                if update_feeds_iface is None:
                    bus = dbus.SessionBus()
                    remote_object = bus.get_object(
                        "org.marcoz.feedingit",         # Connection name
                        "/org/marcoz/feedingit/update"  # Object's path
                        )
                    update_feeds_iface = dbus.Interface(
                        remote_object, 'org.marcoz.feedingit')

                try:
                    update_feeds_iface.Update(self.key)
                except Exception, e:
                    logger.error("Invoking org.marcoz.feedingit.Update: %s"
                                 % str(e))
                    update_feeds_iface = None
                    return False
                else:
                    return True
            if send_update_request():
                # Success!  It seems we were able to start the update
                # daemon via dbus (or, it was already running).
                return

            global update_feed_process
            global update_feeds_iface
            if (update_feed_process is None
                or update_feed_process.poll() is not None):
                # The update_feeds process is not running.  Start it.
                update_feeds = os.path.join(os.path.dirname(__file__),
                                            'update_feeds.py')
                argv = ['/usr/bin/env', 'python', update_feeds, '--daemon']
                logger.debug("Starting update_feeds: running %s"
                             % (str(argv),))
                update_feed_process = subprocess.Popen(argv)
                # Make sure the dbus calls go to the right process:
                # drop the proxy and rebind on the next request.
                update_feeds_iface = None

            # Retry now that the daemon ought to be coming up.
            if send_update_request():
                return
    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        logger.debug("Updating %s" % url)

        have_serial_execution_lock = False
        try:
            update_start = time.time()

            progress_handler = HTTPProgressHandler(download_callback)

            openers = [progress_handler]
            if proxy:
                openers.append(proxy)
            kwargs = {'handlers': openers}

            feed_transfer_stats = transfer_stats(0, 0)

            tmp = feedparser.parse(url, etag=etag, modified=modified, **kwargs)
            download_duration = time.time() - update_start

            opener = downloader(progress_handler, proxy)

            if JobManager().do_quit:
                raise KeyboardInterrupt

            process_start = time.time()

            # Expiry time is in hours
            expiry = float(expiryTime) * 3600.
            new_objects = 0
            updated_objects = 0

            def wc_success():
                try:
                    wc().stream_register(self.key, "", 6 * 60 * 60)
                except woodchuck.ObjectExistsError:
                    pass
                try:
                    wc()[self.key].updated(
                        indicator=(woodchuck.Indicator.ApplicationVisual
                                   |woodchuck.Indicator.StreamWide),
                        transferred_down=progress_handler.stats['received'],
                        transferred_up=progress_handler.stats['sent'],
                        transfer_time=update_start,
                        transfer_duration=download_duration,
                        new_objects=new_objects,
                        updated_objects=updated_objects,
                        objects_inline=new_objects + updated_objects)
                except KeyError:
                    logger.warn(
                        "Failed to register update of %s with woodchuck!"
                        % (self.key))
            http_status = tmp.get('status', 200)

            # Check if the parse was successful.  If the http status code
            # is 304, then the download was successful, but there is
            # nothing new.  Indeed, no content is returned.  This makes a
            # 304 look like an error because there are no entries and the
            # parse fails.  But really, everything went great!  Check for
            # this case.
            if http_status == 304:
                logger.debug("%s: No changes to feed." % (self.key,))
                mainthread.execute(wc_success, async=True)
            elif len(tmp["entries"]) == 0 and not tmp.get('version', None):
                # An error occurred fetching or parsing the feed.  (Version
                # will be either None if e.g. the connection timed out or
                # '' if the data is not a proper feed.)
                logger.error(
                    "Error fetching %s: version is: %s: error: %s"
                    % (url, str(tmp.get('version', 'unset')),
                       str(tmp.get('bozo_exception', 'Unknown error'))))

                def register_stream_update_failed(http_status):
                    logger.debug("%s: stream update failed!" % self.key)

                    try:
                        # It's not easy to get the feed's title from here.
                        # At the latest, the next time the application is
                        # started, we'll fix up the human readable name.
                        wc().stream_register(self.key, "", 6 * 60 * 60)
                    except woodchuck.ObjectExistsError:
                        pass
                    ec = woodchuck.TransferStatus.TransientOther
                    if 300 <= http_status < 400:
                        ec = woodchuck.TransferStatus.TransientNetwork
                    if 400 <= http_status < 500:
                        ec = woodchuck.TransferStatus.FailureGone
                    if 500 <= http_status < 600:
                        ec = woodchuck.TransferStatus.TransientNetwork
                    wc()[self.key].update_failed(ec)

                if wc().available():
                    mainthread.execute(
                        lambda: register_stream_update_failed(
                            http_status=http_status),
                        async=True)
            else:
                currentTime = time.time()
                # The etag and modified value should only be updated
                # if the content was not null.
                etag = tmp.get("etag", None)
                modified = tmp.get("modified", None)

                try:
                    abs_url = urljoin(tmp["feed"]["link"], "/favicon.ico")
                    f = opener.open(abs_url)
                    data = f.read()
                    f.close()
                    outf = open(self.dir+"/favicon.ico", "w")
                    outf.write(data)
                    outf.close()
                except (urllib2.HTTPError, urllib2.URLError), exception:
                    logger.debug("Could not download favicon %s: %s"
                                 % (abs_url, str(exception)))
                self.serial_execution_lock.acquire()
                have_serial_execution_lock = True

                #reversedEntries = self.getEntries()
                #reversedEntries.reverse()

                tmp["entries"].reverse()
                for entry in tmp["entries"]:
                    # Yield so as to make the main thread a bit more
                    # responsive.
                    time.sleep(0)

                    entry_transfer_stats = transfer_stats(
                        *feed_transfer_stats(**progress_handler.stats)[0:2])

                    if JobManager().do_quit:
                        raise KeyboardInterrupt
                    date = self.extractDate(entry)
                    if not entry.has_key("title"):
                        entry["title"] = "No Title"
                    if not entry.has_key("link"):
                        entry["link"] = ""
                    if not entry.has_key("author"):
                        entry["author"] = None
                    if not entry.has_key("id"):
                        entry["id"] = None

                    content = self.extractContent(entry)
                    contentHash = getId(content)
                    object_size = len(content)
                    tmpEntry = {"title":entry["title"], "content":content,
                                "date":date, "link":entry["link"],
                                "author":entry["author"], "id":entry["id"]}
                    id = self.generateUniqueId(tmpEntry)
                    current_version = self.db.execute(
                        'select date, ROWID, contentHash from feed where id=?',
                        (id,)).fetchone()
                    if (current_version is not None
                        # To detect updates, don't compare by date:
                        # compare by content.
                        #
                        # - If an article update is just a date change
                        #   and the content remains the same, we don't
                        #   want to register an update.
                        #
                        # - If an article's content changes but not the
                        #   date, we want to recognize an update.
                        and current_version[2] == contentHash):
                        logger.debug("ALREADY DOWNLOADED %s (%s)"
                                     % (entry["title"], entry["link"]))
                        # This article is already present in the feed
                        # listing.  Update the "updated" time, so it
                        # doesn't expire.
                        self.db.execute("UPDATE feed SET updated=? WHERE id=?;",
                                        (currentTime, id))
                        try:
                            logger.debug("Updating already downloaded files for %s" % (id,))
                            filename = configdir+self.key+".d/"+id+".html"
                            file = open(filename, "a")
                            utime(filename, None)
                            file.close()
                            images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
                            for image in images:
                                file = open(image[0], "a")
                                utime(image[0], None)
                                file.close()
                        except:
                            logger.debug("Error in refreshing images for %s" % (id,))
                        self.db.commit()
                        continue
                    if current_version is not None:
                        # The version was updated.  Mark it as unread.
                        logger.debug("UPDATED: %s (%s)"
                                     % (entry["title"], entry["link"]))
                        updated_objects += 1
                    else:
                        logger.debug("NEW: %s (%s)"
                                     % (entry["title"], entry["link"]))
                        new_objects += 1
                    #articleTime = time.mktime(self.entries[id]["dateTuple"])
                    soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                    images = soup('img')
                    baseurl = tmpEntry["link"]
                    if imageCache and len(images) > 0:
                        self.serial_execution_lock.release()
                        have_serial_execution_lock = False
                        for img in images:
                            if not img.has_key('src'):
                                continue

                            filename = self.addImage(
                                configdir, self.key, baseurl, img['src'],
                                opener=opener)
                            if filename:
                                img['src'] = "file://%s" % filename
                                count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
                                if count == 0:
                                    self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                                    self.db.commit()

                                try:
                                    object_size += os.path.getsize(filename)
                                except os.error, exception:
                                    logger.error("Error getting size of %s: %s"
                                                 % (filename, exception))
                        self.serial_execution_lock.acquire()
                        have_serial_execution_lock = True
                    tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
                    file = open(tmpEntry["contentLink"], "w")
                    file.write(soup.prettify())
                    file.close()

                    values = {'id': id,
                              'title': tmpEntry["title"],
                              'contentLink': tmpEntry["contentLink"],
                              'contentHash': contentHash,
                              'date': tmpEntry["date"],
                              'updated': currentTime,
                              'link': tmpEntry["link"],
                              'read': 0}

                    if current_version is not None:
                        # This is an update.  Ensure that the existing
                        # entry is replaced.
                        values['ROWID'] = current_version[1]

                    cols, values = zip(*values.items())
                    self.db.execute(
                        "INSERT OR REPLACE INTO feed (%s) VALUES (%s);"
                        % (','.join(cols), ','.join(('?',) * len(values))),
                        values)
                    self.db.commit()
                    # Register the object with Woodchuck and mark it as
                    # downloaded.
                    def register_object_transferred(
                        id, title, publication_time,
                        sent, received, object_size):
                        def doit():
                            logger.debug("Registering transfer of object %s"
                                         % (id,))
                            try:
                                obj = wc()[self.key].object_register(
                                    object_identifier=id,
                                    human_readable_name=title)
                            except woodchuck.ObjectExistsError:
                                obj = wc()[self.key][id]
                            else:
                                obj.publication_time = publication_time
                                obj.transferred(
                                    indicator=(
                                        woodchuck.Indicator.ApplicationVisual
                                        |woodchuck.Indicator.StreamWide),
                                    transferred_down=received,
                                    transferred_up=sent,
                                    object_size=object_size)
                        return doit
                    # If the entry does not contain a publication
                    # time, the attribute won't exist.
                    pubtime = entry.get('date_parsed', None)
                    if pubtime:
                        publication_time = time.mktime(pubtime)
                    else:
                        publication_time = None

                    sent, received, _ \
                        = entry_transfer_stats(**progress_handler.stats)
                    # sent and received are for objects (in
                    # particular, images) associated with this
                    # item.  We also want to attribute the data
                    # transferred for the item's content.  This is
                    # a good first approximation.
                    received += len(content)

                    mainthread.execute(
                        register_object_transferred(
                            id=id,
                            title=tmpEntry["title"],
                            publication_time=publication_time,
                            sent=sent, received=received,
                            object_size=object_size),
                        async=True)
                    self.db.commit()
                sent, received, _ \
                    = feed_transfer_stats(**progress_handler.stats)
                logger.debug(
                    "%s: Update successful: transferred: %d/%d; objects: %d"
                    % (url, sent, received, len(tmp.entries)))
                mainthread.execute(wc_success, async=True)
                # Remove old entries.
                rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
                for row in rows:
                    self.removeEntry(row[0])

                from glob import glob
                for file in glob(configdir+self.key+".d/*"):
                    if isfile(file):
                        stats = os.stat(file)
                        # put the two dates into matching format
                        lastmodDate = stats[8]

                        expDate = time.time()-expiry*3
                        # check if image-last-modified-date is outdated
                        if expDate > lastmodDate:
                            try:
                                #print 'Removing', file

                                # XXX: Tell woodchuck.
                                remove(file)
                            except OSError, exception:
                                logger.error('Could not remove %s: %s'
                                             % (file, str(exception)))
                logger.debug("updated %s: %fs in download, %fs in processing"
                             % (self.key, download_duration,
                                time.time() - process_start))
        except:
            logger.error("Updating %s: %s" % (self.key, traceback.format_exc()))
        finally:
            if have_serial_execution_lock:
                self.serial_execution_lock.release()

            updateTime = 0
            try:
                rows = self.db.execute("SELECT MAX(date) FROM feed;")
                for row in rows:
                    updateTime = row[0]
            except Exception, e:
                logger.error("Fetching update time: %s: %s"
                             % (str(e), traceback.format_exc()))

            try:
                title = tmp.feed.title
            except (AttributeError, UnboundLocalError), exception:
                title = None
            if postFeedUpdateFunc is not None:
                postFeedUpdateFunc(self.key, updateTime, etag, modified,
                                   title, *postFeedUpdateFuncArgs)

        self.cache_invalidate()
    def setEntryRead(self, id):
        self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
        self.db.commit()

        def doit():
            try:
                wc()[self.key][id].used()
            except KeyError:
                pass
        if wc().available():
            mainthread.execute(doit, async=True)
        self.cache_invalidate('feed')
    def setEntryUnread(self, id):
        self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
        self.db.commit()
        self.cache_invalidate('feed')

    def markAllAsRead(self):
        self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
        self.db.commit()
        self.cache_invalidate('feed')
    def isEntryRead(self, id):
        return self.lookup('feed', 'read', id) == 1

    def getTitle(self, id):
        return self.lookup('feed', 'title', id)

    def getContentLink(self, id):
        return self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def getContentHash(self, id):
        return self.db.execute("SELECT contentHash FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def getExternalLink(self, id):
        return self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def getDate(self, id):
        dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
        return time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(dateStamp))

    def getDateTuple(self, id):
        dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
        return time.localtime(dateStamp)

    def getDateStamp(self, id):
        return self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
    def generateUniqueId(self, entry):
        """
        Generate a stable identifier for the article.  For the same
        entry, this should result in the same identifier.  If
        possible, the identifier should remain the same even if the
        article is updated.
        """
        # Prefer the entry's id, which is supposed to be globally
        # unique.
        key = entry.get('id', None)
        if not key:
            # Next, try the link to the content.
            key = entry.get('link', None)
        if not key:
            # Ok, the title and the date concatenated are likely to be
            # relatively stable.
            key = entry.get('title', '') + str(entry.get('date', ''))
        if not key:
            # Hmm, the article's content will at least guarantee no
            # false negatives (i.e., missing articles).
            key = entry.get('content', None)
        if not key:
            # If all else fails, just use a random number.
            key = str(random.random())

        return getId(key)
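    # Illustrative examples of the fallback chain (not executed here):
    #
    #     generateUniqueId({'id': 'urn:uuid:1234', 'title': 'A'})
    #     # -> getId('urn:uuid:1234'): the globally unique id wins.
    #
    #     generateUniqueId({'title': 'A', 'date': 1300000000})
    #     # -> getId('A1300000000'): no id or link, so title plus date.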
    def getIds(self, onlyUnread=False):
        if onlyUnread:
            rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
        else:
            rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
        return [row[0] for row in rows]

    def getNextId(self, id, forward=True, onlyUnread=False):
        if forward:
            delta = 1
        else:
            delta = -1
        ids = self.getIds(onlyUnread=onlyUnread)
        index = ids.index(id)
        return ids[(index + delta) % len(ids)]

    def getPreviousId(self, id, onlyUnread=False):
        return self.getNextId(id, forward=False, onlyUnread=onlyUnread)
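    # Usage sketch (illustrative): navigation wraps around thanks to the
    # modulo arithmetic above.
    #
    #     ids = feed.getIds()      # e.g. ['a', 'b', 'c'], newest first
    #     feed.getNextId('c')      # -> 'a' (wraps)
    #     feed.getPreviousId('a')  # -> 'c' (wraps the other way)
    #
    # Note: ids.index(id) raises ValueError if id is not in the list,
    # e.g. when onlyUnread=True and the article was just marked read.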
    def getNumberOfUnreadItems(self):
        return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]

    def getNumberOfEntries(self):
        return self.db.execute("SELECT count(*) FROM feed;").fetchone()[0]
    def getArticle(self, entry):
        #self.setEntryRead(id)
        #entry = self.entries[id]
        title = entry['title']
        #content = entry.get('content', entry.get('summary_detail', {}))
        content = entry["content"]

        link = entry['link']
        author = entry['author']
        date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]))

        #text = '''<div style="color: black; background-color: white;">'''
        text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
        text += "<html><head><title>" + title + "</title>"
        text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
        #text += '<style> body {-webkit-user-select: none;} </style>'
        text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
        if author is not None:
            text += "<BR /><small><i>Author: " + author + "</i></small>"
        text += "<BR /><small><i>Date: " + date + "</i></small></div>"
        text += "<BR /><BR />"
        text += content
        text += "</body></html>"
        return text
    def getContent(self, id):
        """
        Return the content of the article with the specified ID.  If
        the content is not available, returns None.
        """
        contentLink = self.getContentLink(id)
        try:
            with open(contentLink, 'r') as file:
                content = file.read()
        except Exception:
            logger.exception("Failed to get content for %s: reading %s failed",
                             id, contentLink)
            content = None
        return content
    def extractDate(self, entry):
        if entry.has_key("updated_parsed"):
            return timegm(entry["updated_parsed"])
        elif entry.has_key("published_parsed"):
            return timegm(entry["published_parsed"])
        else:
            # No date in the entry: fall back to the current time.
            return time.time()
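    # extractDate deliberately uses calendar.timegm, not time.mktime:
    # feedparser's *_parsed tuples are in UTC, and timegm interprets a
    # struct_time as UTC, while mktime would interpret it as local time.
    #
    #     from calendar import timegm
    #     utc_tuple = time.gmtime(0)  # the epoch, as a UTC struct_time
    #     timegm(utc_tuple)           # -> 0
    #     # time.mktime(utc_tuple) would be off by the local UTC offset.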
    def extractContent(self, entry):
        content = ""
        if entry.has_key('summary'):
            content = entry.get('summary', '')
        if entry.has_key('content'):
            if len(entry.content[0].value) > len(content):
                content = entry.content[0].value
        if content == "":
            content = entry.get('description', '')
        return content
    def removeEntry(self, id):
        contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
        if contentLink:
            try:
                remove(contentLink)
            except OSError, exception:
                logger.error("Deleting %s: %s" % (contentLink, str(exception)))
        self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
        self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
        self.db.commit()

        def doit():
            try:
                wc()[self.key][id].files_deleted(
                    woodchuck.DeletionResponse.Deleted)
                del wc()[self.key][id]
            except KeyError:
                pass
        if wc().available():
            mainthread.execute(doit, async=True)
class ArchivedArticles(Feed):
    def addArchivedArticle(self, title, link, date, configdir):
        id = self.generateUniqueId({"date":date, "title":title})
        values = (id, title, link, date, 0, link, 0)
        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
        self.db.commit()
    # Feed.updateFeed calls this function.
    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        currentTime = 0
        rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
        for row in rows:
            try:
                currentTime = time.time()
                id = row[0]
                link = row[1]
                f = urllib2.urlopen(link)
                #entry["content"] = f.read()
                html = f.read()
                f.close()
                soup = BeautifulSoup(html)
                images = soup('img')
                baseurl = link
                for img in images:
                    filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
                    if filename:
                        self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                        self.db.commit()
                contentLink = configdir+self.key+".d/"+id+".html"
                file = open(contentLink, "w")
                file.write(soup.prettify())
                file.close()

                self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
                self.db.commit()
            except:
                logger.error("Error updating Archived Article: %s %s"
                             % (link, traceback.format_exc(),))

        if postFeedUpdateFunc is not None:
            postFeedUpdateFunc(self.key, currentTime, None, None, None,
                               *postFeedUpdateFuncArgs)
    def purgeReadArticles(self):
        rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
        for row in rows:
            self.removeArticle(row[0])

    def removeArticle(self, id):
        rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
        for row in rows:
            # Only remove the image if no other article references it.
            count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id, row[0]) ).fetchone()[0]
            if count == 0:
                try:
                    remove(row[0])
                except OSError:
                    pass
        self.removeEntry(id)
class Listing(BaseObject):
    # Columns to cache.
    cached_columns = (('feeds', 'updateTime'),
                      ('feeds', 'unread'),
                      ('feeds', 'title'),
                      ('categories', 'title'))

    def _getdb(self):
        try:
            db = self.tls.db
        except AttributeError:
            db = sqlite3.connect("%s/feeds.db" % self.configdir, timeout=120)
            self.tls.db = db
        return db
    db = property(_getdb)
    # Lists all the feeds in a dictionary, and expose the data
    def __init__(self, config, configdir):
        self.config = config
        self.configdir = configdir

        self.tls = threading.local()

        table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
        if table is None:
            self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
            self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
            self.addCategory("Default Category")
            if isfile(self.configdir+"feeds.pickle"):
                self.importOldFormatFeeds()
            else:
                self.addFeed("Maemo News", "http://maemo.org/news/items.xml")
        else:
            from string import find, upper
            if find(upper(table[0]), "WIDGET") < 0:
                self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
                self.db.execute("UPDATE feeds SET widget=1;")
            if find(upper(table[0]), "CATEGORY") < 0:
                self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
                self.addCategory("Default Category")
                self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
                self.db.execute("UPDATE feeds SET category=1;")
        self.db.commit()
        # Check that Woodchuck's state is up to date with respect to our
        # state.
        try:
            updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
            wc_init(config, self, updater)
            if wc().available() and updater:
                # The list of known streams.
                streams = wc().streams_list()
                stream_ids = [s.identifier for s in streams]

                # Register any unknown streams.  Remove known streams from
                # stream_ids.
                for key in self.getListOfFeeds():
                    title = self.getFeedTitle(key)
                    # XXX: We should also check whether the list of
                    # articles/objects in each feed/stream is up to date.
                    if key not in stream_ids:
                        logger.debug(
                            "Registering previously unknown channel: %s (%s)"
                            % (key, title))
                        wc().stream_register(
                            key, title,
                            self.config.getUpdateInterval() * 60 * 60)
                    else:
                        # Make sure the human readable name is up to date.
                        if wc()[key].human_readable_name != title:
                            wc()[key].human_readable_name = title
                        stream_ids.remove(key)
                        wc()[key].freshness \
                            = self.config.getUpdateInterval() * 60 * 60

                # Unregister any streams that are no longer subscribed to.
                for id in stream_ids:
                    logger.debug("Unregistering %s" % (id,))
                    wc().stream_unregister(id)
        except Exception:
            logger.exception("Registering streams with Woodchuck")
    def importOldFormatFeeds(self):
        """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
        import rss
        listing = rss.Listing(self.configdir)
        rank = 0
        for id in listing.getListOfFeeds():
            try:
                rank += 1
                values = (id, listing.getFeedTitle(id), listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
                self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, 1);", values)
                self.db.commit()

                feed = listing.getFeed(id)
                new_feed = self.getFeed(id)

                items = feed.getIds()[:]
                items.reverse()
                for item in items:
                    if feed.isEntryRead(item):
                        read_status = 1
                    else:
                        read_status = 0
                    date = timegm(feed.getDateTuple(item))
                    title = feed.getTitle(item)
                    newId = new_feed.generateUniqueId({"date":date, "title":title})
                    values = (newId, title, feed.getContentLink(item), date, time.time(), feed.getExternalLink(item), read_status)
                    new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
                    new_feed.db.commit()

                    images = feed.getImages(item)
                    for image in images:
                        new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
                        new_feed.db.commit()

                self.updateUnread(id)
            except:
                logger.error("importOldFormatFeeds: %s"
                             % (traceback.format_exc(),))
        remove(self.configdir+"feeds.pickle")
    def addArchivedArticle(self, key, index):
        feed = self.getFeed(key)
        title = feed.getTitle(index)
        link = feed.getExternalLink(index)
        date = feed.getDate(index)
        count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
        if count == 0:
            self.addFeed("Archived Articles", "", id="ArchivedArticles")

        archFeed = self.getFeed("ArchivedArticles")
        archFeed.addArchivedArticle(title, link, date, self.configdir)
        self.updateUnread("ArchivedArticles")
    def updateFeed(self, key, expiryTime=None, proxy=None, imageCache=None,
                   priority=0):
        if expiryTime is None:
            expiryTime = self.config.getExpiry()
        if not expiryTime:
            # Default to 24 hours
            expiryTime = 24
        if proxy is None:
            (use_proxy, proxy) = self.config.getProxy()
            if not use_proxy:
                proxy = None
        if imageCache is None:
            imageCache = self.config.getImageCache()

        feed = self.getFeed(key)
        (url, etag, modified) = self.db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
        try:
            modified = time.struct_time(eval(modified))
        except:
            modified = None
        feed.updateFeed(
            self.configdir, url, etag, modified, expiryTime, proxy, imageCache,
            priority, postFeedUpdateFunc=self._queuePostFeedUpdate)
    def _queuePostFeedUpdate(self, *args, **kwargs):
        mainthread.execute(self._postFeedUpdate, async=True, *args, **kwargs)

    def _postFeedUpdate(self, key, updateTime, etag, modified, title):
        if modified is None:
            modified = "None"
        else:
            modified = str(tuple(modified))
        if updateTime > 0:
            self.db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
        else:
            self.db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )
        if title is not None:
            self.db.execute("UPDATE feeds SET title=(case WHEN title=='' THEN ? ELSE title END) where id=?;",
                            (title, key))
        self.db.commit()
        self.cache_invalidate('feeds')
        self.updateUnread(key)
        ### Update the harmattan event feed if necessary
        from gconf import client_get_default
        enable_event_feed = client_get_default().get_bool('/apps/ControlPanel/FeedingIt/EnableFeed')
        if enable_event_feed and (self.getFeedNumberOfUnreadItems(key) > 0):
            if not client_get_default().get_bool('/apps/ControlPanel/FeedingIt/EventFeed/Hide/'+key):
                from eventfeed import EventFeedSender, EventFeedItem
                sender = EventFeedSender('feedingit', 'FeedingIt RSS Reader')
                # Remove the feed's previous event, if any.
                oldId = client_get_default().get_int('/apps/ControlPanel/FeedingIt/EventFeed/Ids/'+key)
                if oldId:
                    sender.remove_item(oldId)

                item = EventFeedItem('/usr/share/feedingit/qml/common/images/feedingit.png', self.getFeedTitle(key))
                item.set_body(str(self.getFeedNumberOfUnreadItems(key)) + ' unread items')
                item.set_action_data([key,])
                eventId = sender.add_item(item)
                client_get_default().set_int('/apps/ControlPanel/FeedingIt/EventFeed/Ids/'+key, eventId)
        update_server_object().ArticleCountUpdated()

        stats = JobManager().stats()
        global jobs_at_start
        completed = stats['jobs-completed'] - jobs_at_start
        in_progress = stats['jobs-in-progress']
        queued = stats['jobs-queued']

        try:
            percent = (100 * ((completed + in_progress / 2.))
                       / (completed + in_progress + queued))
        except ZeroDivisionError:
            percent = 100

        update_server_object().UpdateProgress(
            percent, completed, in_progress, queued, 0, 0, 0, key)

        if in_progress == 0 and queued == 0:
            jobs_at_start = stats['jobs-completed']
    def getFeed(self, key):
        if key == "ArchivedArticles":
            return ArchivedArticles(self.configdir, key)
        return Feed(self.configdir, key)

    def editFeed(self, key, title, url, category=None):
        if category:
            self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
        else:
            self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
        self.db.commit()
        self.cache_invalidate('feeds')

        if wc().available():
            try:
                wc()[key].human_readable_name = title
            except KeyError:
                logger.debug("Feed %s (%s) unknown." % (key, title))
    def getFeedUpdateTime(self, key):
        update_time = self.lookup('feeds', 'updateTime', key)

        if not update_time:
            return "Never"

        delta = time.time() - update_time

        delta_hours = delta / (60. * 60.)
        if delta_hours < .1:
            return "A few minutes ago"
        if delta_hours < .75:
            return "Less than an hour ago"
        if delta_hours < 1.5:
            return "About an hour ago"
        if delta_hours < 18:
            return "About %d hours ago" % (int(delta_hours + 0.5),)

        delta_days = delta_hours / 24.
        if delta_days < 1.5:
            return "About a day ago"
        if delta_days <= 7:
            return "%d days ago" % (int(delta_days + 0.5),)

        delta_weeks = delta_days / 7.
        if delta_weeks <= 8:
            return "%d weeks ago" % int(delta_weeks + 0.5)

        delta_months = delta_days / 30.
        if delta_months <= 30:
            return "%d months ago" % int(delta_months + 0.5)

        return time.strftime("%x", time.gmtime(update_time))
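    # Sample outputs (illustrative): 5 minutes -> "A few minutes ago",
    # 40 minutes -> "Less than an hour ago", 70 minutes -> "About an
    # hour ago", 6 hours -> "About 6 hours ago", 30 hours -> "About a
    # day ago", 5 days -> "5 days ago", 3 weeks -> "3 weeks ago";
    # anything older than about 30 months falls through to a plain date.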
    def getFeedNumberOfUnreadItems(self, key):
        return self.lookup('feeds', 'unread', key)

    def getFeedTitle(self, key):
        title = self.lookup('feeds', 'title', key)
        if title:
            return title

        return self.getFeedUrl(key)

    def getFeedUrl(self, key):
        return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]

    def getFeedCategory(self, key):
        return self.db.execute("SELECT category FROM feeds WHERE id=?;", (key,)).fetchone()[0]

    def getListOfFeeds(self, category=None):
        if category:
            rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
        else:
            rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
        return [row[0] for row in rows]
    def getListOfCategories(self):
        return list(row[0] for row in self.db.execute(
            "SELECT id FROM categories ORDER BY rank;"))

    def getCategoryTitle(self, id):
        return self.lookup('categories', 'title', id)

    def getCategoryUnread(self, id):
        count = 0
        for key in self.getListOfFeeds(category=id):
            count = count + self.getFeedNumberOfUnreadItems(key)
        return count
    def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
        if order == "Most unread":
            tmp = "ORDER BY unread DESC"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1], reverse=True)
        elif order == "Least unread":
            tmp = "ORDER BY unread"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1])
        elif order == "Most recent":
            tmp = "ORDER BY updateTime DESC"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2], reverse=True)
        elif order == "Least recent":
            tmp = "ORDER BY updateTime"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2])
        else: # order == "Manual" or invalid value...
            tmp = "ORDER BY rank"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][0])
        if onlyUnread:
            sql = "SELECT id FROM feeds WHERE unread>0 AND category=? " + tmp
        else:
            sql = "SELECT id FROM feeds WHERE category=? " + tmp
        rows = self.db.execute(sql, (category,))
        keys = []
        for row in rows:
            if row[0]:
                keys.append(row[0])
        return keys
    def getFavicon(self, key):
        filename = "%s%s.d/favicon.ico" % (self.configdir, key)
        if isfile(filename):
            return filename
        return None

    def updateUnread(self, key):
        feed = self.getFeed(key)
        self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
        self.db.commit()
        self.cache_invalidate('feeds')
    def addFeed(self, title, url, id=None, category=1):
        if id is None:
            id = getId(url)
        count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
        if count == 0:
            max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
            if max_rank is None:
                max_rank = 0
            values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
            self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?);", values)
            self.db.commit()
            # Ask for the feed object, it will create the necessary tables.
            self.getFeed(id)

            if wc().available():
                # Register the stream with Woodchuck.  Update approximately
                # every 6 hours.
                wc().stream_register(stream_identifier=id,
                                     human_readable_name=title,
                                     freshness=6*60*60)
            self.cache_invalidate('feeds')
            return True
        else:
            return False
    def addCategory(self, title):
        rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
        if rank is None:
            rank = 1
        id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
        if id is None:
            id = 1
        self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
        self.db.commit()
        self.cache_invalidate('categories')
    def removeFeed(self, key):
        if wc().available():
            try:
                del wc()[key]
            except (KeyError, woodchuck.Error):
                logger.debug("Removing unregistered feed %s failed" % (key,))

        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
        self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
        self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
        self.db.commit()

        if isdir(self.configdir+key+".d/"):
            rmtree(self.configdir+key+".d/")
        self.cache_invalidate('feeds')
    def removeCategory(self, key):
        if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
            rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,) ).fetchone()[0]
            self.db.execute("DELETE FROM categories WHERE id=?;", (key, ))
            self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
            self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
            self.db.commit()
            self.cache_invalidate('categories')
    #def saveConfig(self):
    #    self.listOfFeeds["feedingit-order"] = self.sortedKeys
    #    file = open(self.configdir+"feeds.pickle", "w")
    #    pickle.dump(self.listOfFeeds, file)
    def moveUp(self, key):
        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
        if rank > 0:
            self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
            self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
            self.db.commit()

    def moveCategoryUp(self, key):
        rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
        if rank > 0:
            self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
            self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
            self.db.commit()

    def moveDown(self, key):
        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
        max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
        if rank < max_rank:
            self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
            self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
            self.db.commit()

    def moveCategoryDown(self, key):
        rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
        max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
        if rank < max_rank:
            self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
            self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )
            self.db.commit()