When navigating to the next article, reuse the DisplayArticle widget.
[feedingit] / src / rss_sqlite.py
1 #!/usr/bin/env python2.5
2
3
4 # Copyright (c) 2007-2008 INdT.
5 # Copyright (c) 2011 Neal H. Walfield
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Lesser General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
10 #
11 #  This program is distributed in the hope that it will be useful,
12 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
13 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 #  GNU Lesser General Public License for more details.
15 #
16 #  You should have received a copy of the GNU Lesser General Public License
17 #  along with this program.  If not, see <http://www.gnu.org/licenses/>.
18 #
19
20 # ============================================================================
21 # Name        : FeedingIt.py
22 # Author      : Yves Marcoz
23 # Version     : 0.5.4
24 # Description : Simple RSS Reader
25 # ============================================================================
26
27 import sqlite3
28 from os.path import isfile, isdir
29 from shutil import rmtree
30 from os import mkdir, remove, utime
31 import os
32 import md5
33 import feedparser
34 import time
35 import urllib2
36 from BeautifulSoup import BeautifulSoup
37 from urlparse import urljoin
38 from calendar import timegm
39 import threading
40 import traceback
41 from wc import wc, wc_init, woodchuck
42 import subprocess
43 import dbus
44 from updatedbus import update_server_object
45
46 from jobmanager import JobManager
47 import mainthread
48 from httpprogresshandler import HTTPProgressHandler
49 import random
50 import sys
51 import logging
52 logger = logging.getLogger(__name__)
53
def getId(string):
    """Return a stable 32-character hex identifier for *string* (its MD5 digest)."""
    # hashlib replaces the deprecated md5 module (removed in Python 3);
    # hashlib is available since Python 2.5 and yields identical digests.
    import hashlib
    return hashlib.md5(string).hexdigest()
56
def download_callback(connection):
    """Abort an in-progress download as soon as shutdown is requested."""
    if not JobManager().do_quit:
        return
    raise KeyboardInterrupt
60
def downloader(progress_handler=None, proxy=None):
    """Build a urllib2 opener with progress reporting and an optional proxy.

    When no *progress_handler* is supplied, a default one is created that
    polls for shutdown via download_callback."""
    if progress_handler:
        handlers = [progress_handler]
    else:
        handlers = [HTTPProgressHandler(download_callback)]

    if proxy:
        handlers.append(proxy)

    return urllib2.build_opener(*handlers)
73
# If not None, a subprocess.Popen object corresponding to a
# update_feeds.py process.
update_feed_process = None

# Cached D-Bus proxy for the update daemon; reset to None whenever a call
# fails or a fresh daemon is spawned so the next call rebinds.
update_feeds_iface = None

# NOTE(review): written nowhere in this chunk — presumably set elsewhere
# to the JobManager queue depth at startup; confirm before relying on it.
jobs_at_start = 0
class Feed:
    """A single RSS/Atom feed backed by a per-feed sqlite database."""

    serial_execution_lock = threading.Lock()

    def _getdb(self):
        # One sqlite connection per thread, cached in thread-local storage
        # (sqlite3 connections must not be shared across threads).
        try:
            connection = self.tls.db
        except AttributeError:
            connection = sqlite3.connect("%s/%s.db" % (self.dir, self.key),
                                         timeout=120)
            self.tls.db = connection
        return connection
    db = property(_getdb)
93
94     def __init__(self, configdir, key):
95         self.key = key
96         self.configdir = configdir
97         self.dir = "%s/%s.d" %(self.configdir, self.key)
98         self.tls = threading.local ()
99
100         if not isdir(self.dir):
101             mkdir(self.dir)
102         if not isfile("%s/%s.db" %(self.dir, self.key)):
103             self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
104             self.db.execute("CREATE TABLE images (id text, imagePath text);")
105             self.db.commit()
106
107     def addImage(self, configdir, key, baseurl, url, proxy=None, opener=None):
108         filename = configdir+key+".d/"+getId(url)
109         if not isfile(filename):
110             try:
111                 if not opener:
112                     opener = downloader(proxy=proxy)
113
114                 abs_url = urljoin(baseurl,url)
115                 f = opener.open(abs_url)
116                 outf = open(filename, "w")
117                 outf.write(f.read())
118                 f.close()
119                 outf.close()
120             except (urllib2.HTTPError, urllib2.URLError, IOError), exception:
121                 logger.info("Could not download image %s: %s"
122                             % (abs_url, str (exception)))
123                 return None
124             except:
125                 exception = sys.exc_info()[0]
126
127                 logger.info("Downloading image %s: %s" %
128                             (abs_url, traceback.format_exc()))
129                 try:
130                     remove(filename)
131                 except OSError:
132                     pass
133
134                 raise exception
135         else:
136             #open(filename,"a").close()  # "Touch" the file
137             file = open(filename,"a")
138             utime(filename, None)
139             file.close()
140         return filename
141
    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        """Update this feed from *url*.

        If we are running inside the update_feeds.py daemon, the update is
        queued on the JobManager and executed locally.  Otherwise the
        daemon is asked (via D-Bus) to perform the update, being spawned
        first if it is not already running."""
        if (os.path.basename(sys.argv[0]) == 'update_feeds.py'):
            # We ARE the update daemon: do the work ourselves, serialized
            # per-feed through the job manager.
            def doit():
                def it():
                    self._updateFeed(configdir, url, etag, modified, expiryTime, proxy, imageCache, postFeedUpdateFunc, *postFeedUpdateFuncArgs)
                return it
            JobManager().execute(doit(), self.key, priority=priority)
        else:
            # We are the UI process: hand the update off to the daemon.
            def send_update_request():
                # Returns True on success.  On failure the cached proxy is
                # dropped so the next attempt rebinds to a fresh daemon.
                global update_feeds_iface
                if update_feeds_iface is None:
                    bus=dbus.SessionBus()
                    remote_object = bus.get_object(
                        "org.marcoz.feedingit", # Connection name
                        "/org/marcoz/feedingit/update" # Object's path
                        )
                    update_feeds_iface = dbus.Interface(
                        remote_object, 'org.marcoz.feedingit')

                try:
                    update_feeds_iface.Update(self.key)
                except Exception, e:
                    logger.error("Invoking org.marcoz.feedingit.Update: %s"
                                 % str(e))
                    update_feeds_iface = None
                else:
                    return True

            if send_update_request():
                # Success!  It seems we were able to start the update
                # daemon via dbus (or, it was already running).
                return

            global update_feed_process
            if (update_feed_process is None
                or update_feed_process.poll() is not None):
                # The update_feeds process is not running.  Start it.
                update_feeds = os.path.join(os.path.dirname(__file__),
                                            'update_feeds.py')
                argv = ['/usr/bin/env', 'python', update_feeds, '--daemon' ]
                logger.debug("Starting update_feeds: running %s"
                             % (str(argv),))
                update_feed_process = subprocess.Popen(argv)
                # Make sure the dbus calls go to the right process:
                # rebind.
                update_feeds_iface = None

            # Give the freshly spawned daemon a few seconds to come up,
            # retrying the D-Bus request once per second.
            for _ in xrange(5):
                if send_update_request():
                    break
                time.sleep(1)
193
    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        """Fetch and process *url*, updating this feed's database.

        Runs on a JobManager worker thread.  Downloads the feed (and, if
        *imageCache* is set, its images), inserts/updates article rows,
        expires old articles and stale cache files, reports progress to
        woodchuck, and finally invokes *postFeedUpdateFunc* with the new
        update time, etag, modified value and feed title."""
        success = False
        have_serial_execution_lock = False
        try:
            download_start = time.time ()

            progress_handler = HTTPProgressHandler(download_callback)

            openers = [progress_handler]
            if proxy:
                openers.append (proxy)
            kwargs = {'handlers':openers}

            # Conditional GET: feedparser sends etag/modified so the
            # server can reply 304 when nothing changed.
            tmp=feedparser.parse(url, etag=etag, modified=modified, **kwargs)
            download_duration = time.time () - download_start

            opener = downloader(progress_handler, proxy)

            if JobManager().do_quit:
                raise KeyboardInterrupt

            process_start = time.time()

            # Expiry time is in hours
            expiry = float(expiryTime) * 3600.

            currentTime = 0

            have_woodchuck = mainthread.execute (wc().available)

            def wc_success():
                # Report a successful stream update to woodchuck.
                try:
                    wc().stream_register (self.key, "", 6 * 60 * 60)
                except woodchuck.ObjectExistsError:
                    pass
                try:
                    wc()[self.key].updated (
                        indicator=(woodchuck.Indicator.ApplicationVisual
                                   |woodchuck.Indicator.StreamWide),
                        transferred_down=progress_handler.stats['received'],
                        transferred_up=progress_handler.stats['sent'],
                        transfer_time=download_start,
                        transfer_duration=download_duration,
                        new_objects=len (tmp.entries),
                        objects_inline=len (tmp.entries))
                except KeyError:
                    logger.warn(
                        "Failed to register update of %s with woodchuck!"
                        % (self.key))

            http_status = tmp.get ('status', 200)

            # Check if the parse was successful.  If the http status code
            # is 304, then the download was successful, but there is
            # nothing new.  Indeed, no content is returned.  This makes a
            # 304 look like an error because there are no entries and the
            # parse fails.  But really, everything went great!  Check for
            # this first.
            if http_status == 304:
                logger.debug("%s: No changes to feed." % (self.key,))
                mainthread.execute (wc_success, async=True)
                success = True
            elif len(tmp["entries"])==0 and not tmp.version:
                # An error occured fetching or parsing the feed.  (Version
                # will be either None if e.g. the connection timed out or
                # '' if the data is not a proper feed)
                logger.error(
                    "Error fetching %s: version is: %s: error: %s"
                    % (url, str (tmp.version),
                       str (tmp.get ('bozo_exception', 'Unknown error'))))
                logger.debug(tmp)
                if have_woodchuck:
                    def e():
                        logger.debug("%s: stream update failed!" % self.key)

                        try:
                            # It's not easy to get the feed's title from here.
                            # At the latest, the next time the application is
                            # started, we'll fix up the human readable name.
                            wc().stream_register (self.key, "", 6 * 60 * 60)
                        except woodchuck.ObjectExistsError:
                            pass
                        # Map the HTTP status onto a woodchuck error class.
                        ec = woodchuck.TransferStatus.TransientOther
                        if 300 <= http_status and http_status < 400:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        if 400 <= http_status and http_status < 500:
                            ec = woodchuck.TransferStatus.FailureGone
                        if 500 <= http_status and http_status < 600:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        wc()[self.key].update_failed(ec)
                    mainthread.execute (e, async=True)
            else:
                currentTime = time.time()
                # The etag and modified value should only be updated if the content was not null
                try:
                    etag = tmp["etag"]
                except KeyError:
                    etag = None
                try:
                    modified = tmp["modified"]
                except KeyError:
                    modified = None
                # Best-effort favicon refresh; failures are only logged.
                try:
                    abs_url = urljoin(tmp["feed"]["link"],"/favicon.ico")
                    f = opener.open(abs_url)
                    data = f.read()
                    f.close()
                    outf = open(self.dir+"/favicon.ico", "w")
                    outf.write(data)
                    outf.close()
                    del data
                except (urllib2.HTTPError, urllib2.URLError), exception:
                    logger.debug("Could not download favicon %s: %s"
                                 % (abs_url, str (exception)))

                # Database writes below must be serialized across worker
                # threads; released temporarily while downloading images.
                self.serial_execution_lock.acquire ()
                have_serial_execution_lock = True

                #reversedEntries = self.getEntries()
                #reversedEntries.reverse()

                ids = self.getIds()

                # Process oldest entries first.
                tmp["entries"].reverse()
                for entry in tmp["entries"]:
                    # Yield so as to make the main thread a bit more
                    # responsive.
                    time.sleep(0)

                    if JobManager().do_quit:
                        raise KeyboardInterrupt

                    # Per-article byte counters for woodchuck reporting.
                    received_base = progress_handler.stats['received']
                    sent_base = progress_handler.stats['sent']
                    object_size = 0

                    date = self.extractDate(entry)
                    # Fill in defaults for fields the feed may omit.
                    try:
                        entry["title"]
                    except KeyError:
                        entry["title"] = "No Title"
                    try :
                        entry["link"]
                    except KeyError:
                        entry["link"] = ""
                    try:
                        entry["author"]
                    except KeyError:
                        entry["author"] = None
                    if(not(entry.has_key("id"))):
                        entry["id"] = None
                    content = self.extractContent(entry)
                    object_size = len (content)
                    # The content arrived with the feed itself; don't
                    # count it again against this article's transfer.
                    received_base -= len (content)
                    tmpEntry = {"title":entry["title"], "content":content,
                                "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
                    id = self.generateUniqueId(tmpEntry)

                    #articleTime = time.mktime(self.entries[id]["dateTuple"])
                    soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                    images = soup('img')
                    baseurl = tmpEntry["link"]
                    #if not id in ids:
                    if imageCache and len(images) > 0:
                        # Image downloads may run concurrently with other
                        # feeds: drop the serialization lock while fetching.
                        self.serial_execution_lock.release ()
                        have_serial_execution_lock = False
                        for img in images:
                            filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
                            if filename:
                                # Rewrite the img tag to the local copy.
                                img['src']="file://%s" %filename
                                count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
                                if count == 0:
                                    self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                                    self.db.commit()

                                try:
                                    object_size += os.path.getsize (filename)
                                except os.error, exception:
                                    logger.error ("Error getting size of %s: %s"
                                                  % (filename, exception))
                        self.serial_execution_lock.acquire ()
                        have_serial_execution_lock = True

                    # Write the rendered article to its cache file.
                    tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
                    file = open(tmpEntry["contentLink"], "w")
                    file.write(soup.prettify())
                    file.close()
                    if id in ids:
                        self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
                        self.db.commit()
                    else:
                        values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
                        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
                        self.db.commit()
#                   else:
#                       try:
#                           self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
#                           self.db.commit()
#                           filename = configdir+self.key+".d/"+id+".html"
#                           file = open(filename,"a")
#                           utime(filename, None)
#                           file.close()
#                           images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
#                           for image in images:
#                                file = open(image[0],"a")
#                                utime(image[0], None)
#                                file.close()
#                       except:
#                           pass

                    # Register the object with Woodchuck and mark it as
                    # downloaded.
                    if have_woodchuck:
                        def e():
                            try:
                                obj = wc()[self.key].object_register(
                                    object_identifier=id,
                                    human_readable_name=tmpEntry["title"])
                            except woodchuck.ObjectExistsError:
                                obj = wc()[self.key][id]
                            else:
                                # If the entry does not contain a publication
                                # time, the attribute won't exist.
                                pubtime = entry.get ('date_parsed', None)
                                if pubtime:
                                    obj.publication_time = time.mktime (pubtime)

                                received = (progress_handler.stats['received']
                                            - received_base)
                                sent = progress_handler.stats['sent'] - sent_base
                                obj.transferred (
                                    indicator=(woodchuck.Indicator.ApplicationVisual
                                               |woodchuck.Indicator.StreamWide),
                                    transferred_down=received,
                                    transferred_up=sent,
                                    object_size=object_size)
                        mainthread.execute(e, async=True)
                self.db.commit()

                logger.debug (
                    "%s: Update successful: transferred: %d/%d; objects: %d)"
                    % (self.key,
                       progress_handler.stats['sent'],
                       progress_handler.stats['received'],
                       len (tmp.entries)))
                mainthread.execute (wc_success, async=True)
                success = True

            # Expire old articles: unread articles get twice the grace
            # period of read ones.
            rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
            for row in rows:
                self.removeEntry(row[0])

            from glob import glob
            from os import stat
            # Sweep the cache directory for files (images, orphaned HTML)
            # whose mtime is older than three expiry periods.
            for file in glob(configdir+self.key+".d/*"):
                stats = stat(file)
                # mtime, in seconds since the epoch.
                lastmodDate = stats[8]
                expDate = time.time()-expiry*3
                # check if the file's last-modified date is outdated
                if expDate > lastmodDate:
                    try:
                        # XXX: Tell woodchuck.
                        remove(file)
                    except OSError, exception:
                        logger.error('Could not remove %s: %s'
                                     % (file, str (exception)))
            logger.debug("updated %s: %fs in download, %fs in processing"
                         % (self.key, download_duration,
                            time.time () - process_start))
        except:
            logger.error("Updating %s: %s" % (self.key, traceback.format_exc()))
        finally:
            self.db.commit ()

            if have_serial_execution_lock:
                self.serial_execution_lock.release ()

            # The feed's display timestamp is the newest article date.
            updateTime = 0
            try:
                rows = self.db.execute("SELECT MAX(date) FROM feed;")
                for row in rows:
                    updateTime=row[0]
            except Exception, e:
                logger.error("Fetching update time: %s: %s"
                             % (str(e), traceback.format_exc()))
            finally:
                if not success:
                    etag = None
                    modified = None
                title = None
                try:
                    title = tmp.feed.title
                except (AttributeError, UnboundLocalError), exception:
                    # tmp is unbound if feedparser.parse itself raised.
                    pass
                if postFeedUpdateFunc is not None:
                    postFeedUpdateFunc (self.key, updateTime, etag, modified,
                                        title, *postFeedUpdateFuncArgs)
503
504     def setEntryRead(self, id):
505         self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
506         self.db.commit()
507
508         def e():
509             if wc().available():
510                 try:
511                     wc()[self.key][id].used()
512                 except KeyError:
513                     pass
514
515     def setEntryUnread(self, id):
516         self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
517         self.db.commit()     
518         
519     def markAllAsRead(self):
520         self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
521         self.db.commit()
522
523     def isEntryRead(self, id):
524         read_status = self.db.execute("SELECT read FROM feed WHERE id=?;", (id,) ).fetchone()[0]
525         return read_status==1  # Returns True if read==1, and False if read==0
526     
527     def getTitle(self, id):
528         return self.db.execute("SELECT title FROM feed WHERE id=?;", (id,) ).fetchone()[0]
529     
530     def getContentLink(self, id):
531         return self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) ).fetchone()[0]
532     
533     def getExternalLink(self, id):
534         return self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) ).fetchone()[0]
535     
536     def getDate(self, id):
537         dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
538         return time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(dateStamp))
539
540     def getDateTuple(self, id):
541         dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
542         return time.localtime(dateStamp)
543     
544     def getDateStamp(self, id):
545         return self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
546     
547     def generateUniqueId(self, entry):
548         """
549         Generate a stable identifier for the article.  For the same
550         entry, this should result in the same identifier.  If
551         possible, the identifier should remain the same even if the
552         article is updated.
553         """
554         # Prefer the entry's id, which is supposed to be globally
555         # unique.
556         key = entry.get('id', None)
557         if not key:
558             # Next, try the link to the content.
559             key = entry.get('link', None)
560         if not key:
561             # Ok, the title and the date concatenated are likely to be
562             # relatively stable.
563             key = entry.get('title', None) + entry.get('date', None)
564         if not key:
565             # Hmm, the article's content will at least guarantee no
566             # false negatives (i.e., missing articles)
567             key = entry.get('content', None)
568         if not key:
569             # If all else fails, just use a random number.
570             key = str (random.random ())
571         return getId (key)
572     
573     def getIds(self, onlyUnread=False):
574         if onlyUnread:
575             rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
576         else:
577             rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
578         ids = []
579         for row in rows:
580             ids.append(row[0])
581         #ids.reverse()
582         return ids
583     
584     def getNextId(self, id, forward=True):
585         if forward:
586             delta = 1
587         else:
588             delta = -1
589         ids = self.getIds()
590         index = ids.index(id)
591         return ids[(index + delta) % len(ids)]
592         
593     def getPreviousId(self, id):
594         return self.getNextId(id, forward=False)
595     
596     def getNumberOfUnreadItems(self):
597         return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]
598     
599     def getNumberOfEntries(self):
600         return self.db.execute("SELECT count(*) FROM feed;").fetchone()[0]
601
602     def getArticle(self, entry):
603         #self.setEntryRead(id)
604         #entry = self.entries[id]
605         title = entry['title']
606         #content = entry.get('content', entry.get('summary_detail', {}))
607         content = entry["content"]
608
609         link = entry['link']
610         author = entry['author']
611         date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )
612
613         #text = '''<div style="color: black; background-color: white;">'''
614         text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
615         text += "<html><head><title>" + title + "</title>"
616         text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
617         #text += '<style> body {-webkit-user-select: none;} </style>'
618         text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
619         if author != None:
620             text += "<BR /><small><i>Author: " + author + "</i></small>"
621         text += "<BR /><small><i>Date: " + date + "</i></small></div>"
622         text += "<BR /><BR />"
623         text += content
624         text += "</body></html>"
625         return text
626    
627     def getContent(self, id):
628         contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
629         try:
630             file = open(self.entries[id]["contentLink"])
631             content = file.read()
632             file.close()
633         except:
634             content = "Content unavailable"
635         return content
636     
637     def extractDate(self, entry):
638         if entry.has_key("updated_parsed"):
639             return timegm(entry["updated_parsed"])
640         elif entry.has_key("published_parsed"):
641             return timegm(entry["published_parsed"])
642         else:
643             return time.time()
644         
645     def extractContent(self, entry):
646         content = ""
647         if entry.has_key('summary'):
648             content = entry.get('summary', '')
649         if entry.has_key('content'):
650             if len(entry.content[0].value) > len(content):
651                 content = entry.content[0].value
652         if content == "":
653             content = entry.get('description', '')
654         return content
655     
    def removeEntry(self, id):
        """Delete article *id*: its cached HTML file, its database rows,
        and (asynchronously) its woodchuck registration.

        NOTE(review): the image *files* referenced by the images table are
        not removed here, only the rows; the expiry sweep in _updateFeed
        appears to clean the cache directory — confirm."""
        contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
        if contentLink:
            try:
                remove(contentLink)
            except OSError, exception:
                logger.error("Deleting %s: %s" % (contentLink, str (exception)))
        self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
        self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
        self.db.commit()

        def e():
            if wc().available():
                try:
                    # Tell woodchuck the files are gone, then drop the
                    # object registration itself.
                    wc()[self.key][id].files_deleted (
                        woodchuck.DeletionResponse.Deleted)
                    del wc()[self.key][id]
                except KeyError:
                    pass
        mainthread.execute (e, async=True)
676  
class ArchivedArticles(Feed):
    """A pseudo-feed holding articles the user explicitly archived.

    Reuses Feed's schema; the "updated" column doubles as a
    downloaded-yet flag (0 means the content has not been fetched)."""

    def addArchivedArticle(self, title, link, date, configdir):
        # Insert a placeholder row; the content is fetched by updateFeed.
        id = self.generateUniqueId({"date":date, "title":title})
        values = (id, title, link, date, 0, link, 0)
        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
        self.db.commit()

    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
        """Download the full page of each not-yet-fetched archived article
        and cache it (plus its images) locally.

        NOTE(review): unlike Feed.updateFeed this runs synchronously and
        returns (currentTime, None, None), and image srcs are rewritten to
        a bare path rather than the file:// URL used in Feed._updateFeed —
        confirm whether those differences are intentional."""
        currentTime = 0
        rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
        for row in rows:
            currentTime = time.time()
            id = row[0]
            link = row[1]
            f = urllib2.urlopen(link)
            #entry["content"] = f.read()
            html = f.read()
            f.close()
            soup = BeautifulSoup(html)
            images = soup('img')
            baseurl = link
            for img in images:
                filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
                img['src']=filename
                self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                self.db.commit()
            # Write the rewritten page to this article's cache file.
            contentLink = configdir+self.key+".d/"+id+".html"
            file = open(contentLink, "w")
            file.write(soup.prettify())
            file.close()

            self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
            self.db.commit()
        return (currentTime, None, None)

    def purgeReadArticles(self):
        # Remove every archived article already marked as read.
        rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
        #ids = self.getIds()
        for row in rows:
            self.removeArticle(row[0])

    def removeArticle(self, id):
        """Delete article *id*, removing cached image files that no other
        article still references."""
        rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
        for row in rows:
            try:
                # Only delete the image file if no other article shares it.
                count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
                if count == 0:
                    os.remove(row[0])
            except:
                pass
        self.removeEntry(id)
728
729 class Listing:
730     def _getdb(self):
731         try:
732             db = self.tls.db
733         except AttributeError:
734             db = sqlite3.connect("%s/feeds.db" % self.configdir, timeout=120)
735             self.tls.db = db
736         return db
737     db = property(_getdb)
738
739     # Lists all the feeds in a dictionary, and expose the data
740     def __init__(self, config, configdir):
741         self.config = config
742         self.configdir = configdir
743
744         self.tls = threading.local ()
745         
746         try:
747             table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
748             if table == None:
749                 self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
750                 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
751                 self.addCategory("Default Category")
752                 if isfile(self.configdir+"feeds.pickle"):
753                     self.importOldFormatFeeds()
754                 else:
755                     self.addFeed("Maemo News", "http://maemo.org/news/items.xml")    
756             else:
757                 from string import find, upper
758                 if find(upper(table[0]), "WIDGET")<0:
759                     self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
760                     self.db.execute("UPDATE feeds SET widget=1;")
761                     self.db.commit()
762                 if find(upper(table[0]), "CATEGORY")<0:
763                     self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
764                     self.addCategory("Default Category")
765                     self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
766                     self.db.execute("UPDATE feeds SET category=1;")
767             self.db.commit()
768         except:
769             pass
770
771         # Check that Woodchuck's state is up to date with respect our
772         # state.
773         updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
774         wc_init (self, True if updater else False)
775         if wc().available() and updater:
776             # The list of known streams.
777             streams = wc().streams_list ()
778             stream_ids = [s.identifier for s in streams]
779
780             # Register any unknown streams.  Remove known streams from
781             # STREAMS_IDS.
782             for key in self.getListOfFeeds():
783                 title = self.getFeedTitle(key)
784                 # XXX: We should also check whether the list of
785                 # articles/objects in each feed/stream is up to date.
786                 if key not in stream_ids:
787                     logger.debug(
788                         "Registering previously unknown channel: %s (%s)"
789                         % (key, title,))
790                     # Use a default refresh interval of 6 hours.
791                     wc().stream_register (key, title, 6 * 60 * 60)
792                 else:
793                     # Make sure the human readable name is up to date.
794                     if wc()[key].human_readable_name != title:
795                         wc()[key].human_readable_name = title
796                     stream_ids.remove (key)
797                     
798
799             # Unregister any streams that are no longer subscribed to.
800             for id in stream_ids:
801                 logger.debug("Unregistering %s" % (id,))
802                 w.stream_unregister (id)
803
804     def importOldFormatFeeds(self):
805         """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
806         import rss
807         listing = rss.Listing(self.configdir)
808         rank = 0
809         for id in listing.getListOfFeeds():
810             try:
811                 rank += 1
812                 values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
813                 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)
814                 self.db.commit()
815                 
816                 feed = listing.getFeed(id)
817                 new_feed = self.getFeed(id)
818                 
819                 items = feed.getIds()[:]
820                 items.reverse()
821                 for item in items:
822                         if feed.isEntryRead(item):
823                             read_status = 1
824                         else:
825                             read_status = 0 
826                         date = timegm(feed.getDateTuple(item))
827                         title = feed.getTitle(item)
828                         newId = new_feed.generateUniqueId({"date":date, "title":title})
829                         values = (newId, title , feed.getContentLink(item), date, tuple(time.time()), feed.getExternalLink(item), read_status)
830                         new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
831                         new_feed.db.commit()
832                         try:
833                             images = feed.getImages(item)
834                             for image in images:
835                                 new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
836                                 new_feed.db.commit()
837                         except:
838                             pass
839                 self.updateUnread(id)
840             except:
841                 logger.error("importOldFormatFeeds: %s"
842                              % (traceback.format_exc(),))
843         remove(self.configdir+"feeds.pickle")
844                 
845         
846     def addArchivedArticle(self, key, index):
847         feed = self.getFeed(key)
848         title = feed.getTitle(index)
849         link = feed.getExternalLink(index)
850         date = feed.getDate(index)
851         count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
852         if count == 0:
853             self.addFeed("Archived Articles", "", id="ArchivedArticles")
854
855         archFeed = self.getFeed("ArchivedArticles")
856         archFeed.addArchivedArticle(title, link, date, self.configdir)
857         self.updateUnread("ArchivedArticles")
858         
859     def updateFeed(self, key, expiryTime=None, proxy=None, imageCache=None,
860                    priority=0):
861         if expiryTime is None:
862             expiryTime = self.config.getExpiry()
863         if not expiryTime:
864             # Default to 24 hours
865             expriyTime = 24
866         if proxy is None:
867             (use_proxy, proxy) = self.config.getProxy()
868             if not use_proxy:
869                 proxy = None
870         if imageCache is None:
871             imageCache = self.config.getImageCache()
872
873         feed = self.getFeed(key)
874         (url, etag, modified) = self.db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
875         try:
876             modified = time.struct_time(eval(modified))
877         except:
878             modified = None
879         feed.updateFeed(
880             self.configdir, url, etag, modified, expiryTime, proxy, imageCache,
881             priority, postFeedUpdateFunc=self._queuePostFeedUpdate)
882
    def _queuePostFeedUpdate(self, *args, **kwargs):
        """Schedule _postFeedUpdate to run asynchronously on the main
        thread; the database bookkeeping and DBus signalling there must
        not run on an update worker thread."""
        mainthread.execute (self._postFeedUpdate, async=True, *args, **kwargs)
885
886     def _postFeedUpdate(self, key, updateTime, etag, modified, title):
887         if modified==None:
888             modified="None"
889         else:
890             modified=str(tuple(modified))
891         if updateTime > 0:
892             self.db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
893         else:
894             self.db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )
895
896         if title is not None:
897             self.db.execute("UPDATE feeds SET title=(case WHEN title=='' THEN ? ELSE title END) where id=?;",
898                             (title, key))
899         self.db.commit()
900         self.updateUnread(key)
901
902         update_server_object().ArticleCountUpdated()
903
904         stats = JobManager().stats()
905         global jobs_at_start
906         completed = stats['jobs-completed'] - jobs_at_start
907         in_progress = stats['jobs-in-progress']
908         queued = stats['jobs-queued']
909
910         percent = (100 * ((completed + in_progress / 2.))
911                    / (completed + in_progress + queued))
912
913         update_server_object().UpdateProgress(
914             percent, completed, in_progress, queued, 0, 0, 0, key)
915
916         if in_progress == 0 and queued == 0:
917             jobs_at_start = stats['jobs-completed']
918         
919     def getFeed(self, key):
920         if key == "ArchivedArticles":
921             return ArchivedArticles(self.configdir, key)
922         return Feed(self.configdir, key)
923         
924     def editFeed(self, key, title, url, category=None):
925         if category:
926             self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
927         else:
928             self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
929         self.db.commit()
930
931         if wc().available():
932             try:
933                 wc()[key].human_readable_name = title
934             except KeyError:
935                 logger.debug("Feed %s (%s) unknown." % (key, title))
936         
937     def getFeedUpdateTime(self, key):
938         return time.ctime(self.db.execute("SELECT updateTime FROM feeds WHERE id=?;", (key,)).fetchone()[0])
939         
940     def getFeedNumberOfUnreadItems(self, key):
941         return self.db.execute("SELECT unread FROM feeds WHERE id=?;", (key,)).fetchone()[0]
942         
943     def getFeedTitle(self, key):
944         (title, url) = self.db.execute("SELECT title, url FROM feeds WHERE id=?;", (key,)).fetchone()
945         if title:
946             return title
947         return url
948         
949     def getFeedUrl(self, key):
950         return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]
951     
952     def getFeedCategory(self, key):
953         return self.db.execute("SELECT category FROM feeds WHERE id=?;", (key,)).fetchone()[0]
954         
955     def getListOfFeeds(self, category=None):
956         if category:
957             rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
958         else:
959             rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
960         keys = []
961         for row in rows:
962             if row[0]:
963                 keys.append(row[0])
964         return keys
965     
966     def getListOfCategories(self):
967         rows = self.db.execute("SELECT id FROM categories ORDER BY rank;" )
968         keys = []
969         for row in rows:
970             if row[0]:
971                 keys.append(row[0])
972         return keys
973     
974     def getCategoryTitle(self, id):
975         row = self.db.execute("SELECT title FROM categories WHERE id=?;", (id, )).fetchone()
976         return row[0]
977     
978     def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
979         if   order == "Most unread":
980             tmp = "ORDER BY unread DESC"
981             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1], reverse=True)
982         elif order == "Least unread":
983             tmp = "ORDER BY unread"
984             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1])
985         elif order == "Most recent":
986             tmp = "ORDER BY updateTime DESC"
987             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2], reverse=True)
988         elif order == "Least recent":
989             tmp = "ORDER BY updateTime"
990             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2])
991         else: # order == "Manual" or invalid value...
992             tmp = "ORDER BY rank"
993             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][0])
994         if onlyUnread:
995             sql = "SELECT id FROM feeds WHERE unread>0 AND category=%s " %category + tmp 
996         else:
997             sql = "SELECT id FROM feeds WHERE category=%s " %category + tmp
998         rows = self.db.execute(sql)
999         keys = []
1000         for row in rows:
1001             if row[0]:
1002                 keys.append(row[0])
1003         return keys
1004     
1005     def getFavicon(self, key):
1006         filename = "%s%s.d/favicon.ico" % (self.configdir, key)
1007         if isfile(filename):
1008             return filename
1009         else:
1010             return False
1011         
1012     def updateUnread(self, key):
1013         feed = self.getFeed(key)
1014         self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
1015         self.db.commit()
1016
1017     def addFeed(self, title, url, id=None, category=1):
1018         if not id:
1019             id = getId(url)
1020         count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
1021         if count == 0:
1022             max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
1023             if max_rank == None:
1024                 max_rank = 0
1025             values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
1026             self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?,?);", values)
1027             self.db.commit()
1028             # Ask for the feed object, it will create the necessary tables
1029             self.getFeed(id)
1030
1031             if wc().available():
1032                 # Register the stream with Woodchuck.  Update approximately
1033                 # every 6 hours.
1034                 wc().stream_register(stream_identifier=id,
1035                                      human_readable_name=title,
1036                                      freshness=6*60*60)
1037
1038             return True
1039         else:
1040             return False
1041         
1042     def addCategory(self, title):
1043         rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
1044         if rank==None:
1045             rank=1
1046         id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
1047         if id==None:
1048             id=1
1049         self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
1050         self.db.commit()
1051     
1052     def removeFeed(self, key):
1053         if wc().available ():
1054             try:
1055                 del wc()[key]
1056             except KeyError:
1057                 logger.debug("Removing unregistered feed %s failed" % (key,))
1058
1059         rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
1060         self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
1061         self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
1062         self.db.commit()
1063
1064         if isdir(self.configdir+key+".d/"):
1065            rmtree(self.configdir+key+".d/")
1066            
1067     def removeCategory(self, key):
1068         if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
1069             rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,) ).fetchone()[0]
1070             self.db.execute("DELETE FROM categories WHERE id=?;", (key, ))
1071             self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
1072             self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
1073             self.db.commit()
1074         
1075     #def saveConfig(self):
1076     #    self.listOfFeeds["feedingit-order"] = self.sortedKeys
1077     #    file = open(self.configdir+"feeds.pickle", "w")
1078     #    pickle.dump(self.listOfFeeds, file)
1079     #    file.close()
1080         
1081     def moveUp(self, key):
1082         rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
1083         if rank>0:
1084             self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
1085             self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
1086             self.db.commit()
1087             
1088     def moveCategoryUp(self, key):
1089         rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
1090         if rank>0:
1091             self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
1092             self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
1093             self.db.commit()
1094         
1095     def moveDown(self, key):
1096         rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
1097         max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
1098         if rank<max_rank:
1099             self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
1100             self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
1101             self.db.commit()
1102             
1103     def moveCategoryDown(self, key):
1104         rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
1105         max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
1106         if rank<max_rank:
1107             self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
1108             self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )
1109             self.db.commit()
1110             
1111