When creating a closure, correctly capture any required local state.
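
A closure defined inside a loop looks up its free variables when it runs, not when it is defined, so every deferred callback may see the values from the last iteration. Binding the needed state as a function parameter (or through a factory function, as rss_sqlite.py does below with register_stream_update_failed and register_object_transferred) freezes each value at creation time. A minimal sketch of the pattern — the names make_callback and value are illustrative, not from the file:

    callbacks = []
    for value in range(3):
        def make_callback(value):
            # `value` is now a parameter, so each callback is bound to
            # the value from its own iteration.
            def callback():
                print value
            return callback
        callbacks.append(make_callback(value))

    for cb in callbacks:
        cb()   # prints 0, 1, 2 -- not 2, 2, 2
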
[feedingit] / src / rss_sqlite.py
#!/usr/bin/env python2.5


# Copyright (c) 2007-2008 INdT.
# Copyright (c) 2011 Neal H. Walfield
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU Lesser General Public License for more details.
#
#  You should have received a copy of the GNU Lesser General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

# ============================================================================
# Name        : FeedingIt.py
# Author      : Yves Marcoz
# Version     : 0.5.4
# Description : Simple RSS Reader
# ============================================================================

from __future__ import with_statement

import sqlite3
from os.path import isfile, isdir
from shutil import rmtree
from os import mkdir, remove, utime
import os
import md5
import feedparser
import time
import urllib2
from BeautifulSoup import BeautifulSoup
from urlparse import urljoin
from calendar import timegm
import threading
import traceback
from wc import wc, wc_init, woodchuck
import subprocess
import dbus
from updatedbus import update_server_object

from jobmanager import JobManager
import mainthread
from httpprogresshandler import HTTPProgressHandler
import random
import sys
import logging
logger = logging.getLogger(__name__)

def getId(string):
    return md5.new(string).hexdigest()

def download_callback(connection):
    if JobManager().do_quit:
        raise KeyboardInterrupt

def downloader(progress_handler=None, proxy=None):
    openers = []

    if progress_handler is not None:
        openers.append(progress_handler)
    else:
        openers.append(HTTPProgressHandler(download_callback))

    if proxy:
        openers.append(proxy)

    return urllib2.build_opener(*openers)

# If not None, a subprocess.Popen object corresponding to an
# update_feeds.py process.
update_feed_process = None

update_feeds_iface = None

jobs_at_start = 0

class Feed:
    serial_execution_lock = threading.Lock()

    def _getdb(self):
        try:
            db = self.tls.db
        except AttributeError:
            db = sqlite3.connect("%s/%s.db" % (self.dir, self.key), timeout=120)
            self.tls.db = db
        return db
    db = property(_getdb)

    def __init__(self, configdir, key):
        self.key = key
        self.configdir = configdir
        self.dir = "%s/%s.d" %(self.configdir, self.key)
        self.tls = threading.local ()

        if not isdir(self.dir):
            mkdir(self.dir)
        if not isfile("%s/%s.db" %(self.dir, self.key)):
            self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
            self.db.execute("CREATE TABLE images (id text, imagePath text);")
            self.db.commit()

    def addImage(self, configdir, key, baseurl, url, proxy=None, opener=None):
        filename = configdir+key+".d/"+getId(url)
        if not isfile(filename):
            try:
                if not opener:
                    opener = downloader(proxy=proxy)

                abs_url = urljoin(baseurl,url)
                f = opener.open(abs_url)
                try:
                    with open(filename, "w") as outf:
                        for data in f:
                            outf.write(data)
                finally:
                    f.close()
            except (urllib2.HTTPError, urllib2.URLError, IOError), exception:
                logger.info("Could not download image %s: %s"
                            % (abs_url, str (exception)))
                return None
            except:
                logger.info("Downloading image %s: %s" %
                            (abs_url, traceback.format_exc()))
                try:
                    remove(filename)
                except OSError:
                    pass

                # Re-raise the original exception with its traceback
                # intact (re-raising sys.exc_info()[0] would raise the
                # exception class, losing the instance and traceback).
                raise
        else:
            #open(filename,"a").close()  # "Touch" the file
            file = open(filename,"a")
            utime(filename, None)
            file.close()
        return filename

    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        if (os.path.basename(sys.argv[0]) == 'update_feeds.py'):
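            # doit() is a closure factory: it binds the current values
            # of url, etag, modified, etc. into `it` now, so the job
            # still sees the right state when JobManager runs it later.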
            def doit():
                def it():
                    self._updateFeed(configdir, url, etag, modified, expiryTime, proxy, imageCache, postFeedUpdateFunc, *postFeedUpdateFuncArgs)
                return it
            JobManager().execute(doit(), self.key, priority=priority)
        else:
            def send_update_request():
                global update_feeds_iface
                if update_feeds_iface is None:
                    bus=dbus.SessionBus()
                    remote_object = bus.get_object(
                        "org.marcoz.feedingit", # Connection name
                        "/org/marcoz/feedingit/update" # Object's path
                        )
                    update_feeds_iface = dbus.Interface(
                        remote_object, 'org.marcoz.feedingit')

                try:
                    update_feeds_iface.Update(self.key)
                except Exception, e:
                    logger.error("Invoking org.marcoz.feedingit.Update: %s"
                                 % str(e))
                    update_feeds_iface = None
                else:
                    return True

            if send_update_request():
                # Success!  It seems we were able to start the update
                # daemon via dbus (or, it was already running).
                return

            global update_feed_process
            if (update_feed_process is None
                or update_feed_process.poll() is not None):
                # The update_feeds process is not running.  Start it.
                update_feeds = os.path.join(os.path.dirname(__file__),
                                            'update_feeds.py')
                argv = ['/usr/bin/env', 'python', update_feeds, '--daemon' ]
                logger.debug("Starting update_feeds: running %s"
                             % (str(argv),))
                update_feed_process = subprocess.Popen(argv)
                # Make sure the dbus calls go to the right process:
                # rebind.
                update_feeds_iface = None

            for _ in xrange(5):
                if send_update_request():
                    break
                time.sleep(1)

    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        success = False
        have_serial_execution_lock = False
        try:
            download_start = time.time ()

            progress_handler = HTTPProgressHandler(download_callback)

            openers = [progress_handler]
            if proxy:
                openers.append (proxy)
            kwargs = {'handlers':openers}

            tmp=feedparser.parse(url, etag=etag, modified=modified, **kwargs)
            download_duration = time.time () - download_start

            opener = downloader(progress_handler, proxy)

            if JobManager().do_quit:
                raise KeyboardInterrupt

            process_start = time.time()

            # Expiry time is in hours
            expiry = float(expiryTime) * 3600.

            currentTime = 0

            def wc_success():
                try:
                    wc().stream_register (self.key, "", 6 * 60 * 60)
                except woodchuck.ObjectExistsError:
                    pass
                try:
                    wc()[self.key].updated (
                        indicator=(woodchuck.Indicator.ApplicationVisual
                                   |woodchuck.Indicator.StreamWide),
                        transferred_down=progress_handler.stats['received'],
                        transferred_up=progress_handler.stats['sent'],
                        transfer_time=download_start,
                        transfer_duration=download_duration,
                        new_objects=len (tmp.entries),
                        objects_inline=len (tmp.entries))
                except KeyError:
                    logger.warn(
                        "Failed to register update of %s with woodchuck!"
                        % (self.key))

            http_status = tmp.get ('status', 200)

            # Check if the parse was successful.  If the http status code
            # is 304, then the download was successful, but there is
            # nothing new.  Indeed, no content is returned.  This makes a
            # 304 look like an error because there are no entries and the
            # parse fails.  But really, everything went great!  Check for
            # this first.
            if http_status == 304:
                logger.debug("%s: No changes to feed." % (self.key,))
                mainthread.execute(wc_success, async=True)
                success = True
            elif len(tmp["entries"])==0 and not tmp.version:
                # An error occurred fetching or parsing the feed.  (Version
                # will be either None if e.g. the connection timed out or
                # '' if the data is not a proper feed)
                logger.error(
                    "Error fetching %s: version is: %s: error: %s"
                    % (url, str (tmp.version),
                       str (tmp.get ('bozo_exception', 'Unknown error'))))
                logger.debug(tmp)
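                # http_status is passed as a parameter so that the
                # nested doit() closure is bound to the value from this
                # update, not to whatever the variable holds when the
                # main thread finally runs it.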
                def register_stream_update_failed(http_status):
                    def doit():
                        logger.debug("%s: stream update failed!" % self.key)

                        try:
                            # It's not easy to get the feed's title from here.
                            # At the latest, the next time the application is
                            # started, we'll fix up the human readable name.
                            wc().stream_register (self.key, "", 6 * 60 * 60)
                        except woodchuck.ObjectExistsError:
                            pass
                        ec = woodchuck.TransferStatus.TransientOther
                        if 300 <= http_status and http_status < 400:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        if 400 <= http_status and http_status < 500:
                            ec = woodchuck.TransferStatus.FailureGone
                        if 500 <= http_status and http_status < 600:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        wc()[self.key].update_failed(ec)
                    return doit
                if wc().available():
                    mainthread.execute(
                        register_stream_update_failed(
                            http_status=http_status),
                        async=True)
            else:
                currentTime = time.time()
                # The etag and modified value should only be updated if the content was not null
                try:
                    etag = tmp["etag"]
                except KeyError:
                    etag = None
                try:
                    modified = tmp["modified"]
                except KeyError:
                    modified = None
                try:
                    abs_url = urljoin(tmp["feed"]["link"],"/favicon.ico")
                    f = opener.open(abs_url)
                    data = f.read()
                    f.close()
                    outf = open(self.dir+"/favicon.ico", "w")
                    outf.write(data)
                    outf.close()
                    del data
                except (urllib2.HTTPError, urllib2.URLError), exception:
                    logger.debug("Could not download favicon %s: %s"
                                 % (abs_url, str (exception)))

                self.serial_execution_lock.acquire ()
                have_serial_execution_lock = True

                #reversedEntries = self.getEntries()
                #reversedEntries.reverse()

                ids = self.getIds()

                tmp["entries"].reverse()
                for entry in tmp["entries"]:
                    # Yield so as to make the main thread a bit more
                    # responsive.
                    time.sleep(0)

                    if JobManager().do_quit:
                        raise KeyboardInterrupt

                    received_base = progress_handler.stats['received']
                    sent_base = progress_handler.stats['sent']
                    object_size = 0

                    date = self.extractDate(entry)
                    try:
                        entry["title"]
                    except KeyError:
                        entry["title"] = "No Title"
                    try:
                        entry["link"]
                    except KeyError:
                        entry["link"] = ""
                    try:
                        entry["author"]
                    except KeyError:
                        entry["author"] = None
                    if not entry.has_key("id"):
                        entry["id"] = None
                    content = self.extractContent(entry)
                    object_size = len (content)
                    received_base -= len (content)
                    tmpEntry = {"title":entry["title"], "content":content,
                                "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
                    id = self.generateUniqueId(tmpEntry)

                    #articleTime = time.mktime(self.entries[id]["dateTuple"])
                    soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                    images = soup('img')
                    baseurl = tmpEntry["link"]
                    #if not id in ids:
                    if imageCache and len(images) > 0:
                        self.serial_execution_lock.release ()
                        have_serial_execution_lock = False
                        for img in images:
                            filename = self.addImage(
                                configdir, self.key, baseurl, img['src'],
                                opener=opener)
                            if filename:
                                img['src']="file://%s" %filename
                                count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
                                if count == 0:
                                    self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                                    self.db.commit()

                                try:
                                    object_size += os.path.getsize (filename)
                                except os.error, exception:
                                    logger.error ("Error getting size of %s: %s"
                                                  % (filename, exception))
                        self.serial_execution_lock.acquire ()
                        have_serial_execution_lock = True

                    tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
                    file = open(tmpEntry["contentLink"], "w")
                    file.write(soup.prettify())
                    file.close()
                    if id in ids:
                        self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
                        self.db.commit()
                    else:
                        values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
                        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
                        self.db.commit()
#                   else:
#                       try:
#                           self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
#                           self.db.commit()
#                           filename = configdir+self.key+".d/"+id+".html"
#                           file = open(filename,"a")
#                           utime(filename, None)
#                           file.close()
#                           images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
#                           for image in images:
#                                file = open(image[0],"a")
#                                utime(image[0], None)
#                                file.close()
#                       except:
#                           pass

                    # Register the object with Woodchuck and mark it as
                    # downloaded.
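                    # The per-article state (id, title, byte counts) is
                    # passed as parameters so each queued closure keeps
                    # the values from this loop iteration.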
                    def register_object_transferred(
                            id, title, publication_time,
                            sent, received, object_size):
                        def doit():
                            logger.debug("Registering transfer of object %s"
                                         % title)
                            try:
                                obj = wc()[self.key].object_register(
                                    object_identifier=id,
                                    human_readable_name=title)
                            except woodchuck.ObjectExistsError:
                                obj = wc()[self.key][id]
                            else:
                                obj.publication_time = publication_time
                                obj.transferred(
                                    indicator=(
                                        woodchuck.Indicator.ApplicationVisual
                                        |woodchuck.Indicator.StreamWide),
                                    transferred_down=received,
                                    transferred_up=sent,
                                    object_size=object_size)
                        return doit
                    if wc().available():
                        # If the entry does not contain a publication
                        # time, the attribute won't exist.
                        pubtime = entry.get('date_parsed', None)
                        if pubtime:
                            publication_time = time.mktime (pubtime)
                        else:
                            publication_time = None

                        sent = progress_handler.stats['sent'] - sent_base
                        received = (progress_handler.stats['received']
                                    - received_base)

                        mainthread.execute(
                            register_object_transferred(
                                id=id,
                                title=tmpEntry["title"],
                                publication_time=publication_time,
                                sent=sent, received=received,
                                object_size=object_size),
                            async=True)
                self.db.commit()

                logger.debug(
                    "%s: Update successful: transferred: %d/%d; objects: %d"
                    % (self.key,
                       progress_handler.stats['sent'],
                       progress_handler.stats['received'],
                       len (tmp.entries)))
                mainthread.execute (wc_success, async=True)
                success = True

            rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
            for row in rows:
                self.removeEntry(row[0])

            from glob import glob
            from os import stat
            for file in glob(configdir+self.key+".d/*"):
                stats = stat(file)
                # Put the two dates into matching format.
                lastmodDate = stats[8]
                expDate = time.time()-expiry*3
                # Check whether the file's last-modified date is outdated.
                if expDate > lastmodDate:
                    try:
                        # XXX: Tell woodchuck.
                        remove(file)
                    except OSError, exception:
                        logger.error('Could not remove %s: %s'
                                     % (file, str (exception)))
            logger.debug("updated %s: %fs in download, %fs in processing"
                         % (self.key, download_duration,
                            time.time () - process_start))
        except:
            logger.error("Updating %s: %s" % (self.key, traceback.format_exc()))
        finally:
            self.db.commit ()

            if have_serial_execution_lock:
                self.serial_execution_lock.release ()

            updateTime = 0
            try:
                rows = self.db.execute("SELECT MAX(date) FROM feed;")
                for row in rows:
                    updateTime=row[0]
            except Exception, e:
                logger.error("Fetching update time: %s: %s"
                             % (str(e), traceback.format_exc()))
            finally:
                if not success:
                    etag = None
                    modified = None
                title = None
                try:
                    title = tmp.feed.title
                except (AttributeError, UnboundLocalError), exception:
                    pass
                if postFeedUpdateFunc is not None:
                    postFeedUpdateFunc (self.key, updateTime, etag, modified,
                                        title, *postFeedUpdateFuncArgs)

    def setEntryRead(self, id):
        self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
        self.db.commit()

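        # doit() here closes over the method-local id, which is fixed
        # for this call, so deferring it to the main thread is safe.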
        def doit():
            try:
                wc()[self.key][id].used()
            except KeyError:
                pass
        if wc().available():
            mainthread.execute(doit, async=True)

    def setEntryUnread(self, id):
        self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
        self.db.commit()

    def markAllAsRead(self):
        self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
        self.db.commit()

    def isEntryRead(self, id):
        read_status = self.db.execute("SELECT read FROM feed WHERE id=?;", (id,) ).fetchone()[0]
        return read_status==1  # Returns True if read==1, and False if read==0

    def getTitle(self, id):
        return self.db.execute("SELECT title FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def getContentLink(self, id):
        return self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def getExternalLink(self, id):
        return self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def getDate(self, id):
        dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
        return time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(dateStamp))

    def getDateTuple(self, id):
        dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
        return time.localtime(dateStamp)

    def getDateStamp(self, id):
        return self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]

    def generateUniqueId(self, entry):
        """
        Generate a stable identifier for the article.  For the same
        entry, this should result in the same identifier.  If
        possible, the identifier should remain the same even if the
        article is updated.
        """
        # Prefer the entry's id, which is supposed to be globally
        # unique.
        key = entry.get('id', None)
        if not key:
            # Next, try the link to the content.
            key = entry.get('link', None)
        if not key:
            # Ok, the title and the date concatenated are likely to be
            # relatively stable.  (Use empty strings as fallbacks so a
            # missing field does not raise a TypeError.)
            key = entry.get('title', '') + entry.get('date', '')
        if not key:
            # Hmm, the article's content will at least guarantee no
            # false negatives (i.e., missing articles)
            key = entry.get('content', None)
        if not key:
            # If all else fails, just use a random number.
            key = str (random.random ())
        return getId (key)

    def getIds(self, onlyUnread=False):
        if onlyUnread:
            rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
        else:
            rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
        ids = []
        for row in rows:
            ids.append(row[0])
        #ids.reverse()
        return ids

    def getNextId(self, id, forward=True):
        if forward:
            delta = 1
        else:
            delta = -1
        ids = self.getIds()
        index = ids.index(id)
        return ids[(index + delta) % len(ids)]

    def getPreviousId(self, id):
        return self.getNextId(id, forward=False)

    def getNumberOfUnreadItems(self):
        return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]

    def getNumberOfEntries(self):
        return self.db.execute("SELECT count(*) FROM feed;").fetchone()[0]

    def getArticle(self, entry):
        #self.setEntryRead(id)
        #entry = self.entries[id]
        title = entry['title']
        #content = entry.get('content', entry.get('summary_detail', {}))
        content = entry["content"]

        link = entry['link']
        author = entry['author']
        date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )

        #text = '''<div style="color: black; background-color: white;">'''
        text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
        text += "<html><head><title>" + title + "</title>"
        text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
        #text += '<style> body {-webkit-user-select: none;} </style>'
        text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
        if author != None:
            text += "<BR /><small><i>Author: " + author + "</i></small>"
        text += "<BR /><small><i>Date: " + date + "</i></small></div>"
        text += "<BR /><BR />"
        text += content
        text += "</body></html>"
        return text

    def getContent(self, id):
        contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
        try:
            # Use the contentLink just fetched from the database
            # (self.entries no longer exists in this class).
            file = open(contentLink)
            content = file.read()
            file.close()
        except:
            content = "Content unavailable"
        return content

    def extractDate(self, entry):
        if entry.has_key("updated_parsed"):
            return timegm(entry["updated_parsed"])
        elif entry.has_key("published_parsed"):
            return timegm(entry["published_parsed"])
        else:
            return time.time()

    def extractContent(self, entry):
        content = ""
        if entry.has_key('summary'):
            content = entry.get('summary', '')
        if entry.has_key('content'):
            if len(entry.content[0].value) > len(content):
                content = entry.content[0].value
        if content == "":
            content = entry.get('description', '')
        return content

    def removeEntry(self, id):
        contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
        if contentLink:
            try:
                remove(contentLink)
            except OSError, exception:
                logger.error("Deleting %s: %s" % (contentLink, str (exception)))
        self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
        self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
        self.db.commit()

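        # As in setEntryRead, doit() captures the method-local id for
        # the deferred Woodchuck notification.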
        def doit():
            try:
                wc()[self.key][id].files_deleted (
                    woodchuck.DeletionResponse.Deleted)
                del wc()[self.key][id]
            except KeyError:
                pass
        if wc().available():
            mainthread.execute (doit, async=True)

class ArchivedArticles(Feed):
    def addArchivedArticle(self, title, link, date, configdir):
        id = self.generateUniqueId({"date":date, "title":title})
        values = (id, title, link, date, 0, link, 0)
        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
        self.db.commit()

    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
        currentTime = 0
        rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
        for row in rows:
            currentTime = time.time()
            id = row[0]
            link = row[1]
            f = urllib2.urlopen(link)
            #entry["content"] = f.read()
            html = f.read()
            f.close()
            soup = BeautifulSoup(html)
            images = soup('img')
            baseurl = link
            for img in images:
                filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
                img['src']=filename
                self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                self.db.commit()
            contentLink = configdir+self.key+".d/"+id+".html"
            file = open(contentLink, "w")
            file.write(soup.prettify())
            file.close()

            self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
            self.db.commit()
        return (currentTime, None, None)

    def purgeReadArticles(self):
        rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
        #ids = self.getIds()
        for row in rows:
            self.removeArticle(row[0])

    def removeArticle(self, id):
        rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
        for row in rows:
            try:
                count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
                if count == 0:
                    os.remove(row[0])
            except:
                pass
        self.removeEntry(id)

class Listing:
    def _getdb(self):
        try:
            db = self.tls.db
        except AttributeError:
            db = sqlite3.connect("%s/feeds.db" % self.configdir, timeout=120)
            self.tls.db = db
        return db
    db = property(_getdb)

    # Lists all the feeds in a dictionary and exposes the data.
    def __init__(self, config, configdir):
        self.config = config
        self.configdir = configdir

        self.tls = threading.local ()

        try:
            table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
            if table == None:
                self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
                self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
                self.addCategory("Default Category")
                if isfile(self.configdir+"feeds.pickle"):
                    self.importOldFormatFeeds()
                else:
                    self.addFeed("Maemo News", "http://maemo.org/news/items.xml")
            else:
                from string import find, upper
                if find(upper(table[0]), "WIDGET")<0:
                    self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
                    self.db.execute("UPDATE feeds SET widget=1;")
                    self.db.commit()
                if find(upper(table[0]), "CATEGORY")<0:
                    self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
                    self.addCategory("Default Category")
                    self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
                    self.db.execute("UPDATE feeds SET category=1;")
            self.db.commit()
        except:
            pass

        # Check that Woodchuck's state is up to date with respect to
        # our state.
        updater = os.path.basename(sys.argv[0]) == 'update_feeds.py'
        wc_init (self, updater)
        if wc().available() and updater:
            # The list of known streams.
            streams = wc().streams_list ()
            stream_ids = [s.identifier for s in streams]

            # Register any unknown streams.  Remove known streams from
            # STREAM_IDS.
            for key in self.getListOfFeeds():
                title = self.getFeedTitle(key)
                # XXX: We should also check whether the list of
                # articles/objects in each feed/stream is up to date.
                if key not in stream_ids:
                    logger.debug(
                        "Registering previously unknown channel: %s (%s)"
                        % (key, title,))
                    # Use a default refresh interval of 6 hours.
                    wc().stream_register (key, title, 6 * 60 * 60)
                else:
                    # Make sure the human readable name is up to date.
                    if wc()[key].human_readable_name != title:
                        wc()[key].human_readable_name = title
                    stream_ids.remove (key)

            # Unregister any streams that are no longer subscribed to.
            for id in stream_ids:
                logger.debug("Unregistering %s" % (id,))
                wc().stream_unregister (id)

    def importOldFormatFeeds(self):
        """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
        import rss
        listing = rss.Listing(self.configdir)
        rank = 0
        for id in listing.getListOfFeeds():
            try:
                rank += 1
                values = (id, listing.getFeedTitle(id), listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
                self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)
                self.db.commit()

                feed = listing.getFeed(id)
                new_feed = self.getFeed(id)

                items = feed.getIds()[:]
                items.reverse()
                for item in items:
                    if feed.isEntryRead(item):
                        read_status = 1
                    else:
                        read_status = 0
                    date = timegm(feed.getDateTuple(item))
                    title = feed.getTitle(item)
                    newId = new_feed.generateUniqueId({"date":date, "title":title})
                    # The updated column is a float timestamp;
                    # tuple(time.time()) would raise a TypeError.
                    values = (newId, title, feed.getContentLink(item), date, time.time(), feed.getExternalLink(item), read_status)
                    new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
                    new_feed.db.commit()
                    try:
                        images = feed.getImages(item)
                        for image in images:
                            # Key the images by the new entry id so
                            # removeEntry() can find them later.
                            new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (newId, image) )
                            new_feed.db.commit()
                    except:
                        pass
                self.updateUnread(id)
            except:
                logger.error("importOldFormatFeeds: %s"
                             % (traceback.format_exc(),))
        remove(self.configdir+"feeds.pickle")

    def addArchivedArticle(self, key, index):
        feed = self.getFeed(key)
        title = feed.getTitle(index)
        link = feed.getExternalLink(index)
        date = feed.getDate(index)
        count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
        if count == 0:
            self.addFeed("Archived Articles", "", id="ArchivedArticles")

        archFeed = self.getFeed("ArchivedArticles")
        archFeed.addArchivedArticle(title, link, date, self.configdir)
        self.updateUnread("ArchivedArticles")

    def updateFeed(self, key, expiryTime=None, proxy=None, imageCache=None,
                   priority=0):
        if expiryTime is None:
            expiryTime = self.config.getExpiry()
        if not expiryTime:
            # Default to 24 hours
            expiryTime = 24
        if proxy is None:
            (use_proxy, proxy) = self.config.getProxy()
            if not use_proxy:
                proxy = None
        if imageCache is None:
            imageCache = self.config.getImageCache()

        feed = self.getFeed(key)
        (url, etag, modified) = self.db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
        try:
            modified = time.struct_time(eval(modified))
        except:
            modified = None
        feed.updateFeed(
            self.configdir, url, etag, modified, expiryTime, proxy, imageCache,
            priority, postFeedUpdateFunc=self._queuePostFeedUpdate)

    def _queuePostFeedUpdate(self, *args, **kwargs):
        mainthread.execute (self._postFeedUpdate, async=True, *args, **kwargs)

    def _postFeedUpdate(self, key, updateTime, etag, modified, title):
        if modified==None:
            modified="None"
        else:
            modified=str(tuple(modified))
        if updateTime > 0:
            self.db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
        else:
            self.db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )

        if title is not None:
            self.db.execute("UPDATE feeds SET title=(case WHEN title=='' THEN ? ELSE title END) where id=?;",
                            (title, key))
        self.db.commit()
        self.updateUnread(key)

        update_server_object().ArticleCountUpdated()

        stats = JobManager().stats()
        global jobs_at_start
        completed = stats['jobs-completed'] - jobs_at_start
        in_progress = stats['jobs-in-progress']
        queued = stats['jobs-queued']

        percent = (100 * ((completed + in_progress / 2.))
                   / (completed + in_progress + queued))

        update_server_object().UpdateProgress(
            percent, completed, in_progress, queued, 0, 0, 0, key)

        if in_progress == 0 and queued == 0:
            jobs_at_start = stats['jobs-completed']

    def getFeed(self, key):
        if key == "ArchivedArticles":
            return ArchivedArticles(self.configdir, key)
        return Feed(self.configdir, key)

    def editFeed(self, key, title, url, category=None):
        if category:
            self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
        else:
            self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
        self.db.commit()

        if wc().available():
            try:
                wc()[key].human_readable_name = title
            except KeyError:
                logger.debug("Feed %s (%s) unknown." % (key, title))

    def getFeedUpdateTime(self, key):
        return time.ctime(self.db.execute("SELECT updateTime FROM feeds WHERE id=?;", (key,)).fetchone()[0])

    def getFeedNumberOfUnreadItems(self, key):
        return self.db.execute("SELECT unread FROM feeds WHERE id=?;", (key,)).fetchone()[0]

    def getFeedTitle(self, key):
        (title, url) = self.db.execute("SELECT title, url FROM feeds WHERE id=?;", (key,)).fetchone()
        if title:
            return title
        return url

    def getFeedUrl(self, key):
        return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]

    def getFeedCategory(self, key):
        return self.db.execute("SELECT category FROM feeds WHERE id=?;", (key,)).fetchone()[0]

    def getListOfFeeds(self, category=None):
        if category:
            rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
        else:
            rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
        keys = []
        for row in rows:
            if row[0]:
                keys.append(row[0])
        return keys

    def getListOfCategories(self):
        rows = self.db.execute("SELECT id FROM categories ORDER BY rank;" )
        keys = []
        for row in rows:
            if row[0]:
                keys.append(row[0])
        return keys

    def getCategoryTitle(self, id):
        row = self.db.execute("SELECT title FROM categories WHERE id=?;", (id, )).fetchone()
        return row[0]

    def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
        if   order == "Most unread":
            tmp = "ORDER BY unread DESC"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1], reverse=True)
        elif order == "Least unread":
            tmp = "ORDER BY unread"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1])
        elif order == "Most recent":
            tmp = "ORDER BY updateTime DESC"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2], reverse=True)
        elif order == "Least recent":
            tmp = "ORDER BY updateTime"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2])
        else: # order == "Manual" or invalid value...
            tmp = "ORDER BY rank"
            #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][0])
        if onlyUnread:
            sql = "SELECT id FROM feeds WHERE unread>0 AND category=%s " %category + tmp
        else:
            sql = "SELECT id FROM feeds WHERE category=%s " %category + tmp
        rows = self.db.execute(sql)
        keys = []
        for row in rows:
            if row[0]:
                keys.append(row[0])
        return keys

    def getFavicon(self, key):
        filename = "%s%s.d/favicon.ico" % (self.configdir, key)
        if isfile(filename):
            return filename
        else:
            return False

    def updateUnread(self, key):
        feed = self.getFeed(key)
        self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
        self.db.commit()

    def addFeed(self, title, url, id=None, category=1):
        if not id:
            id = getId(url)
        count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
        if count == 0:
            max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
            if max_rank == None:
                max_rank = 0
            values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
            self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?,?);", values)
            self.db.commit()
            # Ask for the feed object, it will create the necessary tables
            self.getFeed(id)

            if wc().available():
                # Register the stream with Woodchuck.  Update approximately
                # every 6 hours.
                wc().stream_register(stream_identifier=id,
                                     human_readable_name=title,
                                     freshness=6*60*60)

            return True
        else:
            return False

    def addCategory(self, title):
        rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
        if rank==None:
            rank=1
        id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
        if id==None:
            id=1
        self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
        self.db.commit()

    def removeFeed(self, key):
        if wc().available ():
            try:
                del wc()[key]
            except KeyError:
                logger.debug("Removing unregistered feed %s failed" % (key,))

        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
        self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
        self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
        self.db.commit()

        if isdir(self.configdir+key+".d/"):
            rmtree(self.configdir+key+".d/")

    def removeCategory(self, key):
        if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
            rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,) ).fetchone()[0]
            self.db.execute("DELETE FROM categories WHERE id=?;", (key, ))
            self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
            self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
            self.db.commit()

    #def saveConfig(self):
    #    self.listOfFeeds["feedingit-order"] = self.sortedKeys
    #    file = open(self.configdir+"feeds.pickle", "w")
    #    pickle.dump(self.listOfFeeds, file)
    #    file.close()

    def moveUp(self, key):
        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
        if rank>0:
            self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
            self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
            self.db.commit()

    def moveCategoryUp(self, key):
        rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
        if rank>0:
            self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
            self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
            self.db.commit()

    def moveDown(self, key):
        rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
        max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
        if rank<max_rank:
            self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
            self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
            self.db.commit()

    def moveCategoryDown(self, key):
        rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
        max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
        if rank<max_rank:
            self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
            self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )
            self.db.commit()