Fixed startup when woodchuck is not installed
[feedingit] / src / rss_sqlite.py
1 #!/usr/bin/env python2.5
2
3
4 # Copyright (c) 2007-2008 INdT.
5 # Copyright (c) 2011 Neal H. Walfield
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Lesser General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
10 #
11 #  This program is distributed in the hope that it will be useful,
12 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
13 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 #  GNU Lesser General Public License for more details.
15 #
16 #  You should have received a copy of the GNU Lesser General Public License
17 #  along with this program.  If not, see <http://www.gnu.org/licenses/>.
18 #
19
20 # ============================================================================
21 # Name        : FeedingIt.py
22 # Author      : Yves Marcoz
23 # Version     : 0.5.4
24 # Description : Simple RSS Reader
25 # ============================================================================
26
import sqlite3
from os.path import isfile, isdir
from shutil import rmtree
from os import mkdir, remove, utime
import os
import hashlib
import md5
import feedparser
import time
import urllib2
from BeautifulSoup import BeautifulSoup
from urlparse import urljoin
from calendar import timegm
from updatedbus import get_lock, release_lock
import threading
import traceback
from wc import wc, wc_init
try:
    import woodchuck
except:
    woodchuck = None

from jobmanager import JobManager
import mainthread
from httpprogresshandler import HTTPProgressHandler
import random
import sys
53
def getId(string):
    """Return a stable identifier for *string*: its hex MD5 digest.

    Uses hashlib instead of the md5 module, which has been deprecated
    since Python 2.5.  Unicode input is UTF-8 encoded first; byte strings
    (including py2 str that cannot be decoded) are hashed as-is, which
    matches the old md5.new() behavior for plain strings.
    """
    try:
        data = string.encode("utf-8")
    except (AttributeError, UnicodeDecodeError):
        # Already a byte string, or an undecodable py2 str: hash it raw.
        data = string
    return hashlib.md5(data).hexdigest()
56
def download_callback(connection):
    """Abort the in-progress transfer when the application is quitting.

    Invoked periodically by HTTPProgressHandler; raising KeyboardInterrupt
    unwinds the download so the worker thread can exit promptly.
    """
    if not JobManager().do_quit:
        return
    raise KeyboardInterrupt
60
def downloader(progress_handler=None, proxy=None):
    """Build a urllib2 opener with progress reporting and an optional proxy.

    If no *progress_handler* is supplied, a default HTTPProgressHandler is
    installed that checks for application shutdown via download_callback.
    """
    handlers = [progress_handler if progress_handler
                else HTTPProgressHandler(download_callback)]
    if proxy:
        handlers.append(proxy)
    return urllib2.build_opener(*handlers)
73
74 class Feed:
75     serial_execution_lock = threading.Lock()
76
77     def _getdb(self):
78         try:
79             db = self.tls.db
80         except AttributeError:
81             db = sqlite3.connect("%s/%s.db" % (self.dir, self.key), timeout=120)
82             self.tls.db = db
83         return db
84     db = property(_getdb)
85
86     def __init__(self, configdir, key):
87         self.key = key
88         self.configdir = configdir
89         self.dir = "%s/%s.d" %(self.configdir, self.key)
90         self.tls = threading.local ()
91
92         if not isdir(self.dir):
93             mkdir(self.dir)
94         if not isfile("%s/%s.db" %(self.dir, self.key)):
95             self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
96             self.db.execute("CREATE TABLE images (id text, imagePath text);")
97             self.db.commit()
98
99     def addImage(self, configdir, key, baseurl, url, proxy=None, opener=None):
100         filename = configdir+key+".d/"+getId(url)
101         if not isfile(filename):
102             try:
103                 if not opener:
104                     opener = downloader(proxy=proxy)
105
106                 abs_url = urljoin(baseurl,url)
107                 f = opener.open(abs_url)
108                 outf = open(filename, "w")
109                 outf.write(f.read())
110                 f.close()
111                 outf.close()
112             except (urllib2.HTTPError, urllib2.URLError, IOError), exception:
113                 print ("Could not download image %s: %s"
114                        % (abs_url, str (exception)))
115                 return None
116             except:
117                 exception = sys.exc_info()[0]
118
119                 print "Downloading image: %s" % abs_url
120                 traceback.print_exc()
121
122                 try:
123                     remove(filename)
124                 except OSError:
125                     pass
126
127                 raise exception
128         else:
129             #open(filename,"a").close()  # "Touch" the file
130             file = open(filename,"a")
131             utime(filename, None)
132             file.close()
133         return filename
134
135     def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
136         def doit():
137             def it():
138                 self._updateFeed(configdir, url, etag, modified, expiryTime, proxy, imageCache, postFeedUpdateFunc, *postFeedUpdateFuncArgs)
139             return it
140         JobManager().execute(doit(), self.key, priority=priority)
141
    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        """Fetch and process the feed at *url* synchronously.

        Runs on a worker thread; use updateFeed() to queue it.  *etag* and
        *modified* are the HTTP cache validators from the previous fetch.
        *expiryTime* is in hours.  If *postFeedUpdateFunc* is given it is
        always called at the end (success or failure) with
        (key, updateTime, etag, modified, title, *postFeedUpdateFuncArgs).
        """
        success = False
        have_serial_execution_lock = False
        try:
            update_lock = None
            # NOTE(review): the lock name is the literal string "key", not
            # self.key -- as written this serializes updates across ALL
            # feeds.  Confirm whether a per-feed lock was intended.
            update_lock = get_lock("key")
            if not update_lock:
                # Someone else is doing an update.
                return

            download_start = time.time ()

            progress_handler = HTTPProgressHandler(download_callback)

            openers = [progress_handler]
            if proxy:
                openers.append (proxy)
            kwargs = {'handlers':openers}

            tmp=feedparser.parse(url, etag=etag, modified=modified, **kwargs)
            download_duration = time.time () - download_start

            opener = downloader(progress_handler, proxy)

            if JobManager().do_quit:
                raise KeyboardInterrupt

            process_start = time.time()

            # Expiry time is in hours
            expiry = float(expiryTime) * 3600.

            currentTime = 0

            # Woodchuck calls must happen on the main thread.
            have_woodchuck = mainthread.execute (wc().available)

            def wc_success():
                # Report a successful stream update to Woodchuck (runs on
                # the main thread via mainthread.execute).
                try:
                    wc().stream_register (self.key, "", 6 * 60 * 60)
                except woodchuck.ObjectExistsError:
                    pass
                try:
                    wc()[self.key].updated (
                        indicator=(woodchuck.Indicator.ApplicationVisual
                                   |woodchuck.Indicator.StreamWide),
                        transferred_down=progress_handler.stats['received'],
                        transferred_up=progress_handler.stats['sent'],
                        transfer_time=download_start,
                        transfer_duration=download_duration,
                        new_objects=len (tmp.entries),
                        objects_inline=len (tmp.entries))
                except KeyError:
                    print "Failed to register update with woodchuck!"
                    pass

            http_status = tmp.get ('status', 200)

            # Check if the parse was succesful.  If the http status code
            # is 304, then the download was successful, but there is
            # nothing new.  Indeed, no content is returned.  This make a
            # 304 look like an error because there are no entries and the
            # parse fails.  But really, everything went great!  Check for
            # this first.
            if http_status == 304:
                print "%s: No changes to feed." % (self.key,)
                mainthread.execute (wc_success, async=True)
                success = True
            elif len(tmp["entries"])==0 and not tmp.version:
                # An error occured fetching or parsing the feed.  (Version
                # will be either None if e.g. the connection timed our or
                # '' if the data is not a proper feed)
                print ("Error fetching %s: version is: %s: error: %s"
                       % (url, str (tmp.version),
                          str (tmp.get ('bozo_exception', 'Unknown error'))))
                print tmp
                if have_woodchuck:
                    def e():
                        print "%s: stream update failed!" % self.key

                        try:
                            # It's not easy to get the feed's title from here.
                            # At the latest, the next time the application is
                            # started, we'll fix up the human readable name.
                            wc().stream_register (self.key, "", 6 * 60 * 60)
                        except woodchuck.ObjectExistsError:
                            pass
                        # Map the HTTP status class to a Woodchuck failure
                        # code.
                        ec = woodchuck.TransferStatus.TransientOther
                        if 300 <= http_status and http_status < 400:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        if 400 <= http_status and http_status < 500:
                            ec = woodchuck.TransferStatus.FailureGone
                        if 500 <= http_status and http_status < 600:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        wc()[self.key].update_failed(ec)
                    mainthread.execute (e, async=True)
            else:
                currentTime = time.time()
                # The etag and modified value should only be updated if the content was not null
                try:
                    etag = tmp["etag"]
                except KeyError:
                    etag = None
                try:
                    modified = tmp["modified"]
                except KeyError:
                    modified = None
                try:
                    # NOTE(review): the favicon is binary data but is
                    # written with mode "w"; "wb" looks intended.
                    abs_url = urljoin(tmp["feed"]["link"],"/favicon.ico")
                    f = opener.open(abs_url)
                    data = f.read()
                    f.close()
                    outf = open(self.dir+"/favicon.ico", "w")
                    outf.write(data)
                    outf.close()
                    del data
                except (urllib2.HTTPError, urllib2.URLError), exception:
                    print ("Could not download favicon %s: %s"
                           % (abs_url, str (exception)))

                self.serial_execution_lock.acquire ()
                have_serial_execution_lock = True

                #reversedEntries = self.getEntries()
                #reversedEntries.reverse()

                ids = self.getIds()

                # Process oldest entries first.
                tmp["entries"].reverse()
                for entry in tmp["entries"]:
                    # Yield so as to make the main thread a bit more
                    # responsive.
                    time.sleep(0)

                    if JobManager().do_quit:
                        raise KeyboardInterrupt

                    # Snapshot the transfer counters so this entry's
                    # byte counts can be computed below.
                    received_base = progress_handler.stats['received']
                    sent_base = progress_handler.stats['sent']
                    object_size = 0

                    date = self.extractDate(entry)
                    try:
                        entry["title"]
                    except KeyError:
                        entry["title"] = "No Title"
                    try :
                        entry["link"]
                    except KeyError:
                        entry["link"] = ""
                    try:
                        entry["author"]
                    except KeyError:
                        entry["author"] = None
                    if(not(entry.has_key("id"))):
                        entry["id"] = None
                    content = self.extractContent(entry)
                    object_size = len (content)
                    received_base -= len (content)
                    tmpEntry = {"title":entry["title"], "content":content,
                                "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
                    id = self.generateUniqueId(tmpEntry)

                    #articleTime = time.mktime(self.entries[id]["dateTuple"])
                    soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                    images = soup('img')
                    baseurl = tmpEntry["link"]
                    #if not id in ids:
                    if imageCache and len(images) > 0:
                        # Image downloads happen outside the serial
                        # execution lock so they can proceed in parallel
                        # with other feeds' processing.
                        self.serial_execution_lock.release ()
                        have_serial_execution_lock = False
                        for img in images:
                            filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
                            if filename:
                                img['src']="file://%s" %filename
                                count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
                                if count == 0:
                                    self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                                    self.db.commit()

                                try:
                                    object_size += os.path.getsize (filename)
                                except os.error, exception:
                                    print ("Error getting size of %s: %s"
                                           % (filename, exception))
                                    pass
                        self.serial_execution_lock.acquire ()
                        have_serial_execution_lock = True

                    tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
                    file = open(tmpEntry["contentLink"], "w")
                    file.write(soup.prettify())
                    file.close()
                    if id in ids:
                        self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
                        self.db.commit()
                    else:
                        values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
                        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
                        self.db.commit()
#                   else:
#                       try:
#                           self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
#                           self.db.commit()
#                           filename = configdir+self.key+".d/"+id+".html"
#                           file = open(filename,"a")
#                           utime(filename, None)
#                           file.close()
#                           images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
#                           for image in images:
#                                file = open(image[0],"a")
#                                utime(image[0], None)
#                                file.close()
#                       except:
#                           pass

                    # Register the object with Woodchuck and mark it as
                    # downloaded.
                    if have_woodchuck:
                        def e():
                            try:
                                obj = wc()[self.key].object_register(
                                    object_identifier=id,
                                    human_readable_name=tmpEntry["title"])
                            except woodchuck.ObjectExistsError:
                                obj = wc()[self.key][id]
                            else:
                                # If the entry does not contain a publication
                                # time, the attribute won't exist.
                                pubtime = entry.get ('date_parsed', None)
                                if pubtime:
                                    obj.publication_time = time.mktime (pubtime)

                                received = (progress_handler.stats['received']
                                            - received_base)
                                sent = progress_handler.stats['sent'] - sent_base
                                obj.transferred (
                                    indicator=(woodchuck.Indicator.ApplicationVisual
                                               |woodchuck.Indicator.StreamWide),
                                    transferred_down=received,
                                    transferred_up=sent,
                                    object_size=object_size)
                        mainthread.execute(e, async=True)
                self.db.commit()

                print ("%s: Update successful: transferred: %d/%d; objects: %d)"
                       % (self.key,
                          progress_handler.stats['sent'],
                          progress_handler.stats['received'],
                          len (tmp.entries)))
                mainthread.execute (wc_success, async=True)
                success = True

            # Expire old articles: unread ones get twice the grace period.
            rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
            for row in rows:
                self.removeEntry(row[0])

            # Garbage-collect stale cache files (images, old HTML).
            from glob import glob
            from os import stat
            for file in glob(configdir+self.key+".d/*"):
                #
                stats = stat(file)
                #
                # put the two dates into matching format
                #
                lastmodDate = stats[8]
                #
                expDate = time.time()-expiry*3
                # check if image-last-modified-date is outdated
                #
                if expDate > lastmodDate:
                    #
                    try:
                        #
                        #print 'Removing', file
                        #
                        # XXX: Tell woodchuck.
                        remove(file) # commented out for testing
                        #
                    except OSError, exception:
                        #
                        print 'Could not remove %s: %s' % (file, str (exception))
            print ("updated %s: %fs in download, %fs in processing"
                   % (self.key, download_duration,
                      time.time () - process_start))
        except:
            print "Updating %s: %s" % (self.key, sys.exc_info()[0])
            traceback.print_exc()
        finally:
            self.db.commit ()

            if have_serial_execution_lock:
                self.serial_execution_lock.release ()

            if update_lock is not None:
                release_lock (update_lock)

            updateTime = 0
            try:
                rows = self.db.execute("SELECT MAX(date) FROM feed;")
                for row in rows:
                    updateTime=row[0]
            except:
                print "Fetching update time."
                traceback.print_exc()
            finally:
                if not success:
                    etag = None
                    modified = None
                title = None
                try:
                    # tmp may be unbound if feedparser.parse itself raised.
                    title = tmp.feed.title
                except (AttributeError, UnboundLocalError), exception:
                    pass
                if postFeedUpdateFunc is not None:
                    postFeedUpdateFunc (self.key, updateTime, etag, modified,
                                        title, *postFeedUpdateFuncArgs)
458
459     def setEntryRead(self, id):
460         self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
461         self.db.commit()
462
463         def e():
464             if wc().available():
465                 try:
466                     wc()[self.key][id].used()
467                 except KeyError:
468                     pass
469
470     def setEntryUnread(self, id):
471         self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
472         self.db.commit()     
473         
474     def markAllAsRead(self):
475         self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
476         self.db.commit()
477
478     def isEntryRead(self, id):
479         read_status = self.db.execute("SELECT read FROM feed WHERE id=?;", (id,) ).fetchone()[0]
480         return read_status==1  # Returns True if read==1, and False if read==0
481     
482     def getTitle(self, id):
483         return self.db.execute("SELECT title FROM feed WHERE id=?;", (id,) ).fetchone()[0]
484     
485     def getContentLink(self, id):
486         return self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) ).fetchone()[0]
487     
488     def getExternalLink(self, id):
489         return self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) ).fetchone()[0]
490     
491     def getDate(self, id):
492         dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
493         return time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(dateStamp))
494
495     def getDateTuple(self, id):
496         dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
497         return time.localtime(dateStamp)
498     
499     def getDateStamp(self, id):
500         return self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
501     
502     def generateUniqueId(self, entry):
503         """
504         Generate a stable identifier for the article.  For the same
505         entry, this should result in the same identifier.  If
506         possible, the identifier should remain the same even if the
507         article is updated.
508         """
509         # Prefer the entry's id, which is supposed to be globally
510         # unique.
511         key = entry.get('id', None)
512         if not key:
513             # Next, try the link to the content.
514             key = entry.get('link', None)
515         if not key:
516             # Ok, the title and the date concatenated are likely to be
517             # relatively stable.
518             key = entry.get('title', None) + entry.get('date', None)
519         if not key:
520             # Hmm, the article's content will at least guarantee no
521             # false negatives (i.e., missing articles)
522             key = entry.get('content', None)
523         if not key:
524             # If all else fails, just use a random number.
525             key = str (random.random ())
526         return getId (key)
527     
528     def getIds(self, onlyUnread=False):
529         if onlyUnread:
530             rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
531         else:
532             rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
533         ids = []
534         for row in rows:
535             ids.append(row[0])
536         #ids.reverse()
537         return ids
538     
539     def getNextId(self, id):
540         ids = self.getIds()
541         index = ids.index(id)
542         return ids[(index+1)%len(ids)]
543         
544     def getPreviousId(self, id):
545         ids = self.getIds()
546         index = ids.index(id)
547         return ids[(index-1)%len(ids)]
548     
    def getNumberOfUnreadItems(self):
        """Return the number of articles still marked unread."""
        return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]
    
    def getNumberOfEntries(self):
        """Return the total number of articles stored for this feed."""
        return self.db.execute("SELECT count(*) FROM feed;").fetchone()[0]
554
555     def getArticle(self, entry):
556         #self.setEntryRead(id)
557         #entry = self.entries[id]
558         title = entry['title']
559         #content = entry.get('content', entry.get('summary_detail', {}))
560         content = entry["content"]
561
562         link = entry['link']
563         author = entry['author']
564         date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )
565
566         #text = '''<div style="color: black; background-color: white;">'''
567         text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
568         text += "<html><head><title>" + title + "</title>"
569         text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
570         #text += '<style> body {-webkit-user-select: none;} </style>'
571         text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
572         if author != None:
573             text += "<BR /><small><i>Author: " + author + "</i></small>"
574         text += "<BR /><small><i>Date: " + date + "</i></small></div>"
575         text += "<BR /><BR />"
576         text += content
577         text += "</body></html>"
578         return text
579    
580     def getContent(self, id):
581         contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
582         try:
583             file = open(self.entries[id]["contentLink"])
584             content = file.read()
585             file.close()
586         except:
587             content = "Content unavailable"
588         return content
589     
590     def extractDate(self, entry):
591         if entry.has_key("updated_parsed"):
592             return timegm(entry["updated_parsed"])
593         elif entry.has_key("published_parsed"):
594             return timegm(entry["published_parsed"])
595         else:
596             return time.time()
597         
598     def extractContent(self, entry):
599         content = ""
600         if entry.has_key('summary'):
601             content = entry.get('summary', '')
602         if entry.has_key('content'):
603             if len(entry.content[0].value) > len(content):
604                 content = entry.content[0].value
605         if content == "":
606             content = entry.get('description', '')
607         return content
608     
609     def removeEntry(self, id):
610         contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
611         if contentLink:
612             try:
613                 remove(contentLink)
614             except OSError, exception:
615                 print "Deleting %s: %s" % (contentLink, str (exception))
616         self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
617         self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
618         self.db.commit()
619
620         def e():
621             if wc().available():
622                 try:
623                     wc()[self.key][id].files_deleted (
624                         woodchuck.DeletionResponse.Deleted)
625                     del wc()[self.key][id]
626                 except KeyError:
627                     pass
628         mainthread.execute (e, async=True)
629  
class ArchivedArticles(Feed):
    """A pseudo-feed holding articles the user explicitly archived.

    Articles are queued with addArchivedArticle(); updateFeed() then
    downloads the page behind each pending (updated=0) link into the
    local cache, together with its images.
    """
    def addArchivedArticle(self, title, link, date, configdir):
        # An archived article is stored as an ordinary feed row whose
        # contentLink initially holds the remote link; updated=0 marks it
        # as not yet downloaded.
        id = self.generateUniqueId({"date":date, "title":title})
        values = (id, title, link, date, 0, link, 0)
        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
        self.db.commit()

    # NOTE(review): this override changes the base-class signature (no
    # priority/postFeedUpdateFunc), runs synchronously and returns a
    # (currentTime, None, None) tuple -- callers of ArchivedArticles must
    # expect that, unlike Feed.updateFeed which queues a job.
    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
        currentTime = 0
        rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
        for row in rows:
            currentTime = time.time()
            id = row[0]
            link = row[1]
            f = urllib2.urlopen(link)
            #entry["content"] = f.read()
            html = f.read()
            f.close()
            soup = BeautifulSoup(html)
            images = soup('img')
            baseurl = link
            # Localize every image and record it so removeArticle can
            # clean up later.
            for img in images:
                filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
                img['src']=filename
                self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                self.db.commit()
            contentLink = configdir+self.key+".d/"+id+".html"
            file = open(contentLink, "w")
            file.write(soup.prettify())
            file.close()
            
            # Point the row at the local copy and stamp it as downloaded.
            self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
            self.db.commit()
        return (currentTime, None, None)
    
    def purgeReadArticles(self):
        # Remove every archived article already marked read.
        rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
        #ids = self.getIds()
        for row in rows:
            self.removeArticle(row[0])

    def removeArticle(self, id):
        # Delete the article's cached image files -- unless another
        # article still references the same file -- then the article
        # itself (rows and HTML) via removeEntry.
        rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
        for row in rows:
            try:
                count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
                if count == 0:
                    os.remove(row[0])
            except:
                pass
        self.removeEntry(id)
681
682 class Listing:
683     def _getdb(self):
684         try:
685             db = self.tls.db
686         except AttributeError:
687             db = sqlite3.connect("%s/feeds.db" % self.configdir, timeout=120)
688             self.tls.db = db
689         return db
690     db = property(_getdb)
691
692     # Lists all the feeds in a dictionary, and expose the data
693     def __init__(self, config, configdir):
694         self.config = config
695         self.configdir = configdir
696
697         self.tls = threading.local ()
698         
699         try:
700             table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
701             if table == None:
702                 self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
703                 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
704                 self.addCategory("Default Category")
705                 if isfile(self.configdir+"feeds.pickle"):
706                     self.importOldFormatFeeds()
707                 else:
708                     self.addFeed("Maemo News", "http://maemo.org/news/items.xml")    
709             else:
710                 from string import find, upper
711                 if find(upper(table[0]), "WIDGET")<0:
712                     self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
713                     self.db.execute("UPDATE feeds SET widget=1;")
714                     self.db.commit()
715                 if find(upper(table[0]), "CATEGORY")<0:
716                     self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
717                     self.addCategory("Default Category")
718                     self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
719                     self.db.execute("UPDATE feeds SET category=1;")
720             self.db.commit()
721         except:
722             pass
723
724         # Check that Woodchuck's state is up to date with respect our
725         # state.
726         wc_init (self)
727         if wc().available():
728             # The list of known streams.
729             streams = wc().streams_list ()
730             stream_ids = [s.identifier for s in streams]
731
732             # Register any unknown streams.  Remove known streams from
733             # STREAMS_IDS.
734             for key in self.getListOfFeeds():
735                 title = self.getFeedTitle(key)
736                 # XXX: We should also check whether the list of
737                 # articles/objects in each feed/stream is up to date.
738                 if key not in stream_ids:
739                     print ("Registering previously unknown channel: %s (%s)"
740                            % (key, title,))
741                     # Use a default refresh interval of 6 hours.
742                     wc().stream_register (key, title, 6 * 60 * 60)
743                 else:
744                     # Make sure the human readable name is up to date.
745                     if wc()[key].human_readable_name != title:
746                         wc()[key].human_readable_name = title
747                     stream_ids.remove (key)
748                     
749
750             # Unregister any streams that are no longer subscribed to.
751             for id in stream_ids:
752                 print ("Unregistering %s" % (id,))
753                 w.stream_unregister (id)
754
755     def importOldFormatFeeds(self):
756         """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
757         import rss
758         listing = rss.Listing(self.configdir)
759         rank = 0
760         for id in listing.getListOfFeeds():
761             try:
762                 rank += 1
763                 values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
764                 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)
765                 self.db.commit()
766                 
767                 feed = listing.getFeed(id)
768                 new_feed = self.getFeed(id)
769                 
770                 items = feed.getIds()[:]
771                 items.reverse()
772                 for item in items:
773                         if feed.isEntryRead(item):
774                             read_status = 1
775                         else:
776                             read_status = 0 
777                         date = timegm(feed.getDateTuple(item))
778                         title = feed.getTitle(item)
779                         newId = new_feed.generateUniqueId({"date":date, "title":title})
780                         values = (newId, title , feed.getContentLink(item), date, tuple(time.time()), feed.getExternalLink(item), read_status)
781                         new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
782                         new_feed.db.commit()
783                         try:
784                             images = feed.getImages(item)
785                             for image in images:
786                                 new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
787                                 new_feed.db.commit()
788                         except:
789                             pass
790                 self.updateUnread(id)
791             except:
792                 traceback.print_exc()
793         remove(self.configdir+"feeds.pickle")
794                 
795         
796     def addArchivedArticle(self, key, index):
797         feed = self.getFeed(key)
798         title = feed.getTitle(index)
799         link = feed.getExternalLink(index)
800         date = feed.getDate(index)
801         count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
802         if count == 0:
803             self.addFeed("Archived Articles", "", id="ArchivedArticles")
804
805         archFeed = self.getFeed("ArchivedArticles")
806         archFeed.addArchivedArticle(title, link, date, self.configdir)
807         self.updateUnread("ArchivedArticles")
808         
809     def updateFeed(self, key, expiryTime=None, proxy=None, imageCache=None,
810                    priority=0):
811         if expiryTime is None:
812             expiryTime = self.config.getExpiry()
813         if not expiryTime:
814             # Default to 24 hours
815             expriyTime = 24
816         if proxy is None:
817             (use_proxy, proxy) = self.config.getProxy()
818             if not use_proxy:
819                 proxy = None
820         if imageCache is None:
821             imageCache = self.config.getImageCache()
822
823         feed = self.getFeed(key)
824         (url, etag, modified) = self.db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
825         try:
826             modified = time.struct_time(eval(modified))
827         except:
828             modified = None
829         feed.updateFeed(
830             self.configdir, url, etag, modified, expiryTime, proxy, imageCache,
831             priority, postFeedUpdateFunc=self._queuePostFeedUpdate)
832
    def _queuePostFeedUpdate(self, *args, **kwargs):
        # Forward the update results to _postFeedUpdate on the main
        # thread: the sqlite bookkeeping must not run on the downloader
        # thread.  NOTE(review): 'async' is a plain keyword argument on
        # Python 2; it is a reserved word on Python 3.7+, so this line
        # would need rewriting if the file were ever ported.
        mainthread.execute (self._postFeedUpdate, async=True, *args, **kwargs)
835
836     def _postFeedUpdate(self, key, updateTime, etag, modified, title):
837         if modified==None:
838             modified="None"
839         else:
840             modified=str(tuple(modified))
841         if updateTime > 0:
842             self.db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
843         else:
844             self.db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )
845
846         if title is not None:
847             self.db.execute("UPDATE feeds SET title=(case WHEN title=='' THEN ? ELSE title END) where id=?;",
848                             (title, key))
849         self.db.commit()
850         self.updateUnread(key)
851         
852     def getFeed(self, key):
853         if key == "ArchivedArticles":
854             return ArchivedArticles(self.configdir, key)
855         return Feed(self.configdir, key)
856         
857     def editFeed(self, key, title, url, category=None):
858         if category:
859             self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
860         else:
861             self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
862         self.db.commit()
863
864         if wc().available():
865             try:
866                 wc()[key].human_readable_name = title
867             except KeyError:
868                 print "Feed %s (%s) unknown." % (key, title)
869                 pass
870         
871     def getFeedUpdateTime(self, key):
872         return time.ctime(self.db.execute("SELECT updateTime FROM feeds WHERE id=?;", (key,)).fetchone()[0])
873         
874     def getFeedNumberOfUnreadItems(self, key):
875         return self.db.execute("SELECT unread FROM feeds WHERE id=?;", (key,)).fetchone()[0]
876         
877     def getFeedTitle(self, key):
878         (title, url) = self.db.execute("SELECT title, url FROM feeds WHERE id=?;", (key,)).fetchone()
879         if title:
880             return title
881         return url
882         
883     def getFeedUrl(self, key):
884         return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]
885     
886     def getFeedCategory(self, key):
887         return self.db.execute("SELECT category FROM feeds WHERE id=?;", (key,)).fetchone()[0]
888         
889     def getListOfFeeds(self, category=None):
890         if category:
891             rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
892         else:
893             rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
894         keys = []
895         for row in rows:
896             if row[0]:
897                 keys.append(row[0])
898         return keys
899     
900     def getListOfCategories(self):
901         rows = self.db.execute("SELECT id FROM categories ORDER BY rank;" )
902         keys = []
903         for row in rows:
904             if row[0]:
905                 keys.append(row[0])
906         return keys
907     
908     def getCategoryTitle(self, id):
909         row = self.db.execute("SELECT title FROM categories WHERE id=?;", (id, )).fetchone()
910         return row[0]
911     
912     def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
913         if   order == "Most unread":
914             tmp = "ORDER BY unread DESC"
915             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1], reverse=True)
916         elif order == "Least unread":
917             tmp = "ORDER BY unread"
918             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1])
919         elif order == "Most recent":
920             tmp = "ORDER BY updateTime DESC"
921             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2], reverse=True)
922         elif order == "Least recent":
923             tmp = "ORDER BY updateTime"
924             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2])
925         else: # order == "Manual" or invalid value...
926             tmp = "ORDER BY rank"
927             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][0])
928         if onlyUnread:
929             sql = "SELECT id FROM feeds WHERE unread>0 AND category=%s " %category + tmp 
930         else:
931             sql = "SELECT id FROM feeds WHERE category=%s " %category + tmp
932         rows = self.db.execute(sql)
933         keys = []
934         for row in rows:
935             if row[0]:
936                 keys.append(row[0])
937         return keys
938     
939     def getFavicon(self, key):
940         filename = "%s%s.d/favicon.ico" % (self.configdir, key)
941         if isfile(filename):
942             return filename
943         else:
944             return False
945         
946     def updateUnread(self, key):
947         feed = self.getFeed(key)
948         self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
949         self.db.commit()
950
951     def addFeed(self, title, url, id=None, category=1):
952         if not id:
953             id = getId(url)
954         count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
955         if count == 0:
956             max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
957             if max_rank == None:
958                 max_rank = 0
959             values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
960             self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?,?);", values)
961             self.db.commit()
962             # Ask for the feed object, it will create the necessary tables
963             self.getFeed(id)
964
965             if wc().available():
966                 # Register the stream with Woodchuck.  Update approximately
967                 # every 6 hours.
968                 wc().stream_register(stream_identifier=id,
969                                      human_readable_name=title,
970                                      freshness=6*60*60)
971
972             return True
973         else:
974             return False
975         
976     def addCategory(self, title):
977         rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
978         if rank==None:
979             rank=1
980         id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
981         if id==None:
982             id=1
983         self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
984         self.db.commit()
985     
986     def removeFeed(self, key):
987         if wc().available ():
988             try:
989                 del wc()[key]
990             except KeyError:
991                 print "Removing unregistered feed %s failed" % (key,)
992
993         rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
994         self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
995         self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
996         self.db.commit()
997
998         if isdir(self.configdir+key+".d/"):
999            rmtree(self.configdir+key+".d/")
1000            
1001     def removeCategory(self, key):
1002         if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
1003             rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,) ).fetchone()[0]
1004             self.db.execute("DELETE FROM categories WHERE id=?;", (key, ))
1005             self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
1006             self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
1007             self.db.commit()
1008         
1009     #def saveConfig(self):
1010     #    self.listOfFeeds["feedingit-order"] = self.sortedKeys
1011     #    file = open(self.configdir+"feeds.pickle", "w")
1012     #    pickle.dump(self.listOfFeeds, file)
1013     #    file.close()
1014         
1015     def moveUp(self, key):
1016         rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
1017         if rank>0:
1018             self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
1019             self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
1020             self.db.commit()
1021             
1022     def moveCategoryUp(self, key):
1023         rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
1024         if rank>0:
1025             self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
1026             self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
1027             self.db.commit()
1028         
1029     def moveDown(self, key):
1030         rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
1031         max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
1032         if rank<max_rank:
1033             self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
1034             self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
1035             self.db.commit()
1036             
1037     def moveCategoryDown(self, key):
1038         rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
1039         max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
1040         if rank<max_rank:
1041             self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
1042             self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )
1043             self.db.commit()
1044             
1045