If a new feed's title is '', don't abort: instead, set the title to the one given in the feed itself.
[feedingit] / src / rss_sqlite.py
1 #!/usr/bin/env python2.5
2
3
4 # Copyright (c) 2007-2008 INdT.
5 # Copyright (c) 2011 Neal H. Walfield
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU Lesser General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
10 #
11 #  This program is distributed in the hope that it will be useful,
12 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
13 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 #  GNU Lesser General Public License for more details.
15 #
16 #  You should have received a copy of the GNU Lesser General Public License
17 #  along with this program.  If not, see <http://www.gnu.org/licenses/>.
18 #
19
20 # ============================================================================
21 # Name        : FeedingIt.py
22 # Author      : Yves Marcoz
23 # Version     : 0.5.4
24 # Description : Simple RSS Reader
25 # ============================================================================
26
# Standard library
import hashlib
import md5
import os
import random
import sqlite3
import sys
import threading
import time
import traceback
import urllib2
from calendar import timegm
from os import mkdir, remove, utime
from os.path import isfile, isdir
from shutil import rmtree
from urlparse import urljoin

# Third-party
import feedparser
import woodchuck
from BeautifulSoup import BeautifulSoup

# Application-local
import mainthread
from httpprogresshandler import HTTPProgressHandler
from jobmanager import JobManager
from updatedbus import get_lock, release_lock
from wc import wc, wc_init
49
def getId(string):
    """Return a stable identifier for STRING: its MD5 hex digest.

    Uses hashlib rather than the md5 module, which has been deprecated
    since Python 2.5; behavior (the hex digest) is identical."""
    return hashlib.md5(string).hexdigest()
52
def download_callback(connection):
    """Progress callback: abort the current transfer as soon as the job
    manager has been asked to shut down."""
    manager = JobManager()
    if manager.do_quit:
        raise KeyboardInterrupt
56
def downloader(progress_handler=None, proxy=None):
    """Build a urllib2 opener that reports progress and, optionally,
    routes through PROXY.  Without an explicit PROGRESS_HANDLER a default
    one wired to download_callback is used (so downloads abort on quit)."""
    if progress_handler:
        handlers = [progress_handler]
    else:
        handlers = [HTTPProgressHandler(download_callback)]

    if proxy:
        handlers.append(proxy)

    return urllib2.build_opener(*handlers)
69
class Feed:
    """One subscribed feed, backed by its own sqlite database
    (<configdir>/<key>.d/<key>.db) holding articles and cached images."""

    # Class attribute: a single lock shared by ALL Feed instances,
    # serializing the database-heavy portion of feed updates.
    serial_execution_lock = threading.Lock()

    def _getdb(self):
        """Return this thread's sqlite connection, creating it on first
        use (sqlite3 connections must not be shared across threads)."""
        try:
            db = self.tls.db
        except AttributeError:
            db = sqlite3.connect("%s/%s.db" % (self.dir, self.key), timeout=120)
            self.tls.db = db
        return db
    # 'db' reads like a plain attribute but hands out the calling
    # thread's own connection.
    db = property(_getdb)
81
    def __init__(self, configdir, key):
        """Open the feed stored under KEY in CONFIGDIR, creating its
        cache directory and database tables on first use."""
        self.key = key
        self.configdir = configdir
        self.dir = "%s/%s.d" %(self.configdir, self.key)
        # Per-thread storage for the sqlite connection handed out by _getdb.
        self.tls = threading.local ()

        if not isdir(self.dir):
            mkdir(self.dir)
        if not isfile("%s/%s.db" %(self.dir, self.key)):
            # 'feed' holds one row per article; 'images' maps an article
            # id to the local paths of its cached images.
            self.db.execute("CREATE TABLE feed (id text, title text, contentLink text, date float, updated float, link text, read int);")
            self.db.execute("CREATE TABLE images (id text, imagePath text);")
            self.db.commit()
94
95     def addImage(self, configdir, key, baseurl, url, proxy=None, opener=None):
96         filename = configdir+key+".d/"+getId(url)
97         if not isfile(filename):
98             try:
99                 if not opener:
100                     opener = downloader(proxy=proxy)
101
102                 abs_url = urljoin(baseurl,url)
103                 f = opener.open(abs_url)
104                 outf = open(filename, "w")
105                 outf.write(f.read())
106                 f.close()
107                 outf.close()
108             except (urllib2.HTTPError, urllib2.URLError, IOError), exception:
109                 print ("Could not download image %s: %s"
110                        % (abs_url, str (exception)))
111                 return None
112             except:
113                 exception = sys.exc_info()[0]
114
115                 print "Downloading image: %s" % abs_url
116                 traceback.print_exc()
117
118                 try:
119                     remove(filename)
120                 except OSError:
121                     pass
122
123                 raise exception
124         else:
125             #open(filename,"a").close()  # "Touch" the file
126             file = open(filename,"a")
127             utime(filename, None)
128             file.close()
129         return filename
130
131     def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, priority=0, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
132         def doit():
133             def it():
134                 self._updateFeed(configdir, url, etag, modified, expiryTime, proxy, imageCache, postFeedUpdateFunc, *postFeedUpdateFuncArgs)
135             return it
136         JobManager().execute(doit(), self.key, priority=priority)
137
    def _updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False, postFeedUpdateFunc=None, *postFeedUpdateFuncArgs):
        """Synchronously download and process the feed at URL.

        etag/modified are the HTTP cache validators from the previous
        fetch; they are passed to feedparser so an unchanged feed answers
        with a cheap 304.  expiryTime (hours) bounds how long articles
        and cached files are kept.  postFeedUpdateFunc, if given, is
        always invoked from the finally clause with
        (key, updateTime, etag, modified, title, *postFeedUpdateFuncArgs).
        """
        success = False
        have_serial_execution_lock = False
        try:
            update_lock = None
            # NOTE(review): the lock is taken on the literal string "key",
            # not self.key, so at most one update runs at a time across
            # ALL feeds -- confirm whether per-feed locking was intended.
            update_lock = get_lock("key")
            if not update_lock:
                # Someone else is doing an update.
                return

            download_start = time.time ()

            progress_handler = HTTPProgressHandler(download_callback)

            openers = [progress_handler]
            if proxy:
                openers.append (proxy)
            kwargs = {'handlers':openers}

            tmp=feedparser.parse(url, etag=etag, modified=modified, **kwargs)
            download_duration = time.time () - download_start

            # Opener reused for the favicon and article images below.
            opener = downloader(progress_handler, proxy)

            if JobManager().do_quit:
                raise KeyboardInterrupt

            process_start = time.time()

            # Expiry time is in hours
            expiry = float(expiryTime) * 3600.

            currentTime = 0

            # wc().available is evaluated via mainthread.execute -- the
            # Woodchuck proxy is apparently only safe on the main thread.
            have_woodchuck = mainthread.execute (wc().available)

            def wc_success():
                # Report a successful stream update to Woodchuck,
                # registering the stream first in case it is unknown.
                try:
                    wc().stream_register (self.key, "", 6 * 60 * 60)
                except woodchuck.ObjectExistsError:
                    pass
                try:
                    wc()[self.key].updated (
                        indicator=(woodchuck.Indicator.ApplicationVisual
                                   |woodchuck.Indicator.StreamWide),
                        transferred_down=progress_handler.stats['received'],
                        transferred_up=progress_handler.stats['sent'],
                        transfer_time=download_start,
                        transfer_duration=download_duration,
                        new_objects=len (tmp.entries),
                        objects_inline=len (tmp.entries))
                except KeyError:
                    print "Failed to register update with woodchuck!"
                    pass

            http_status = tmp.get ('status', 200)

            # Check if the parse was succesful.  If the http status code
            # is 304, then the download was successful, but there is
            # nothing new.  Indeed, no content is returned.  This make a
            # 304 look like an error because there are no entries and the
            # parse fails.  But really, everything went great!  Check for
            # this first.
            if http_status == 304:
                print "%s: No changes to feed." % (self.key,)
                mainthread.execute (wc_success, async=True)
                success = True
            elif len(tmp["entries"])==0 and not tmp.version:
                # An error occured fetching or parsing the feed.  (Version
                # will be either None if e.g. the connection timed our or
                # '' if the data is not a proper feed)
                print ("Error fetching %s: version is: %s: error: %s"
                       % (url, str (tmp.version),
                          str (tmp.get ('bozo_exception', 'Unknown error'))))
                print tmp
                if have_woodchuck:
                    def e():
                        print "%s: stream update failed!" % self.key

                        try:
                            # It's not easy to get the feed's title from here.
                            # At the latest, the next time the application is
                            # started, we'll fix up the human readable name.
                            wc().stream_register (self.key, "", 6 * 60 * 60)
                        except woodchuck.ObjectExistsError:
                            pass
                        # Map the HTTP status onto a Woodchuck failure
                        # class: 3xx and 5xx count as transient, 4xx as
                        # permanently gone.
                        ec = woodchuck.TransferStatus.TransientOther
                        if 300 <= http_status and http_status < 400:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        if 400 <= http_status and http_status < 500:
                            ec = woodchuck.TransferStatus.FailureGone
                        if 500 <= http_status and http_status < 600:
                            ec = woodchuck.TransferStatus.TransientNetwork
                        wc()[self.key].update_failed(ec)
                    mainthread.execute (e, async=True)
            else:
               currentTime = time.time()
               # The etag and modified value should only be updated if the content was not null
               try:
                   etag = tmp["etag"]
               except KeyError:
                   etag = None
               try:
                   modified = tmp["modified"]
               except KeyError:
                   modified = None
               # Best-effort favicon refresh; failures are only logged.
               try:
                   abs_url = urljoin(tmp["feed"]["link"],"/favicon.ico")
                   f = opener.open(abs_url)
                   data = f.read()
                   f.close()
                   outf = open(self.dir+"/favicon.ico", "w")
                   outf.write(data)
                   outf.close()
                   del data
               except (urllib2.HTTPError, urllib2.URLError), exception:
                   print ("Could not download favicon %s: %s"
                          % (abs_url, str (exception)))

               self.serial_execution_lock.acquire ()
               have_serial_execution_lock = True

               #reversedEntries = self.getEntries()
               #reversedEntries.reverse()

               ids = self.getIds()

               # Process oldest entries first.
               tmp["entries"].reverse()
               for entry in tmp["entries"]:
                   # Yield so as to make the main thread a bit more
                   # responsive.
                   time.sleep(0)

                   if JobManager().do_quit:
                       raise KeyboardInterrupt

                   # Baselines for per-entry transfer accounting reported
                   # to Woodchuck at the end of the loop body.
                   received_base = progress_handler.stats['received']
                   sent_base = progress_handler.stats['sent']
                   object_size = 0

                   date = self.extractDate(entry)
                   # Fill in defaults for fields a sloppy feed may omit.
                   try:
                       entry["title"]
                   except KeyError:
                       entry["title"] = "No Title"
                   try :
                       entry["link"]
                   except KeyError:
                       entry["link"] = ""
                   try:
                       entry["author"]
                   except KeyError:
                       entry["author"] = None
                   if(not(entry.has_key("id"))):
                       entry["id"] = None
                   content = self.extractContent(entry)
                   object_size = len (content)
                   received_base -= len (content)
                   tmpEntry = {"title":entry["title"], "content":content,
                                "date":date, "link":entry["link"], "author":entry["author"], "id":entry["id"]}
                   id = self.generateUniqueId(tmpEntry)

                   #articleTime = time.mktime(self.entries[id]["dateTuple"])
                   soup = BeautifulSoup(self.getArticle(tmpEntry)) #tmpEntry["content"])
                   images = soup('img')
                   baseurl = tmpEntry["link"]
                   #if not id in ids:
                   if imageCache and len(images) > 0:
                       # Image downloads are slow: drop the serial
                       # execution lock so other work can proceed, then
                       # retake it before touching the database again.
                       self.serial_execution_lock.release ()
                       have_serial_execution_lock = False
                       for img in images:
                            filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
                            if filename:
                                # Point the article's <img> at the local copy.
                                img['src']="file://%s" %filename
                                count = self.db.execute("SELECT count(1) FROM images where id=? and imagePath=?;", (id, filename )).fetchone()[0]
                                if count == 0:
                                    self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                                    self.db.commit()

                                try:
                                    object_size += os.path.getsize (filename)
                                except os.error, exception:
                                    print ("Error getting size of %s: %s"
                                           % (filename, exception))
                                    pass
                       self.serial_execution_lock.acquire ()
                       have_serial_execution_lock = True

                   # Write the (possibly image-rewritten) article to disk.
                   tmpEntry["contentLink"] = configdir+self.key+".d/"+id+".html"
                   file = open(tmpEntry["contentLink"], "w")
                   file.write(soup.prettify())
                   file.close()
                   if id in ids:
                       # Known article: just refresh its 'updated' stamp
                       # so the expiry pass below keeps it.
                       self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
                       self.db.commit()
                   else:
                       values = (id, tmpEntry["title"], tmpEntry["contentLink"], tmpEntry["date"], currentTime, tmpEntry["link"], 0)
                       self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
                       self.db.commit()
#                   else:
#                       try:
#                           self.db.execute("UPDATE feed SET updated=? WHERE id=?;", (currentTime, id) )
#                           self.db.commit()
#                           filename = configdir+self.key+".d/"+id+".html"
#                           file = open(filename,"a")
#                           utime(filename, None)
#                           file.close()
#                           images = self.db.execute("SELECT imagePath FROM images where id=?;", (id, )).fetchall()
#                           for image in images:
#                                file = open(image[0],"a")
#                                utime(image[0], None)
#                                file.close()
#                       except:
#                           pass

                   # Register the object with Woodchuck and mark it as
                   # downloaded.
                   if have_woodchuck:
                       def e():
                           try:
                               obj = wc()[self.key].object_register(
                                   object_identifier=id,
                                   human_readable_name=tmpEntry["title"])
                           except woodchuck.ObjectExistsError:
                               obj = wc()[self.key][id]
                           else:
                               # If the entry does not contain a publication
                               # time, the attribute won't exist.
                               pubtime = entry.get ('date_parsed', None)
                               if pubtime:
                                   obj.publication_time = time.mktime (pubtime)

                               received = (progress_handler.stats['received']
                                           - received_base)
                               sent = progress_handler.stats['sent'] - sent_base
                               obj.transferred (
                                   indicator=(woodchuck.Indicator.ApplicationVisual
                                              |woodchuck.Indicator.StreamWide),
                                   transferred_down=received,
                                   transferred_up=sent,
                                   object_size=object_size)
                       mainthread.execute(e, async=True)
               self.db.commit()

               print ("%s: Update successful: transferred: %d/%d; objects: %d)"
                      % (self.key,
                         progress_handler.stats['sent'],
                         progress_handler.stats['received'],
                         len (tmp.entries)))
               mainthread.execute (wc_success, async=True)
               success = True

            # Expire stale articles: read ones after one expiry period,
            # unread ones after two.
            rows = self.db.execute("SELECT id FROM feed WHERE (read=0 AND updated<?) OR (read=1 AND updated<?);", (currentTime-2*expiry, currentTime-expiry))
            for row in rows:
               self.removeEntry(row[0])

            from glob import glob
            from os import stat
            # Sweep the cache directory: delete any file (article html,
            # image, ...) not modified within three expiry periods.
            for file in glob(configdir+self.key+".d/*"):
                #
                stats = stat(file)
                #
                # put the two dates into matching format
                #
                lastmodDate = stats[8]
                #
                expDate = time.time()-expiry*3
                # check if image-last-modified-date is outdated
                #
                if expDate > lastmodDate:
                    #
                    try:
                        #
                        #print 'Removing', file
                        #
                        # XXX: Tell woodchuck.
                        remove(file) # commented out for testing
                        #
                    except OSError, exception:
                        #
                        print 'Could not remove %s: %s' % (file, str (exception))
            print ("updated %s: %fs in download, %fs in processing"
                   % (self.key, download_duration,
                      time.time () - process_start))
        except:
            print "Updating %s: %s" % (self.key, sys.exc_info()[0])
            traceback.print_exc()
        finally:
            self.db.commit ()

            if have_serial_execution_lock:
                self.serial_execution_lock.release ()

            if update_lock is not None:
                release_lock (update_lock)

            updateTime = 0
            try:
                rows = self.db.execute("SELECT MAX(date) FROM feed;")
                for row in rows:
                    updateTime=row[0]
            except:
                print "Fetching update time."
                traceback.print_exc()
            finally:
                if not success:
                    # Do not advance the HTTP cache validators on failure.
                    etag = None
                    modified = None
                # The feed's own title, if the parse got far enough to
                # produce one ('tmp' may be unbound if get_lock failed,
                # hence UnboundLocalError is also caught).
                title = None
                try:
                    title = tmp.feed.title
                except (AttributeError, UnboundLocalError), exception:
                    pass
                if postFeedUpdateFunc is not None:
                    postFeedUpdateFunc (self.key, updateTime, etag, modified,
                                        title, *postFeedUpdateFuncArgs)
454
455     def setEntryRead(self, id):
456         self.db.execute("UPDATE feed SET read=1 WHERE id=?;", (id,) )
457         self.db.commit()
458
459         def e():
460             if wc().available():
461                 try:
462                     wc()[self.key][id].used()
463                 except KeyError:
464                     pass
465
466     def setEntryUnread(self, id):
467         self.db.execute("UPDATE feed SET read=0 WHERE id=?;", (id,) )
468         self.db.commit()     
469         
470     def markAllAsRead(self):
471         self.db.execute("UPDATE feed SET read=1 WHERE read=0;")
472         self.db.commit()
473
474     def isEntryRead(self, id):
475         read_status = self.db.execute("SELECT read FROM feed WHERE id=?;", (id,) ).fetchone()[0]
476         return read_status==1  # Returns True if read==1, and False if read==0
477     
478     def getTitle(self, id):
479         return self.db.execute("SELECT title FROM feed WHERE id=?;", (id,) ).fetchone()[0]
480     
481     def getContentLink(self, id):
482         return self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,) ).fetchone()[0]
483     
484     def getExternalLink(self, id):
485         return self.db.execute("SELECT link FROM feed WHERE id=?;", (id,) ).fetchone()[0]
486     
487     def getDate(self, id):
488         dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
489         return time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(dateStamp))
490
491     def getDateTuple(self, id):
492         dateStamp = self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
493         return time.localtime(dateStamp)
494     
495     def getDateStamp(self, id):
496         return self.db.execute("SELECT date FROM feed WHERE id=?;", (id,) ).fetchone()[0]
497     
498     def generateUniqueId(self, entry):
499         if(entry["id"] != None):
500             return getId(str(entry["id"]))
501         else:
502             try:
503                 return getId(str(entry["date"]) + str(entry["title"]))
504             except:
505                 #print entry["title"]
506                 return getId(str(entry["date"]))
507     
508     def getIds(self, onlyUnread=False):
509         if onlyUnread:
510             rows = self.db.execute("SELECT id FROM feed where read=0 ORDER BY date DESC;").fetchall()
511         else:
512             rows = self.db.execute("SELECT id FROM feed ORDER BY date DESC;").fetchall()
513         ids = []
514         for row in rows:
515             ids.append(row[0])
516         #ids.reverse()
517         return ids
518     
519     def getNextId(self, id):
520         ids = self.getIds()
521         index = ids.index(id)
522         return ids[(index+1)%len(ids)]
523         
524     def getPreviousId(self, id):
525         ids = self.getIds()
526         index = ids.index(id)
527         return ids[(index-1)%len(ids)]
528     
529     def getNumberOfUnreadItems(self):
530         return self.db.execute("SELECT count(*) FROM feed WHERE read=0;").fetchone()[0]
531     
532     def getNumberOfEntries(self):
533         return self.db.execute("SELECT count(*) FROM feed;").fetchone()[0]
534
535     def getArticle(self, entry):
536         #self.setEntryRead(id)
537         #entry = self.entries[id]
538         title = entry['title']
539         #content = entry.get('content', entry.get('summary_detail', {}))
540         content = entry["content"]
541
542         link = entry['link']
543         author = entry['author']
544         date = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(entry["date"]) )
545
546         #text = '''<div style="color: black; background-color: white;">'''
547         text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
548         text += "<html><head><title>" + title + "</title>"
549         text += '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>\n'
550         #text += '<style> body {-webkit-user-select: none;} </style>'
551         text += '</head><body bgcolor=\"#ffffff\"><div><a href=\"' + link + '\">' + title + "</a>"
552         if author != None:
553             text += "<BR /><small><i>Author: " + author + "</i></small>"
554         text += "<BR /><small><i>Date: " + date + "</i></small></div>"
555         text += "<BR /><BR />"
556         text += content
557         text += "</body></html>"
558         return text
559    
560     def getContent(self, id):
561         contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
562         try:
563             file = open(self.entries[id]["contentLink"])
564             content = file.read()
565             file.close()
566         except:
567             content = "Content unavailable"
568         return content
569     
570     def extractDate(self, entry):
571         if entry.has_key("updated_parsed"):
572             return timegm(entry["updated_parsed"])
573         elif entry.has_key("published_parsed"):
574             return timegm(entry["published_parsed"])
575         else:
576             return time.time()
577         
578     def extractContent(self, entry):
579         content = ""
580         if entry.has_key('summary'):
581             content = entry.get('summary', '')
582         if entry.has_key('content'):
583             if len(entry.content[0].value) > len(content):
584                 content = entry.content[0].value
585         if content == "":
586             content = entry.get('description', '')
587         return content
588     
589     def removeEntry(self, id):
590         contentLink = self.db.execute("SELECT contentLink FROM feed WHERE id=?;", (id,)).fetchone()[0]
591         if contentLink:
592             try:
593                 remove(contentLink)
594             except OSError, exception:
595                 print "Deleting %s: %s" % (contentLink, str (exception))
596         self.db.execute("DELETE FROM feed WHERE id=?;", (id,) )
597         self.db.execute("DELETE FROM images WHERE id=?;", (id,) )
598         self.db.commit()
599
600         def e():
601             if wc().available():
602                 try:
603                     wc()[self.key][id].files_deleted (
604                         woodchuck.DeletionResponse.Deleted)
605                     del wc()[self.key][id]
606                 except KeyError:
607                     pass
608         mainthread.execute (e, async=True)
609  
class ArchivedArticles(Feed):
    """Pseudo-feed holding articles the user explicitly archived.

    Articles are added one at a time via addArchivedArticle; updateFeed
    then downloads the stored pages themselves rather than parsing an
    RSS/Atom document."""

    def addArchivedArticle(self, title, link, date, configdir):
        # A new archived article starts unread with updated=0; the zero
        # marks it as "not yet downloaded" for updateFeed below.
        id = self.generateUniqueId({"date":date, "title":title})
        values = (id, title, link, date, 0, link, 0)
        self.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
        self.db.commit()

    def updateFeed(self, configdir, url, etag, modified, expiryTime=24, proxy=None, imageCache=False):
        """Download every not-yet-fetched archived page (updated=0),
        cache its images, and return (updateTime, None, None).

        NOTE(review): unlike Feed.updateFeed this runs synchronously and
        returns a tuple; url/etag/modified/expiryTime/imageCache are
        accepted for signature compatibility but never referenced."""
        currentTime = 0
        rows = self.db.execute("SELECT id, link FROM feed WHERE updated=0;")
        for row in rows:
            currentTime = time.time()
            id = row[0]
            link = row[1]
            f = urllib2.urlopen(link)
            #entry["content"] = f.read()
            html = f.read()
            f.close()
            soup = BeautifulSoup(html)
            images = soup('img')
            baseurl = link
            for img in images:
                # Rewrite image references to the locally cached copies.
                filename = self.addImage(configdir, self.key, baseurl, img['src'], proxy=proxy)
                img['src']=filename
                self.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (id, filename) )
                self.db.commit()
            contentLink = configdir+self.key+".d/"+id+".html"
            file = open(contentLink, "w")
            file.write(soup.prettify())
            file.close()

            self.db.execute("UPDATE feed SET read=0, contentLink=?, updated=? WHERE id=?;", (contentLink, time.time(), id) )
            self.db.commit()
        return (currentTime, None, None)

    def purgeReadArticles(self):
        # Remove every article that has been read.
        rows = self.db.execute("SELECT id FROM feed WHERE read=1;")
        #ids = self.getIds()
        for row in rows:
            self.removeArticle(row[0])

    def removeArticle(self, id):
        """Remove article ID along with any cached images that no other
        article still references."""
        rows = self.db.execute("SELECT imagePath FROM images WHERE id=?;", (id,) )
        for row in rows:
            try:
                # Only delete the image file if no other article uses it.
                count = self.db.execute("SELECT count(*) FROM images WHERE id!=? and imagePath=?;", (id,row[0]) ).fetchone()[0]
                if count == 0:
                    os.remove(row[0])
            except:
                pass
        self.removeEntry(id)
661
class Listing:
    """The collection of all subscribed feeds, backed by feeds.db, with
    category bookkeeping and Woodchuck stream registration."""

    def _getdb(self):
        """Return the calling thread's sqlite connection to feeds.db,
        creating it on first use (sqlite3 connections must not be shared
        across threads)."""
        try:
            db = self.tls.db
        except AttributeError:
            db = sqlite3.connect("%s/feeds.db" % self.configdir, timeout=120)
            self.tls.db = db
        return db
    # Per-thread connection dressed up as a plain attribute.
    db = property(_getdb)
671
672     # Lists all the feeds in a dictionary, and expose the data
673     def __init__(self, config, configdir):
674         self.config = config
675         self.configdir = configdir
676
677         self.tls = threading.local ()
678         
679         try:
680             table = self.db.execute("SELECT sql FROM sqlite_master").fetchone()
681             if table == None:
682                 self.db.execute("CREATE TABLE feeds(id text, url text, title text, unread int, updateTime float, rank int, etag text, modified text, widget int, category int);")
683                 self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
684                 self.addCategory("Default Category")
685                 if isfile(self.configdir+"feeds.pickle"):
686                     self.importOldFormatFeeds()
687                 else:
688                     self.addFeed("Maemo News", "http://maemo.org/news/items.xml")    
689             else:
690                 from string import find, upper
691                 if find(upper(table[0]), "WIDGET")<0:
692                     self.db.execute("ALTER TABLE feeds ADD COLUMN widget int;")
693                     self.db.execute("UPDATE feeds SET widget=1;")
694                     self.db.commit()
695                 if find(upper(table[0]), "CATEGORY")<0:
696                     self.db.execute("CREATE TABLE categories(id text, title text, unread int, rank int);")
697                     self.addCategory("Default Category")
698                     self.db.execute("ALTER TABLE feeds ADD COLUMN category int;")
699                     self.db.execute("UPDATE feeds SET category=1;")
700             self.db.commit()
701         except:
702             pass
703
704         # Check that Woodchuck's state is up to date with respect our
705         # state.
706         wc_init (self)
707         if wc().available():
708             # The list of known streams.
709             streams = wc().streams_list ()
710             stream_ids = [s.identifier for s in streams]
711
712             # Register any unknown streams.  Remove known streams from
713             # STREAMS_IDS.
714             for key in self.getListOfFeeds():
715                 title = self.getFeedTitle(key)
716                 # XXX: We should also check whether the list of
717                 # articles/objects in each feed/stream is up to date.
718                 if key not in stream_ids:
719                     print ("Registering previously unknown channel: %s (%s)"
720                            % (key, title,))
721                     # Use a default refresh interval of 6 hours.
722                     wc().stream_register (key, title, 6 * 60 * 60)
723                 else:
724                     # Make sure the human readable name is up to date.
725                     if wc()[key].human_readable_name != title:
726                         wc()[key].human_readable_name = title
727                     stream_ids.remove (key)
728                     
729
730             # Unregister any streams that are no longer subscribed to.
731             for id in stream_ids:
732                 print ("Unregistering %s" % (id,))
733                 w.stream_unregister (id)
734
735     def importOldFormatFeeds(self):
736         """This function loads feeds that are saved in an outdated format, and converts them to sqlite"""
737         import rss
738         listing = rss.Listing(self.configdir)
739         rank = 0
740         for id in listing.getListOfFeeds():
741             try:
742                 rank += 1
743                 values = (id, listing.getFeedTitle(id) , listing.getFeedUrl(id), 0, time.time(), rank, None, "None", 1)
744                 self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?, 1);", values)
745                 self.db.commit()
746                 
747                 feed = listing.getFeed(id)
748                 new_feed = self.getFeed(id)
749                 
750                 items = feed.getIds()[:]
751                 items.reverse()
752                 for item in items:
753                         if feed.isEntryRead(item):
754                             read_status = 1
755                         else:
756                             read_status = 0 
757                         date = timegm(feed.getDateTuple(item))
758                         title = feed.getTitle(item)
759                         newId = new_feed.generateUniqueId({"date":date, "title":title})
760                         values = (newId, title , feed.getContentLink(item), date, tuple(time.time()), feed.getExternalLink(item), read_status)
761                         new_feed.db.execute("INSERT INTO feed (id, title, contentLink, date, updated, link, read) VALUES (?, ?, ?, ?, ?, ?, ?);", values)
762                         new_feed.db.commit()
763                         try:
764                             images = feed.getImages(item)
765                             for image in images:
766                                 new_feed.db.execute("INSERT INTO images (id, imagePath) VALUES (?, ?);", (item, image) )
767                                 new_feed.db.commit()
768                         except:
769                             pass
770                 self.updateUnread(id)
771             except:
772                 traceback.print_exc()
773         remove(self.configdir+"feeds.pickle")
774                 
775         
776     def addArchivedArticle(self, key, index):
777         feed = self.getFeed(key)
778         title = feed.getTitle(index)
779         link = feed.getExternalLink(index)
780         date = feed.getDate(index)
781         count = self.db.execute("SELECT count(*) FROM feeds where id=?;", ("ArchivedArticles",) ).fetchone()[0]
782         if count == 0:
783             self.addFeed("Archived Articles", "", id="ArchivedArticles")
784
785         archFeed = self.getFeed("ArchivedArticles")
786         archFeed.addArchivedArticle(title, link, date, self.configdir)
787         self.updateUnread("ArchivedArticles")
788         
789     def updateFeed(self, key, expiryTime=None, proxy=None, imageCache=None,
790                    priority=0):
791         if expiryTime is None:
792             expiryTime = self.config.getExpiry()
793         if not expiryTime:
794             # Default to 24 hours
795             expriyTime = 24
796         if proxy is None:
797             (use_proxy, proxy) = self.config.getProxy()
798             if not use_proxy:
799                 proxy = None
800         if imageCache is None:
801             imageCache = self.config.getImageCache()
802
803         feed = self.getFeed(key)
804         (url, etag, modified) = self.db.execute("SELECT url, etag, modified FROM feeds WHERE id=?;", (key,) ).fetchone()
805         try:
806             modified = time.struct_time(eval(modified))
807         except:
808             modified = None
809         feed.updateFeed(
810             self.configdir, url, etag, modified, expiryTime, proxy, imageCache,
811             priority, postFeedUpdateFunc=self._queuePostFeedUpdate)
812
    def _queuePostFeedUpdate(self, *args, **kwargs):
        # Marshal _postFeedUpdate onto the main thread without blocking
        # the downloader thread (async=True); args/kwargs are forwarded
        # unchanged to _postFeedUpdate.
        mainthread.execute (self._postFeedUpdate, async=True, *args, **kwargs)
815
816     def _postFeedUpdate(self, key, updateTime, etag, modified, title):
817         if modified==None:
818             modified="None"
819         else:
820             modified=str(tuple(modified))
821         if updateTime > 0:
822             self.db.execute("UPDATE feeds SET updateTime=?, etag=?, modified=? WHERE id=?;", (updateTime, etag, modified, key) )
823         else:
824             self.db.execute("UPDATE feeds SET etag=?, modified=? WHERE id=?;", (etag, modified, key) )
825
826         if title is not None:
827             self.db.execute("UPDATE feeds SET title=(case WHEN title=='' THEN ? ELSE title END) where id=?;",
828                             (title, key))
829         self.db.commit()
830         self.updateUnread(key)
831         
832     def getFeed(self, key):
833         if key == "ArchivedArticles":
834             return ArchivedArticles(self.configdir, key)
835         return Feed(self.configdir, key)
836         
837     def editFeed(self, key, title, url, category=None):
838         if category:
839             self.db.execute("UPDATE feeds SET title=?, url=?, category=? WHERE id=?;", (title, url, category, key))
840         else:
841             self.db.execute("UPDATE feeds SET title=?, url=? WHERE id=?;", (title, url, key))
842         self.db.commit()
843
844         if wc().available():
845             try:
846                 wc()[key].human_readable_name = title
847             except KeyError:
848                 print "Feed %s (%s) unknown." % (key, title)
849                 pass
850         
851     def getFeedUpdateTime(self, key):
852         return time.ctime(self.db.execute("SELECT updateTime FROM feeds WHERE id=?;", (key,)).fetchone()[0])
853         
854     def getFeedNumberOfUnreadItems(self, key):
855         return self.db.execute("SELECT unread FROM feeds WHERE id=?;", (key,)).fetchone()[0]
856         
857     def getFeedTitle(self, key):
858         (title, url) = self.db.execute("SELECT title, url FROM feeds WHERE id=?;", (key,)).fetchone()
859         if title:
860             return title
861         return url
862         
863     def getFeedUrl(self, key):
864         return self.db.execute("SELECT url FROM feeds WHERE id=?;", (key,)).fetchone()[0]
865     
866     def getFeedCategory(self, key):
867         return self.db.execute("SELECT category FROM feeds WHERE id=?;", (key,)).fetchone()[0]
868         
869     def getListOfFeeds(self, category=None):
870         if category:
871             rows = self.db.execute("SELECT id FROM feeds WHERE category=? ORDER BY rank;", (category, ) )
872         else:
873             rows = self.db.execute("SELECT id FROM feeds ORDER BY rank;" )
874         keys = []
875         for row in rows:
876             if row[0]:
877                 keys.append(row[0])
878         return keys
879     
880     def getListOfCategories(self):
881         rows = self.db.execute("SELECT id FROM categories ORDER BY rank;" )
882         keys = []
883         for row in rows:
884             if row[0]:
885                 keys.append(row[0])
886         return keys
887     
888     def getCategoryTitle(self, id):
889         row = self.db.execute("SELECT title FROM categories WHERE id=?;", (id, )).fetchone()
890         return row[0]
891     
892     def getSortedListOfKeys(self, order, onlyUnread=False, category=1):
893         if   order == "Most unread":
894             tmp = "ORDER BY unread DESC"
895             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1], reverse=True)
896         elif order == "Least unread":
897             tmp = "ORDER BY unread"
898             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][1])
899         elif order == "Most recent":
900             tmp = "ORDER BY updateTime DESC"
901             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2], reverse=True)
902         elif order == "Least recent":
903             tmp = "ORDER BY updateTime"
904             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][2])
905         else: # order == "Manual" or invalid value...
906             tmp = "ORDER BY rank"
907             #keyorder = sorted(feedInfo, key = lambda k: feedInfo[k][0])
908         if onlyUnread:
909             sql = "SELECT id FROM feeds WHERE unread>0 AND category=%s " %category + tmp 
910         else:
911             sql = "SELECT id FROM feeds WHERE category=%s " %category + tmp
912         rows = self.db.execute(sql)
913         keys = []
914         for row in rows:
915             if row[0]:
916                 keys.append(row[0])
917         return keys
918     
919     def getFavicon(self, key):
920         filename = "%s%s.d/favicon.ico" % (self.configdir, key)
921         if isfile(filename):
922             return filename
923         else:
924             return False
925         
926     def updateUnread(self, key):
927         feed = self.getFeed(key)
928         self.db.execute("UPDATE feeds SET unread=? WHERE id=?;", (feed.getNumberOfUnreadItems(), key))
929         self.db.commit()
930
931     def addFeed(self, title, url, id=None, category=1):
932         if not id:
933             id = getId(url)
934         count = self.db.execute("SELECT count(*) FROM feeds WHERE id=?;", (id,) ).fetchone()[0]
935         if count == 0:
936             max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
937             if max_rank == None:
938                 max_rank = 0
939             values = (id, title, url, 0, 0, max_rank+1, None, "None", 1, category)
940             self.db.execute("INSERT INTO feeds (id, title, url, unread, updateTime, rank, etag, modified, widget, category) VALUES (?, ?, ? ,? ,? ,?, ?, ?, ?,?);", values)
941             self.db.commit()
942             # Ask for the feed object, it will create the necessary tables
943             self.getFeed(id)
944
945             if wc().available():
946                 # Register the stream with Woodchuck.  Update approximately
947                 # every 6 hours.
948                 wc().stream_register(stream_identifier=id,
949                                      human_readable_name=title,
950                                      freshness=6*60*60)
951
952             return True
953         else:
954             return False
955         
956     def addCategory(self, title):
957         rank = self.db.execute("SELECT MAX(rank)+1 FROM categories;").fetchone()[0]
958         if rank==None:
959             rank=1
960         id = self.db.execute("SELECT MAX(id)+1 FROM categories;").fetchone()[0]
961         if id==None:
962             id=1
963         self.db.execute("INSERT INTO categories (id, title, unread, rank) VALUES (?, ?, 0, ?)", (id, title, rank))
964         self.db.commit()
965     
966     def removeFeed(self, key):
967         if wc().available ():
968             try:
969                 del wc()[key]
970             except KeyError:
971                 print "Removing unregistered feed %s failed" % (key,)
972
973         rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,) ).fetchone()[0]
974         self.db.execute("DELETE FROM feeds WHERE id=?;", (key, ))
975         self.db.execute("UPDATE feeds SET rank=rank-1 WHERE rank>?;", (rank,) )
976         self.db.commit()
977
978         if isdir(self.configdir+key+".d/"):
979            rmtree(self.configdir+key+".d/")
980            
981     def removeCategory(self, key):
982         if self.db.execute("SELECT count(*) FROM categories;").fetchone()[0] > 1:
983             rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,) ).fetchone()[0]
984             self.db.execute("DELETE FROM categories WHERE id=?;", (key, ))
985             self.db.execute("UPDATE categories SET rank=rank-1 WHERE rank>?;", (rank,) )
986             self.db.execute("UPDATE feeds SET category=1 WHERE category=?;", (key,) )
987             self.db.commit()
988         
989     #def saveConfig(self):
990     #    self.listOfFeeds["feedingit-order"] = self.sortedKeys
991     #    file = open(self.configdir+"feeds.pickle", "w")
992     #    pickle.dump(self.listOfFeeds, file)
993     #    file.close()
994         
995     def moveUp(self, key):
996         rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
997         if rank>0:
998             self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank-1) )
999             self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank-1, key) )
1000             self.db.commit()
1001             
1002     def moveCategoryUp(self, key):
1003         rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
1004         if rank>0:
1005             self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank-1) )
1006             self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank-1, key) )
1007             self.db.commit()
1008         
1009     def moveDown(self, key):
1010         rank = self.db.execute("SELECT rank FROM feeds WHERE id=?;", (key,)).fetchone()[0]
1011         max_rank = self.db.execute("SELECT MAX(rank) FROM feeds;").fetchone()[0]
1012         if rank<max_rank:
1013             self.db.execute("UPDATE feeds SET rank=? WHERE rank=?;", (rank, rank+1) )
1014             self.db.execute("UPDATE feeds SET rank=? WHERE id=?;", (rank+1, key) )
1015             self.db.commit()
1016             
1017     def moveCategoryDown(self, key):
1018         rank = self.db.execute("SELECT rank FROM categories WHERE id=?;", (key,)).fetchone()[0]
1019         max_rank = self.db.execute("SELECT MAX(rank) FROM categories;").fetchone()[0]
1020         if rank<max_rank:
1021             self.db.execute("UPDATE categories SET rank=? WHERE rank=?;", (rank, rank+1) )
1022             self.db.execute("UPDATE categories SET rank=? WHERE id=?;", (rank+1, key) )
1023             self.db.commit()
1024             
1025