Limiting the size of the cache
[theonering] / src / gvoice / browser_emu.py
index c29d482..f45b204 100644 (file)
@@ -32,8 +32,8 @@ import logging
 import socket
 
 
-_moduleLogger = logging.getLogger("gvoice.browser_emu")
-socket.setdefaulttimeout(10)
+_moduleLogger = logging.getLogger(__name__)
+socket.setdefaulttimeout(20)
 
 
 class MozillaEmulator(object):
@@ -41,49 +41,43 @@ class MozillaEmulator(object):
        def __init__(self, trycount = 1):
                """Create a new MozillaEmulator object.
 
-               @param trycount: The download() method will retry the operation if it fails. You can specify -1 for infinite retrying.
-                        A value of 0 means no retrying. A value of 1 means one retry. etc."""
-               self.cookies = cookielib.LWPCookieJar()
+               @param trycount: The download() method will retry the operation if it
+               fails. You can specify -1 for infinite retrying.  A value of 0 means no
+               retrying. A value of 1 means one retry. etc."""
                self.debug = False
                self.trycount = trycount
-
-       def build_opener(self, url, postdata = None, extraheaders = None, forbid_redirect = False):
-               if extraheaders is None:
-                       extraheaders = {}
-
-               txheaders = {
-                       'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png',
-                       'Accept-Language': 'en,en-us;q=0.5',
-                       'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
-               }
-               for key, value in extraheaders.iteritems():
-                       txheaders[key] = value
-               req = urllib2.Request(url, postdata, txheaders)
-               self.cookies.add_cookie_header(req)
-               if forbid_redirect:
-                       redirector = HTTPNoRedirector()
+               self._cookies = cookielib.LWPCookieJar()
+               self._loadedFromCookies = False
+
+       def load_cookies(self, path):
+               assert not self._loadedFromCookies, "Load cookies only once"
+               if path is None:
+                       return
+
+               self._cookies.filename = path
+               try:
+                       self._cookies.load()
+               except cookielib.LoadError:
+                       _moduleLogger.exception("Bad cookie file")
+               except IOError:
+                       _moduleLogger.exception("No cookie file")
+               except Exception, e:
+                       _moduleLogger.exception("Unknown error with cookies")
                else:
-                       redirector = urllib2.HTTPRedirectHandler()
+                       self._loadedFromCookies = True
 
-               http_handler = urllib2.HTTPHandler(debuglevel=self.debug)
-               https_handler = urllib2.HTTPSHandler(debuglevel=self.debug)
+               return self._loadedFromCookies
 
-               u = urllib2.build_opener(
-                       http_handler,
-                       https_handler,
-                       urllib2.HTTPCookieProcessor(self.cookies),
-                       redirector
-               )
-               u.addheaders = [(
-                       'User-Agent',
-                       'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.8) Gecko/20050511 Firefox/1.0.4'
-               )]
-               if not postdata is None:
-                       req.add_data(postdata)
-               return (req, u)
+       def save_cookies(self):
+               if self._loadedFromCookies:
+                       self._cookies.save()
+
+       def clear_cookies(self):
+               if self._loadedFromCookies:
+                       self._cookies.clear()
 
        def download(self, url,
-                       postdata = None, extraheaders = None, forbid_redirect = False,
+                       postdata = None, extraheaders = None, forbidRedirect = False,
                        trycount = None, only_head = False,
                ):
                """Download an URL with GET or POST methods.
@@ -91,7 +85,7 @@ class MozillaEmulator(object):
                @param postdata: It can be a string that will be POST-ed to the URL.
                        When None is given, the method will be GET instead.
                @param extraheaders: You can add/modify HTTP headers with a dict here.
-               @param forbid_redirect: Set this flag if you do not want to handle
+               @param forbidRedirect: Set this flag if you do not want to handle
                        HTTP 301 and 302 redirects.
                @param trycount: Specify the maximum number of retries here.
                        0 means no retry on error. Using -1 means infinite retring.
@@ -101,7 +95,7 @@ class MozillaEmulator(object):
 
                @return: The raw HTML page data
                """
-               _moduleLogger.warning("Performing download of %s" % url)
+               _moduleLogger.debug("Performing download of %s" % url)
 
                if extraheaders is None:
                        extraheaders = {}
@@ -111,24 +105,62 @@ class MozillaEmulator(object):
 
                while True:
                        try:
-                               req, u = self.build_opener(url, postdata, extraheaders, forbid_redirect)
+                               req, u = self._build_opener(url, postdata, extraheaders, forbidRedirect)
                                openerdirector = u.open(req)
                                if self.debug:
                                        _moduleLogger.info("%r - %r" % (req.get_method(), url))
                                        _moduleLogger.info("%r - %r" % (openerdirector.code, openerdirector.msg))
                                        _moduleLogger.info("%r" % (openerdirector.headers))
-                               self.cookies.extract_cookies(openerdirector, req)
+                               self._cookies.extract_cookies(openerdirector, req)
                                if only_head:
                                        return openerdirector
 
                                return self._read(openerdirector, trycount)
-                       except urllib2.URLError:
+                       except urllib2.URLError, e:
+                               _moduleLogger.debug("%s: %s" % (e, url))
                                cnt += 1
                                if (-1 < trycount) and (trycount < cnt):
                                        raise
 
                        # Retry :-)
-                       _moduleLogger.info("MozillaEmulator: urllib2.URLError, retryting %d" % cnt)
+                       _moduleLogger.debug("MozillaEmulator: urllib2.URLError, retrying %d" % cnt)
+
+       def _build_opener(self, url, postdata = None, extraheaders = None, forbidRedirect = False):
+               if extraheaders is None:
+                       extraheaders = {}
+
+               txheaders = {
+                       'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png',
+                       'Accept-Language': 'en,en-us;q=0.5',
+                       'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
+               }
+               for key, value in extraheaders.iteritems():
+                       txheaders[key] = value
+               req = urllib2.Request(url, postdata, txheaders)
+               self._cookies.add_cookie_header(req)
+               if forbidRedirect:
+                       redirector = HTTPNoRedirector()
+                       #_moduleLogger.info("Redirection disabled")
+               else:
+                       redirector = urllib2.HTTPRedirectHandler()
+                       #_moduleLogger.info("Redirection enabled")
+
+               http_handler = urllib2.HTTPHandler(debuglevel=self.debug)
+               https_handler = urllib2.HTTPSHandler(debuglevel=self.debug)
+
+               u = urllib2.build_opener(
+                       http_handler,
+                       https_handler,
+                       urllib2.HTTPCookieProcessor(self._cookies),
+                       redirector
+               )
+               u.addheaders = [(
+                       'User-Agent',
+                       'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.1.4) Gecko/20091016 Firefox/3.5.4 (.NET CLR 3.5.30729)'
+               )]
+               if not postdata is None:
+                       req.add_data(postdata)
+               return (req, u)
 
        def _read(self, openerdirector, trycount):
                chunks = []
@@ -163,4 +195,5 @@ class HTTPNoRedirector(urllib2.HTTPRedirectHandler):
                        elif 'uri' in headers:
                                newurl = headers.getheaders('uri')[0]
                        e.newurl = newurl
+               _moduleLogger.info("New url: %s" % e.newurl)
                raise e