X-Git-Url: http://git.maemo.org/git/?a=blobdiff_plain;f=src%2Fgvoice%2Fbrowser_emu.py;h=27f9e296167cb12e4713081187e02c7a59cf79f5;hb=f8ffc8e5741b1dc7f7ac43ca80a66191b8c881b1;hp=c29d482a57157108154e9af27a421fc8153a6c35;hpb=a6b38779bb4204f033c322cd0688115b47652b74;p=theonering diff --git a/src/gvoice/browser_emu.py b/src/gvoice/browser_emu.py index c29d482..27f9e29 100644 --- a/src/gvoice/browser_emu.py +++ b/src/gvoice/browser_emu.py @@ -41,49 +41,43 @@ class MozillaEmulator(object): def __init__(self, trycount = 1): """Create a new MozillaEmulator object. - @param trycount: The download() method will retry the operation if it fails. You can specify -1 for infinite retrying. - A value of 0 means no retrying. A value of 1 means one retry. etc.""" - self.cookies = cookielib.LWPCookieJar() + @param trycount: The download() method will retry the operation if it + fails. You can specify -1 for infinite retrying. A value of 0 means no + retrying. A value of 1 means one retry. etc.""" self.debug = False self.trycount = trycount - - def build_opener(self, url, postdata = None, extraheaders = None, forbid_redirect = False): - if extraheaders is None: - extraheaders = {} - - txheaders = { - 'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png', - 'Accept-Language': 'en,en-us;q=0.5', - 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', - } - for key, value in extraheaders.iteritems(): - txheaders[key] = value - req = urllib2.Request(url, postdata, txheaders) - self.cookies.add_cookie_header(req) - if forbid_redirect: - redirector = HTTPNoRedirector() + self._cookies = cookielib.LWPCookieJar() + self._loadedFromCookies = False + + def load_cookies(self, path): + assert not self._loadedFromCookies, "Load cookies only once" + if path is None: + return + + self._cookies.filename = path + try: + self._cookies.load() + except cookielib.LoadError: + _moduleLogger.exception("Bad cookie file") + except IOError: + _moduleLogger.exception("No cookie file") + except Exception, e: + _moduleLogger.exception("Unknown error with cookies") else: - redirector = urllib2.HTTPRedirectHandler() + self._loadedFromCookies = True - http_handler = urllib2.HTTPHandler(debuglevel=self.debug) - https_handler = urllib2.HTTPSHandler(debuglevel=self.debug) + return self._loadedFromCookies - u = urllib2.build_opener( - http_handler, - https_handler, - urllib2.HTTPCookieProcessor(self.cookies), - redirector - ) - u.addheaders = [( - 'User-Agent', - 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.8) Gecko/20050511 Firefox/1.0.4' - )] - if not postdata is None: - req.add_data(postdata) - return (req, u) + def save_cookies(self): + if self._loadedFromCookies: + self._cookies.save() + + def clear_cookies(self): + if self._loadedFromCookies: + self._cookies.clear() def download(self, url, - postdata = None, extraheaders = None, forbid_redirect = False, + postdata = None, extraheaders = None, forbidRedirect = False, trycount = None, only_head = False, ): """Download an URL with GET or POST methods. @@ -91,7 +85,7 @@ class MozillaEmulator(object): @param postdata: It can be a string that will be POST-ed to the URL. When None is given, the method will be GET instead. @param extraheaders: You can add/modify HTTP headers with a dict here. - @param forbid_redirect: Set this flag if you do not want to handle + @param forbidRedirect: Set this flag if you do not want to handle HTTP 301 and 302 redirects. @param trycount: Specify the maximum number of retries here. 0 means no retry on error. Using -1 means infinite retring. @@ -101,7 +95,7 @@ class MozillaEmulator(object): @return: The raw HTML page data """ - _moduleLogger.warning("Performing download of %s" % url) + _moduleLogger.debug("Performing download of %s" % url) if extraheaders is None: extraheaders = {} @@ -111,24 +105,62 @@ class MozillaEmulator(object): while True: try: - req, u = self.build_opener(url, postdata, extraheaders, forbid_redirect) + req, u = self._build_opener(url, postdata, extraheaders, forbidRedirect) openerdirector = u.open(req) if self.debug: _moduleLogger.info("%r - %r" % (req.get_method(), url)) _moduleLogger.info("%r - %r" % (openerdirector.code, openerdirector.msg)) _moduleLogger.info("%r" % (openerdirector.headers)) - self.cookies.extract_cookies(openerdirector, req) + self._cookies.extract_cookies(openerdirector, req) if only_head: return openerdirector return self._read(openerdirector, trycount) - except urllib2.URLError: + except urllib2.URLError, e: + _moduleLogger.debug("%s: %s" % (e, url)) cnt += 1 if (-1 < trycount) and (trycount < cnt): raise # Retry :-) - _moduleLogger.info("MozillaEmulator: urllib2.URLError, retryting %d" % cnt) + _moduleLogger.debug("MozillaEmulator: urllib2.URLError, retrying %d" % cnt) + + def _build_opener(self, url, postdata = None, extraheaders = None, forbidRedirect = False): + if extraheaders is None: + extraheaders = {} + + txheaders = { + 'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png', + 'Accept-Language': 'en,en-us;q=0.5', + 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', + } + for key, value in extraheaders.iteritems(): + txheaders[key] = value + req = urllib2.Request(url, postdata, txheaders) + self._cookies.add_cookie_header(req) + if forbidRedirect: + redirector = HTTPNoRedirector() + #_moduleLogger.info("Redirection disabled") + else: + redirector = urllib2.HTTPRedirectHandler() + #_moduleLogger.info("Redirection enabled") + + http_handler = urllib2.HTTPHandler(debuglevel=self.debug) + https_handler = urllib2.HTTPSHandler(debuglevel=self.debug) + + u = urllib2.build_opener( + http_handler, + https_handler, + urllib2.HTTPCookieProcessor(self._cookies), + redirector + ) + u.addheaders = [( + 'User-Agent', + 'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.1.4) Gecko/20091016 Firefox/3.5.4 (.NET CLR 3.5.30729)' + )] + if not postdata is None: + req.add_data(postdata) + return (req, u) def _read(self, openerdirector, trycount): chunks = [] @@ -163,4 +195,5 @@ class HTTPNoRedirector(urllib2.HTTPRedirectHandler): elif 'uri' in headers: newurl = headers.getheaders('uri')[0] e.newurl = newurl + _moduleLogger.info("New url: %s" % e.newurl) raise e