import socket
-_moduleLogger = logging.getLogger("gvoice.browser_emu")
-socket.setdefaulttimeout(10)
+_moduleLogger = logging.getLogger(__name__)
+# Process-wide default timeout, in seconds, for sockets created after this point.
+socket.setdefaulttimeout(20)
class MozillaEmulator(object):
def __init__(self, trycount = 1):
"""Create a new MozillaEmulator object.
- @param trycount: The download() method will retry the operation if it fails. You can specify -1 for infinite retrying.
- A value of 0 means no retrying. A value of 1 means one retry. etc."""
- self.cookies = cookielib.LWPCookieJar()
+ @param trycount: The download() method will retry the operation if it
+ fails. You can specify -1 for infinite retrying. A value of 0 means no
+ retrying. A value of 1 means one retry. etc."""
+ # When true, download() logs request/response details and the urllib2
+ # handlers are created with debuglevel switched on.
self.debug = False
self.trycount = trycount
-
- def build_opener(self, url, postdata = None, extraheaders = None, forbid_redirect = False):
- if extraheaders is None:
- extraheaders = {}
-
- txheaders = {
- 'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png',
- 'Accept-Language': 'en,en-us;q=0.5',
- 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
- }
- for key, value in extraheaders.iteritems():
- txheaders[key] = value
- req = urllib2.Request(url, postdata, txheaders)
- self.cookies.add_cookie_header(req)
- if forbid_redirect:
- redirector = HTTPNoRedirector()
+ self._cookies = cookielib.LWPCookieJar()
+ # Becomes True only after load_cookies() succeeds; save_cookies() and
+ # clear_cookies() do nothing until then.
+ self._loadedFromCookies = False
+
+ def load_cookies(self, path):
+ """Point the cookie jar at a file and try to load cookies from it.
+
+ Load failures are logged with a traceback and are not raised.
+
+ @param path: Cookie file to read. When None, nothing is loaded and
+ None is returned.
+ @return: True if the cookies were loaded, False if loading failed.
+ """
+ # Only a previous *successful* load trips this assert; the flag stays
+ # False after a failed attempt, so a failed load may be retried.
+ assert not self._loadedFromCookies, "Load cookies only once"
+ if path is None:
+ return
+
+ self._cookies.filename = path
+ try:
+ self._cookies.load()
+ # cookielib.LoadError is a subclass of IOError, so it has to be caught
+ # before the plain IOError handler.
+ except cookielib.LoadError:
+ _moduleLogger.exception("Bad cookie file")
+ except IOError:
+ _moduleLogger.exception("No cookie file")
+ except Exception, e:
+ _moduleLogger.exception("Unknown error with cookies")
else:
- redirector = urllib2.HTTPRedirectHandler()
+ self._loadedFromCookies = True
- http_handler = urllib2.HTTPHandler(debuglevel=self.debug)
- https_handler = urllib2.HTTPSHandler(debuglevel=self.debug)
+ return self._loadedFromCookies
- u = urllib2.build_opener(
- http_handler,
- https_handler,
- urllib2.HTTPCookieProcessor(self.cookies),
- redirector
- )
- u.addheaders = [(
- 'User-Agent',
- 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.8) Gecko/20050511 Firefox/1.0.4'
- )]
- if not postdata is None:
- req.add_data(postdata)
- return (req, u)
+ def save_cookies(self):
+ """Save the cookie jar to its file.
+
+ Does nothing unless load_cookies() succeeded earlier, which is also
+ where the jar's filename gets set.
+ """
+ if self._loadedFromCookies:
+ self._cookies.save()
+
+ def clear_cookies(self):
+ """Remove every cookie from the jar.
+
+ Does nothing unless load_cookies() succeeded earlier.
+ """
+ # NOTE(review): cookies collected while no cookie file was loaded are
+ # left in the jar by this guard -- confirm that is intended.
+ if self._loadedFromCookies:
+ self._cookies.clear()
def download(self, url,
- postdata = None, extraheaders = None, forbid_redirect = False,
+ postdata = None, extraheaders = None, forbidRedirect = False,
trycount = None, only_head = False,
):
"""Download an URL with GET or POST methods.
@param postdata: It can be a string that will be POST-ed to the URL.
When None is given, the method will be GET instead.
@param extraheaders: You can add/modify HTTP headers with a dict here.
- @param forbid_redirect: Set this flag if you do not want to handle
+ @param forbidRedirect: Set this flag if you do not want to handle
HTTP 301 and 302 redirects.
@param trycount: Specify the maximum number of retries here.
- 0 means no retry on error. Using -1 means infinite retring.
+ 0 means no retry on error. Using -1 means infinite retrying.
+ @param only_head: When true, return the opened response object without
+ reading its body.
@return: The raw HTML page data
"""
- _moduleLogger.warning("Performing download of %s" % url)
+ _moduleLogger.debug("Performing download of %s" % url)
if extraheaders is None:
extraheaders = {}
+ # NOTE(review): no initialization of cnt, and no fallback for
+ # trycount=None, is visible in the lines shown here -- confirm both are
+ # set before this loop.
while True:
try:
- req, u = self.build_opener(url, postdata, extraheaders, forbid_redirect)
+ req, u = self._build_opener(url, postdata, extraheaders, forbidRedirect)
openerdirector = u.open(req)
if self.debug:
_moduleLogger.info("%r - %r" % (req.get_method(), url))
_moduleLogger.info("%r - %r" % (openerdirector.code, openerdirector.msg))
_moduleLogger.info("%r" % (openerdirector.headers))
- self.cookies.extract_cookies(openerdirector, req)
+ self._cookies.extract_cookies(openerdirector, req)
if only_head:
return openerdirector
return self._read(openerdirector, trycount)
- except urllib2.URLError:
+ except urllib2.URLError, e:
+ _moduleLogger.debug("%s: %s" % (e, url))
cnt += 1
+ # Re-raise once a non-negative trycount has been exceeded; with
+ # trycount == -1 this condition is never true, so the loop keeps retrying.
if (-1 < trycount) and (trycount < cnt):
raise
# Retry :-)
- _moduleLogger.info("MozillaEmulator: urllib2.URLError, retryting %d" % cnt)
+ _moduleLogger.debug("MozillaEmulator: urllib2.URLError, retrying %d" % cnt)
+
+ def _build_opener(self, url, postdata = None, extraheaders = None, forbidRedirect = False):
+ """Build the request and the opener used for one download attempt.
+
+ @param url: Address to request.
+ @param postdata: Request body, or None for a request without data.
+ @param extraheaders: Optional dict of headers; entries replace the
+ default headers that share the same key.
+ @param forbidRedirect: When true, install HTTPNoRedirector in place of
+ urllib2's standard redirect handler.
+ @return: Tuple of (urllib2.Request, opener).
+ """
+ if extraheaders is None:
+ extraheaders = {}
+
+ txheaders = {
+ 'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png',
+ 'Accept-Language': 'en,en-us;q=0.5',
+ 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
+ }
+ # Caller-supplied headers win over the defaults above.
+ for key, value in extraheaders.iteritems():
+ txheaders[key] = value
+ req = urllib2.Request(url, postdata, txheaders)
+ self._cookies.add_cookie_header(req)
+ if forbidRedirect:
+ redirector = HTTPNoRedirector()
+ #_moduleLogger.info("Redirection disabled")
+ else:
+ redirector = urllib2.HTTPRedirectHandler()
+ #_moduleLogger.info("Redirection enabled")
+
+ http_handler = urllib2.HTTPHandler(debuglevel=self.debug)
+ https_handler = urllib2.HTTPSHandler(debuglevel=self.debug)
+
+ u = urllib2.build_opener(
+ http_handler,
+ https_handler,
+ urllib2.HTTPCookieProcessor(self._cookies),
+ redirector
+ )
+ # Identify as Firefox 3.5.4 on Windows.
+ u.addheaders = [(
+ 'User-Agent',
+ 'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.1.4) Gecko/20091016 Firefox/3.5.4 (.NET CLR 3.5.30729)'
+ )]
+ # postdata was already handed to Request() above; add_data() sets the
+ # same body again.
+ if not postdata is None:
+ req.add_data(postdata)
+ return (req, u)
def _read(self, openerdirector, trycount):
chunks = []
elif 'uri' in headers:
newurl = headers.getheaders('uri')[0]
e.newurl = newurl
+ _moduleLogger.info("New url: %s" % e.newurl)
raise e