_moduleLogger = logging.getLogger("browser_emu")
-socket.setdefaulttimeout(10)
+socket.setdefaulttimeout(20)  # default timeout, in seconds, for every socket object created after this call
class MozillaEmulator(object):
def __init__(self, trycount = 1):
"""Create a new MozillaEmulator object.
- @param trycount: The download() method will retry the operation if it fails. You can specify -1 for infinite retrying.
- A value of 0 means no retrying. A value of 1 means one retry. etc."""
- self.cookies = cookielib.LWPCookieJar()
+ @param trycount: The download() method will retry the operation if it
+ fails. Specify -1 to retry indefinitely; 0 means no retrying, 1 means
+ one retry, and so on."""
self.debug = False
self.trycount = trycount
+ self._cookies = cookielib.LWPCookieJar()  # cookie jar used by download() and _build_opener()
+ self._loadedFromCookies = False  # becomes True once load_cookies() reads a cookie file successfully
+
+ def load_cookies(self, path):
+     """Load previously saved cookies from the file at C{path}.
+
+     The path is stored on the cookie jar before loading is attempted.
+     Load failures are logged and swallowed rather than raised.
+
+     @param path: Filename of the LWP-format cookie file, or None to skip
+     loading entirely.
+     @return: True if the cookies were loaded, False otherwise (including
+     when path is None).
+     """
+     assert not self._loadedFromCookies, "Load cookies only once"
+     if path is None:
+         # Return an explicit bool so every exit path has the same type.
+         return False
+
+     self._cookies.filename = path
+     try:
+         self._cookies.load()
+     except cookielib.LoadError:
+         # Must precede IOError: LoadError is an IOError subclass.
+         _moduleLogger.exception("Bad cookie file")
+     except IOError:
+         _moduleLogger.exception("No cookie file")
+     except Exception:
+         # Best effort: a broken cookie file must not stop the caller.
+         _moduleLogger.exception("Unknown error with cookies")
+     else:
+         self._loadedFromCookies = True
+
+     return self._loadedFromCookies
+
+ def save_cookies(self):
+     """Write the cookie jar to the file recorded on it.
+
+     Does nothing unless an earlier load_cookies() call succeeded.
+     NOTE(review): a cookie file that did not exist at load time is
+     therefore never created here -- confirm that is intended.
+     """
+     if not self._loadedFromCookies:
+         return
+     self._cookies.save()
+
+ def clear_cookies(self):
+     """Remove every cookie from the jar.
+
+     Does nothing unless an earlier load_cookies() call succeeded.
+     NOTE(review): cookies gathered without a loaded cookie file are
+     left in place -- confirm that is intended.
+     """
+     if not self._loadedFromCookies:
+         return
+     self._cookies.clear()
def download(self, url,
postdata = None, extraheaders = None, forbidRedirect = False,
@return: The raw HTML page data
"""
- _moduleLogger.warning("Performing download of %s" % url)
+ _moduleLogger.debug("Performing download of %s" % url)
if extraheaders is None:
extraheaders = {}
_moduleLogger.info("%r - %r" % (req.get_method(), url))
_moduleLogger.info("%r - %r" % (openerdirector.code, openerdirector.msg))
_moduleLogger.info("%r" % (openerdirector.headers))
- self.cookies.extract_cookies(openerdirector, req)
+ self._cookies.extract_cookies(openerdirector, req)
if only_head:
return openerdirector
return self._read(openerdirector, trycount)
- except urllib2.URLError:
+ except urllib2.URLError, e:
+ _moduleLogger.debug("%s: %s" % (e, url))
cnt += 1
if (-1 < trycount) and (trycount < cnt):
raise
# Retry :-)
- _moduleLogger.info("MozillaEmulator: urllib2.URLError, retryting %d" % cnt)
+ _moduleLogger.debug("MozillaEmulator: urllib2.URLError, retrying %d" % cnt)
def _build_opener(self, url, postdata = None, extraheaders = None, forbidRedirect = False):
if extraheaders is None:
for key, value in extraheaders.iteritems():
txheaders[key] = value
req = urllib2.Request(url, postdata, txheaders)
- self.cookies.add_cookie_header(req)
+ self._cookies.add_cookie_header(req)
if forbidRedirect:
redirector = HTTPNoRedirector()
#_moduleLogger.info("Redirection disabled")
u = urllib2.build_opener(
http_handler,
https_handler,
- urllib2.HTTPCookieProcessor(self.cookies),
+ urllib2.HTTPCookieProcessor(self._cookies),
redirector
)
u.addheaders = [(
'User-Agent',
- 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.8) Gecko/20050511 Firefox/1.0.4'
+ 'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.1.4) Gecko/20091016 Firefox/3.5.4 (.NET CLR 3.5.30729)'
)]
if not postdata is None:
req.add_data(postdata)