import socket
-_moduleLogger = logging.getLogger("browser_emu")
-socket.setdefaulttimeout(10)
+_moduleLogger = logging.getLogger(__name__)
+socket.setdefaulttimeout(45)
+
+
+def add_proxy(protocol, url, port):
+ """Install a urllib2 default opener that proxies one protocol.
+
+ Builds a urllib2.ProxyHandler mapping `protocol` to "url:port" and
+ installs an opener built from it as urllib2's default opener.
+
+ @param protocol: Key of the ProxyHandler mapping (presumably a scheme
+ such as "http" or "https" -- confirm against callers)
+ @param url: Proxy host, formatted with "%s" into the proxy address
+ @param port: Proxy port, formatted with "%s" into the proxy address
+ """
+ proxyInfo = "%s:%s" % (url, port)
+ proxy = urllib2.ProxyHandler(
+ {protocol: proxyInfo}
+ )
+ opener = urllib2.build_opener(proxy)
+ # install_opener replaces the module-wide default opener, so this affects
+ # every later urllib2.urlopen() call in the process, not just this module.
+ # NOTE(review): openers created separately via build_opener() do not go
+ # through the installed opener -- confirm MozillaEmulator picks up the proxy.
+ urllib2.install_opener(opener)
class MozillaEmulator(object):
+ USER_AGENT = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.1.4) Gecko/20091016 Firefox/3.5.4 (.NET CLR 3.5.30729)'
+
def __init__(self, trycount = 1):
"""Create a new MozillaEmulator object.
- @param trycount: The download() method will retry the operation if it fails. You can specify -1 for infinite retrying.
- A value of 0 means no retrying. A value of 1 means one retry. etc."""
+ @param trycount: The download() method will retry the operation if it
+ fails. You can specify -1 for infinite retrying. A value of 0 means no
+ retrying. A value of 1 means one retry. etc."""
self.debug = False
self.trycount = trycount
self._cookies = cookielib.LWPCookieJar()
_moduleLogger.exception("No cookie file")
except Exception, e:
_moduleLogger.exception("Unknown error with cookies")
- else:
- self._loadedFromCookies = True
+ self._loadedFromCookies = True
return self._loadedFromCookies
@return: The raw HTML page data
"""
- _moduleLogger.info("Performing download of %s" % url)
+ _moduleLogger.debug("Performing download of %s" % url)
if extraheaders is None:
extraheaders = {}
return openerdirector
return self._read(openerdirector, trycount)
- except urllib2.URLError:
+ except urllib2.URLError, e:
+ _moduleLogger.debug("%s: %s" % (e, url))
cnt += 1
if (-1 < trycount) and (trycount < cnt):
raise
# Retry :-)
- _moduleLogger.info("MozillaEmulator: urllib2.URLError, retryting %d" % cnt)
+ _moduleLogger.debug("MozillaEmulator: urllib2.URLError, retrying %d" % cnt)
def _build_opener(self, url, postdata = None, extraheaders = None, forbidRedirect = False):
if extraheaders is None:
'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png',
'Accept-Language': 'en,en-us;q=0.5',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
+ 'User-Agent': self.USER_AGENT,
}
for key, value in extraheaders.iteritems():
txheaders[key] = value
urllib2.HTTPCookieProcessor(self._cookies),
redirector
)
- u.addheaders = [(
- 'User-Agent',
- 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.8) Gecko/20050511 Firefox/1.0.4'
- )]
if not postdata is None:
req.add_data(postdata)
return (req, u)