Decreasing the timeout for making faster network transitions
[gc-dialer] / src / backends / gvoice / browser_emu.py
1 """
2 @author:          Laszlo Nagy
3 @copyright:   (c) 2005 by Szoftver Messias Bt.
4 @licence:        BSD style
5
6 Objects of the MozillaEmulator class can emulate a browser that is capable of:
7
8         - cookie management
9         - configurable user agent string
10         - GET and POST
11         - multipart POST (send files)
12         - receive content into file
13
I have seen many requests on the Python mailing list about how to emulate a browser. I have been using this class for years without any problems. This is how you can use it:
15
16         1. Use firefox
17         2. Install and open the livehttpheaders plugin
18         3. Use the website manually with firefox
19         4. Check the GET and POST requests in the livehttpheaders capture window
20         5. Create an instance of the above class and send the same GET and POST requests to the server.
21
22 Optional steps:
23
        - You can change the user agent string through the USER_AGENT class attribute, which the _build_opener method sends as the User-Agent header
25         - The "encode_multipart_formdata" function can be used alone to create POST data from a list of field values and files
26 """
27
import cookielib
import logging
import socket
import sys
import urllib2
33
34
# Logger shared by everything in this module, named after the module itself.
_moduleLogger = logging.getLogger(__name__)

# Default timeout, in seconds, applied to every socket created after this
# point in the whole process (not only the sockets opened by this module).
_DEFAULT_SOCKET_TIMEOUT = 25
socket.setdefaulttimeout(_DEFAULT_SOCKET_TIMEOUT)
37
38
def add_proxy(protocol, url, port):
    """Install a urllib2 default opener that proxies one protocol.

    A ProxyHandler mapping *protocol* to "url:port" is wrapped in a new opener,
    which is then installed as urllib2's global default opener.

    NOTE(review): MozillaEmulator builds its own openers in _build_opener, so
    it does not appear to go through the opener installed here - confirm.

    @param protocol: Protocol name used as the ProxyHandler key (e.g. "http").
    @param url: Address of the proxy, joined with port as "url:port".
    @param port: Port of the proxy.
    """
    handler = urllib2.ProxyHandler({protocol: "%s:%s" % (url, port)})
    urllib2.install_opener(urllib2.build_opener(handler))
46
47
class MozillaEmulator(object):
    """Small browser emulator built on urllib2 with a shared cookie jar.

    Every request is sent with a fixed set of browser-like headers (see
    _build_opener) and uses one cookielib.LWPCookieJar, which can optionally be
    loaded from and saved to a file.
    """

    USER_AGENT = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.1.4) Gecko/20091016 Firefox/3.5.4 (.NET CLR 3.5.30729)'
    # Alternative identity kept for reference:
    #USER_AGENT = "Mozilla/5.0 (iPhone; U; CPU iPhone OS 3_0 like Mac OS X; en-us) AppleWebKit/528.18 (KHTML, like Gecko) Version/4.0 Mobile/7A341 Safari/528.16"

    def __init__(self, trycount = 1):
        """Create a new MozillaEmulator object.

        @param trycount: The download() method will retry the operation if it
        fails. You can specify -1 for infinite retrying.  A value of 0 means no
        retrying. A value of 1 means one retry. etc."""
        # When true, download() logs request/response details and the
        # HTTP(S) handlers are created with debuglevel enabled.
        self.debug = False
        self.trycount = trycount
        self._cookies = cookielib.LWPCookieJar()
        # True once load_cookies() has successfully read a cookie file.
        self._loadedFromCookies = False
        # True once load_cookies() has been given a path; gates
        # save_cookies() and clear_cookies().
        self._storeCookies = False

    def load_cookies(self, path):
        """Bind the cookie jar to a file and try to load cookies from it.

        Load failures are logged and swallowed; the jar stays bound to *path*
        either way, so a later save_cookies() can still write the file.

        @param path: Cookie file to use, or None to leave the jar unbound.
        @return: True if the cookies were loaded, False if loading failed,
                None if path is None.
        """
        assert not self._loadedFromCookies, "Load cookies only once"
        if path is None:
            return

        self._cookies.filename = path
        try:
            self._cookies.load()
        except cookielib.LoadError:
            _moduleLogger.exception("Bad cookie file")
        except IOError:
            _moduleLogger.exception("No cookie file")
        except Exception:
            # Best effort: a broken cookie file must not prevent start-up.
            _moduleLogger.exception("Unknown error with cookies")
        else:
            self._loadedFromCookies = True
        self._storeCookies = True

        return self._loadedFromCookies

    def save_cookies(self):
        """Write the jar to its file; does nothing unless load_cookies() was given a path."""
        if self._storeCookies:
            self._cookies.save()

    def clear_cookies(self):
        """Empty the jar; does nothing unless load_cookies() was given a path."""
        if self._storeCookies:
            self._cookies.clear()

    def download(self, url,
            postdata = None, extraheaders = None, forbidRedirect = False,
            trycount = None, only_head = False,
        ):
        """Download an URL with GET or POST methods.

        @param postdata: It can be a string that will be POST-ed to the URL.
                When None is given, the method will be GET instead.
        @param extraheaders: You can add/modify HTTP headers with a dict here.
        @param forbidRedirect: Set this flag if you do not want to handle
                HTTP 301 and 302 redirects.
        @param trycount: Specify the maximum number of retries here.
                0 means no retry on error. Using -1 means infinite retrying.
                None means the default value (that is self.trycount).
        @param only_head: Return the opened response object without reading
                its body. The request itself is still a normal GET/POST; the
                caller is responsible for closing the returned response.

        @return: The raw HTML page data, or the response object when
                only_head is set.
        @raise urllib2.URLError: When the request still fails after the
                allowed number of retries.
        """
        _moduleLogger.debug("Performing download of %s", url)

        if extraheaders is None:
            extraheaders = {}
        if trycount is None:
            trycount = self.trycount
        failures = 0

        while True:
            try:
                request, opener = self._build_opener(url, postdata, extraheaders, forbidRedirect)
                response = opener.open(request)
                if self.debug:
                    _moduleLogger.info("%r - %r", request.get_method(), url)
                    _moduleLogger.info("%r - %r", response.code, response.msg)
                    _moduleLogger.info("%r", response.headers)
                self._cookies.extract_cookies(response, request)
                if only_head:
                    return response

                return self._read(response, trycount)
            except urllib2.URLError:
                # The exception is fetched through sys.exc_info() instead of
                # being bound in the except clause so the syntax is accepted
                # by old and new interpreters alike.
                error = sys.exc_info()[1]
                _moduleLogger.debug("%s: %s", error, url)
                failures += 1
                # A negative trycount means "retry forever".
                if -1 < trycount < failures:
                    raise

            # Retry :-)
            _moduleLogger.debug("MozillaEmulator: urllib2.URLError, retrying %d", failures)

    def _build_opener(self, url, postdata = None, extraheaders = None, forbidRedirect = False):
        """Create the request and the opener used for a single download attempt.

        @param url: Address to request.
        @param postdata: Request body; None produces a GET request.
        @param extraheaders: Dict of headers that override/extend the defaults.
        @param forbidRedirect: Use HTTPNoRedirector instead of the standard
                redirect handler.
        @return: (request, opener) tuple.
        """
        txheaders = {
            'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png',
            'Accept-Language': 'en,en-us;q=0.5',
            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
            'User-Agent': self.USER_AGENT,
        }
        if extraheaders is not None:
            txheaders.update(extraheaders)

        # Passing postdata here already attaches the body to the request.
        request = urllib2.Request(url, postdata, txheaders)
        self._cookies.add_cookie_header(request)

        if forbidRedirect:
            redirector = HTTPNoRedirector()
        else:
            redirector = urllib2.HTTPRedirectHandler()

        opener = urllib2.build_opener(
            urllib2.HTTPHandler(debuglevel=self.debug),
            urllib2.HTTPSHandler(debuglevel=self.debug),
            urllib2.HTTPCookieProcessor(self._cookies),
            redirector
        )
        return (request, opener)

    def _read(self, openerdirector, trycount):
        """Read the complete response body and close the response.

        @param openerdirector: Response object returned by the opener's open().
        @param trycount: Unused; kept so the signature stays unchanged.
        @return: The response body.
        @raise AssertionError: When a Content-Length header is present and
                does not match the number of bytes read.
        """
        try:
            data = openerdirector.read()
            headers = openerdirector.info()
        finally:
            # Always release the connection, even if the read fails.
            openerdirector.close()

        if "Content-Length" in headers:
            assert len(data) == int(headers["Content-Length"]), "The packet header promised %s of data but only was able to read %s of data" % (
                headers["Content-Length"],
                len(data),
            )

        return data
196
197
class HTTPNoRedirector(urllib2.HTTPRedirectHandler):
    """This is a custom http redirect handler that FORBIDS redirection.

    HTTP 301 and 302 responses are not followed. They are raised as
    urllib2.HTTPError instead, with the redirect target stored in the error's
    ``newurl`` attribute (None when the response names no target).
    """

    def http_error_302(self, req, fp, code, msg, headers):
        """Raise the redirect response as an HTTPError instead of following it.

        @param req: The request that produced the redirect.
        @param fp: File-like object holding the response body.
        @param code: HTTP status code of the response.
        @param msg: HTTP status message of the response.
        @param headers: Response headers.
        @raise urllib2.HTTPError: Always.
        """
        error = urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp)
        # Default to None so that a redirect without a usable target header
        # still surfaces as the HTTPError rather than as a NameError.
        error.newurl = None
        if code in (301, 302):
            for name in ('location', 'uri'):
                if name in headers:
                    error.newurl = headers.getheaders(name)[0]
                    break
        _moduleLogger.info("New url: %s", error.newurl)
        raise error

    # The base class binds http_error_301 to its own http_error_302 when the
    # base class is created, so overriding http_error_302 alone would leave
    # 301 redirects being followed.
    http_error_301 = http_error_302