Removing contact caching from backend, moving it to session
[gc-dialer] / src / backends / gvoice / browser_emu.py
1 """
2 @author:          Laszlo Nagy
3 @copyright:   (c) 2005 by Szoftver Messias Bt.
4 @licence:        BSD style
5
6 Objects of the MozillaEmulator class can emulate a browser that is capable of:
7
8         - cookie management
9         - configurable user agent string
10         - GET and POST
11         - multipart POST (send files)
12         - receive content into file
13
14 I have seen many requests on the python mailing list about how to emulate a browser. I have been using this class for years now without any problems. This is how you can use it:
15
16         1. Use firefox
17         2. Install and open the livehttpheaders plugin
18         3. Use the website manually with firefox
19         4. Check the GET and POST requests in the livehttpheaders capture window
20         5. Create an instance of the above class and send the same GET and POST requests to the server.
21
22 Optional steps:
23
24         - You can change the user agent string through the USER_AGENT class attribute (it is sent by the _build_opener method)
25         - The "encode_multipart_formdata" function can be used alone to create POST data from a list of field values and files
26 """
27
28 import urllib2
29 import cookielib
30 import logging
31
32 import socket
33
34
# Logger used by every function and class in this module.
_moduleLogger = logging.getLogger(__name__)
# Process-wide default: sockets created without an explicit timeout give up
# after 45 seconds instead of blocking forever.
socket.setdefaulttimeout(45)
37
38
def add_proxy(protocol, url, port):
        """Install a global urllib2 opener that sends *protocol* traffic through a proxy.

        NOTE: MozillaEmulator._build_opener creates its own opener for every
        request, so the opener installed here only affects code that goes
        through urllib2.urlopen().

        @param protocol: URL scheme to be proxied, e.g. "http".
        @param url: Host of the proxy server.
        @param port: Port of the proxy server.
        """
        handler = urllib2.ProxyHandler({protocol: "%s:%s" % (url, port)})
        urllib2.install_opener(urllib2.build_opener(handler))
46
47
class MozillaEmulator(object):
        """Small browser emulator on top of urllib2 with a persistent cookie jar.

        Every request is sent with browser-like headers (see USER_AGENT) and the
        cookies collected so far. Cookies live in memory unless load_cookies()
        is given a file path.
        """

        USER_AGENT = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.1.4) Gecko/20091016 Firefox/3.5.4 (.NET CLR 3.5.30729)'

        def __init__(self, trycount = 1):
                """Create a new MozillaEmulator object.

                @param trycount: Default retry budget for download(). You can specify
                        -1 for infinite retrying. A value of 0 means no retrying,
                        a value of 1 means one retry, etc.
                """
                self.debug = False
                self.trycount = trycount
                self._cookies = cookielib.LWPCookieJar()
                self._loadedFromCookies = False
                self._storeCookies = False

        def load_cookies(self, path):
                """Bind the cookie jar to a file and try to load cookies from it.

                Once a path has been given, save_cookies() and clear_cookies()
                become active even when loading failed (for example because the
                file does not exist yet). Load failures are logged, not raised.

                @param path: Location of the cookie file, or None to do nothing.
                @return: True when cookies were loaded, False when loading failed,
                        None when path is None.
                """
                assert not self._loadedFromCookies, "Load cookies only once"
                if path is None:
                        return None

                self._cookies.filename = path
                try:
                        self._cookies.load()
                except cookielib.LoadError:
                        _moduleLogger.exception("Bad cookie file")
                except IOError:
                        _moduleLogger.exception("No cookie file")
                except Exception:
                        _moduleLogger.exception("Unknown error with cookies")
                else:
                        self._loadedFromCookies = True
                self._storeCookies = True

                return self._loadedFromCookies

        def save_cookies(self):
                """Write the cookie jar to its file; no-op until load_cookies() got a path."""
                if self._storeCookies:
                        self._cookies.save()

        def clear_cookies(self):
                """Drop all cookies from the jar; no-op until load_cookies() got a path."""
                if self._storeCookies:
                        self._cookies.clear()

        def download(self, url,
                        postdata = None, extraheaders = None, forbidRedirect = False,
                        trycount = None, only_head = False,
                ):
                """Download an URL with GET or POST methods.

                @param postdata: It can be a string that will be POST-ed to the URL.
                        When None is given, the method will be GET instead.
                @param extraheaders: You can add/modify HTTP headers with a dict here.
                @param forbidRedirect: Set this flag if you do not want to handle
                        HTTP 301 and 302 redirects.
                @param trycount: Specify the maximum number of retries here.
                        0 means no retry on error. Using -1 means infinite retrying.
                        None means the default value (that is self.trycount).
                @param only_head: Return the opened response object without reading
                        its body. The request itself is still a normal GET/POST, and
                        the caller becomes responsible for closing the response.

                @return: The raw HTML page data, or the response object when
                        only_head is set.
                @raise urllib2.URLError: (this includes urllib2.HTTPError) when the
                        request still fails after the allowed number of retries.
                """
                _moduleLogger.debug("Performing download of %s", url)

                if extraheaders is None:
                        extraheaders = {}
                if trycount is None:
                        trycount = self.trycount
                cnt = 0

                while True:
                        try:
                                req, opener = self._build_opener(url, postdata, extraheaders, forbidRedirect)
                                response = opener.open(req)
                                if self.debug:
                                        _moduleLogger.info("%r - %r", req.get_method(), url)
                                        _moduleLogger.info("%r - %r", response.code, response.msg)
                                        _moduleLogger.info("%r", response.headers)
                                self._cookies.extract_cookies(response, req)
                                if only_head:
                                        return response

                                return self._read(response, trycount)
                        except urllib2.URLError as e:
                                _moduleLogger.debug("%s: %s", e, url)
                                cnt += 1
                                # A negative trycount never gives up.
                                if (-1 < trycount) and (trycount < cnt):
                                        raise

                        # Only reached after a failure that is still within the retry budget.
                        _moduleLogger.debug("MozillaEmulator: urllib2.URLError, retrying %d", cnt)

        def _build_opener(self, url, postdata = None, extraheaders = None, forbidRedirect = False):
                """Create the request and the opener used for a single download attempt.

                @param url: Address to request.
                @param postdata: Body to POST, or None for a GET request.
                @param extraheaders: Dict of headers that are added to, or override,
                        the default browser-like headers.
                @param forbidRedirect: Use HTTPNoRedirector instead of the standard
                        redirect handler.
                @return: A (request, opener) tuple.
                """
                txheaders = {
                        'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png',
                        'Accept-Language': 'en,en-us;q=0.5',
                        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
                        'User-Agent': self.USER_AGENT,
                }
                if extraheaders is not None:
                        txheaders.update(extraheaders)

                # Passing postdata to the constructor already makes this a POST
                # request, so no separate add_data() call is needed.
                req = urllib2.Request(url, postdata, txheaders)
                self._cookies.add_cookie_header(req)

                if forbidRedirect:
                        redirector = HTTPNoRedirector()
                else:
                        redirector = urllib2.HTTPRedirectHandler()

                opener = urllib2.build_opener(
                        urllib2.HTTPHandler(debuglevel=self.debug),
                        urllib2.HTTPSHandler(debuglevel=self.debug),
                        urllib2.HTTPCookieProcessor(self._cookies),
                        redirector
                )
                return (req, opener)

        def _read(self, openerdirector, trycount):
                """Read the complete response body, close the response and check its size.

                @param openerdirector: The response object obtained from opening a
                        request (despite the name, not the opener itself).
                @param trycount: Unused; kept so existing calls keep working.
                @return: The response body.
                @raise AssertionError: When a Content-Length header is present and
                        does not match the number of bytes read.
                """
                headers = openerdirector.info()
                try:
                        data = openerdirector.read()
                finally:
                        # The body is consumed (or reading failed), so release the connection.
                        openerdirector.close()

                if "Content-Length" in headers:
                        promised = headers["Content-Length"]
                        # Raised explicitly so the check is not stripped when running with -O.
                        if len(data) != int(promised):
                                raise AssertionError(
                                        "The packet header promised %s of data but only was able to read %s of data" % (
                                                promised,
                                                len(data),
                                        )
                                )

                return data
195
196
class HTTPNoRedirector(urllib2.HTTPRedirectHandler):
        """This is a custom http redirect handler that FORBIDS redirection.

        Instead of following a 301 or 302 response it raises the
        urllib2.HTTPError for that response, with the redirect target attached
        as the ``newurl`` attribute.
        """

        def http_error_302(self, req, fp, code, msg, headers):
                """Raise an HTTPError describing the redirect instead of following it.

                @param req: The request that was answered with a redirect.
                @param fp: File-like object holding the response body.
                @param code: HTTP status code of the response.
                @param msg: HTTP status message of the response.
                @param headers: Headers of the response.
                @raise urllib2.HTTPError: Always. Its ``newurl`` attribute is the
                        first 'location' (or else 'uri') header value, or None when
                        the response carries neither header.
                """
                e = urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp)
                # Default to None so a redirect without a target header still
                # surfaces as the HTTPError rather than as an UnboundLocalError.
                newurl = None
                if 'location' in headers:
                        newurl = headers.getheaders('location')[0]
                elif 'uri' in headers:
                        newurl = headers.getheaders('uri')[0]
                e.newurl = newurl
                _moduleLogger.info("New url: %s" % e.newurl)
                raise e

        # The base class binds http_error_301 to its *own* http_error_302, so
        # overriding http_error_302 alone would leave 301 redirects followed.
        http_error_301 = http_error_302