From ef732138bc3958985b1041e5f5005f6947851ced Mon Sep 17 00:00:00 2001 From: Ed Page Date: Sat, 12 Dec 2009 15:24:01 -0600 Subject: [PATCH] Improving the gvoice api --- src/channel/call.py | 2 +- src/gvoice/backend.py | 449 +++++++++++++++++++++++++++------------------ src/gvoice/browser_emu.py | 107 +++++++---- 3 files changed, 342 insertions(+), 216 deletions(-) diff --git a/src/channel/call.py b/src/channel/call.py index 0e2bebf..fd6cb94 100644 --- a/src/channel/call.py +++ b/src/channel/call.py @@ -57,7 +57,7 @@ class CallChannel( contactId, contactNumber = handle.ContactHandle.from_handle_name(contact.name) - self._conn.session.backend.dial(contactNumber) + self._conn.session.backend.call(contactNumber) streamId = 0 streamState = telepathy.constants.MEDIA_STREAM_STATE_DISCONNECTED diff --git a/src/gvoice/backend.py b/src/gvoice/backend.py index e63671e..18eb7f3 100755 --- a/src/gvoice/backend.py +++ b/src/gvoice/backend.py @@ -40,7 +40,8 @@ from xml.sax import saxutils from xml.etree import ElementTree try: - import simplejson + import simplejson as _simplejson + simplejson = _simplejson except ImportError: simplejson = None @@ -50,55 +51,6 @@ import browser_emu _moduleLogger = logging.getLogger("gvoice.backend") -def safe_eval(s): - _TRUE_REGEX = re.compile("true") - _FALSE_REGEX = re.compile("false") - s = _TRUE_REGEX.sub("True", s) - s = _FALSE_REGEX.sub("False", s) - return eval(s, {}, {}) - - -if simplejson is None: - def parse_json(flattened): - return safe_eval(flattened) -else: - def parse_json(flattened): - return simplejson.loads(flattened) - - -def itergroup(iterator, count, padValue = None): - """ - Iterate in groups of 'count' values. If there - aren't enough values, the last result is padded with - None. - - >>> for val in itergroup([1, 2, 3, 4, 5, 6], 3): - ... print tuple(val) - (1, 2, 3) - (4, 5, 6) - >>> for val in itergroup([1, 2, 3, 4, 5, 6], 3): - ... print list(val) - [1, 2, 3] - [4, 5, 6] - >>> for val in itergroup([1, 2, 3, 4, 5, 6, 7], 3): - ... print tuple(val) - (1, 2, 3) - (4, 5, 6) - (7, None, None) - >>> for val in itergroup("123456", 3): - ... print tuple(val) - ('1', '2', '3') - ('4', '5', '6') - >>> for val in itergroup("123456", 3): - ... print repr("".join(val)) - '123' - '456' - """ - paddedIterator = itertools.chain(iterator, itertools.repeat(padValue, count-1)) - nIterators = (paddedIterator, ) * count - return itertools.izip(*nIterators) - - class NetworkError(RuntimeError): pass @@ -109,14 +61,15 @@ class GVoiceBackend(object): the functions include login, setting up a callback number, and initalting a callback """ + PHONE_TYPE_HOME = 1 + PHONE_TYPE_MOBILE = 2 + PHONE_TYPE_WORK = 3 + PHONE_TYPE_GIZMO = 7 + def __init__(self, cookieFile = None): # Important items in this function are the setup of the browser emulation and cookie file self._browser = browser_emu.MozillaEmulator(1) - if cookieFile is None: - cookieFile = os.path.join(os.path.expanduser("~"), ".gv_cookies.txt") - self._browser.cookies.filename = cookieFile - if os.path.isfile(cookieFile): - self._browser.cookies.load() + self._loadedFromCookies = self._browser.load_cookies(cookieFile) self._token = "" self._accountNum = "" @@ -128,35 +81,54 @@ class GVoiceBackend(object): self._validateRe = re.compile("^[0-9]{10,}$") - self._forwardURL = "https://www.google.com/voice/mobile/phones" - self._tokenURL = "http://www.google.com/voice/m" self._loginURL = "https://www.google.com/accounts/ServiceLoginAuth" - self._galxRe = re.compile(r"""""", re.MULTILINE | re.DOTALL) - self._tokenRe = re.compile(r"""""") - self._accountNumRe = re.compile(r"""(.{14})""") - self._callbackRe = re.compile(r"""\s+(.*?):\s*(.*?)\s*$""", re.M) + + SECURE_URL_BASE = "https://www.google.com/voice/" + SECURE_MOBILE_URL_BASE = SECURE_URL_BASE + "mobile/" + self._forwardURL = SECURE_MOBILE_URL_BASE + "phones" + self._tokenURL = SECURE_URL_BASE + "m" + self._callUrl = SECURE_URL_BASE + "call/connect" + self._callCancelURL = SECURE_URL_BASE + "call/cancel" + self._sendSmsURL = SECURE_URL_BASE + "sms/send" self._isDndURL = "https://www.google.com/voice/m/donotdisturb" self._isDndRe = re.compile(r"""""") self._setDndURL = "https://www.google.com/voice/m/savednd" - self._gvDialingStrRe = re.compile("This may take a few seconds", re.M) - self._clicktocallURL = "https://www.google.com/voice/m/sendcall" - self._sendSmsURL = "https://www.google.com/voice/m/sendsms" + self._downloadVoicemailURL = SECURE_URL_BASE + "media/send_voicemail/" + + self._XML_SEARCH_URL = SECURE_URL_BASE + "inbox/search/" + self._XML_ACCOUNT_URL = SECURE_URL_BASE + "inbox/contacts/" + self._XML_RECENT_URL = SECURE_URL_BASE + "inbox/recent/" + + self.XML_FEEDS = ( + 'inbox', 'starred', 'all', 'spam', 'trash', 'voicemail', 'sms', + 'recorded', 'placed', 'received', 'missed' + ) + self._XML_INBOX_URL = SECURE_URL_BASE + "inbox/recent/inbox" + self._XML_STARRED_URL = SECURE_URL_BASE + "inbox/recent/starred" + self._XML_ALL_URL = SECURE_URL_BASE + "inbox/recent/all" + self._XML_SPAM_URL = SECURE_URL_BASE + "inbox/recent/spam" + self._XML_TRASH_URL = SECURE_URL_BASE + "inbox/recent/trash" + self._XML_VOICEMAIL_URL = SECURE_URL_BASE + "inbox/recent/voicemail/" + self._XML_SMS_URL = SECURE_URL_BASE + "inbox/recent/sms/" + self._XML_RECORDED_URL = SECURE_URL_BASE + "inbox/recent/recorded/" + self._XML_PLACED_URL = SECURE_URL_BASE + "inbox/recent/placed/" + self._XML_RECEIVED_URL = SECURE_URL_BASE + "inbox/recent/received/" + self._XML_MISSED_URL = SECURE_URL_BASE + "inbox/recent/missed/" + + self._contactsURL = SECURE_MOBILE_URL_BASE + "contacts" + self._contactDetailURL = SECURE_MOBILE_URL_BASE + "contact" - self._recentCallsURL = "https://www.google.com/voice/inbox/recent/" - self._placedCallsURL = "https://www.google.com/voice/inbox/recent/placed/" - self._receivedCallsURL = "https://www.google.com/voice/inbox/recent/received/" - self._missedCallsURL = "https://www.google.com/voice/inbox/recent/missed/" + self._galxRe = re.compile(r"""""", re.MULTILINE | re.DOTALL) + self._tokenRe = re.compile(r"""""") + self._accountNumRe = re.compile(r"""(.{14})""") + self._callbackRe = re.compile(r"""\s+(.*?):\s*(.*?)\s*$""", re.M) self._contactsRe = re.compile(r"""(.*?)""", re.S) self._contactsNextRe = re.compile(r""".*Next.*?""", re.S) - self._contactsURL = "https://www.google.com/voice/mobile/contacts" self._contactDetailPhoneRe = re.compile(r"""([0-9+\-\(\) \t]+?)\((\w+)\)""", re.S) - self._contactDetailURL = "https://www.google.com/voice/mobile/contact" - self._voicemailURL = "https://www.google.com/voice/inbox/recent/voicemail/" - self._smsURL = "https://www.google.com/voice/inbox/recent/sms/" self._seperateVoicemailsRegex = re.compile(r"""^\s*
""", re.MULTILINE | re.DOTALL) self._exactVoicemailTimeRegex = re.compile(r"""(.*?)""", re.MULTILINE) self._relativeVoicemailTimeRegex = re.compile(r"""(.*?)""", re.MULTILINE) @@ -164,7 +136,7 @@ class GVoiceBackend(object): self._voicemailNumberRegex = re.compile(r"""""", re.MULTILINE) self._prettyVoicemailNumberRegex = re.compile(r"""(.*?)""", re.MULTILINE) self._voicemailLocationRegex = re.compile(r""".*?(.*?)""", re.MULTILINE) - self._messagesContactID = re.compile(r""".*?\s*?(.*?)""", re.MULTILINE) + self._messagesContactIDRegex = re.compile(r""".*?\s*?(.*?)""", re.MULTILINE) self._voicemailMessageRegex = re.compile(r"""((.*?)|(.*?))""", re.MULTILINE) self._smsFromRegex = re.compile(r"""(.*?)""", re.MULTILINE | re.DOTALL) self._smsTimeRegex = re.compile(r"""(.*?)""", re.MULTILINE | re.DOTALL) @@ -176,26 +148,25 @@ class GVoiceBackend(object): @note Once logged in try not to reauth more than once a minute. @returns If authenticated """ - if (time.time() - self._lastAuthed) < 120 and not force: + isRecentledAuthed = (time.time() - self._lastAuthed) < 120 + isPreviouslyAuthed = self._token is not None + if isRecentledAuthed and isPreviouslyAuthed and not force: return True try: - page = self._browser.download(self._forwardURL) + page = self._get_page(self._forwardURL) self._grab_account_info(page) except Exception, e: _moduleLogger.exception(str(e)) return False - self._browser.cookies.save() + self._browser.save_cookies() self._lastAuthed = time.time() return True def _get_token(self): - try: - tokenPage = self._browser.download(self._tokenURL) - except urllib2.URLError, e: - _moduleLogger.exception("Translating error: %s" % str(e)) - raise NetworkError("%s is not accesible" % self._loginURL) + tokenPage = self._get_page(self._tokenURL) + galxTokens = self._galxRe.search(tokenPage) if galxTokens is not None: galxToken = galxTokens.group(1) @@ -205,7 +176,7 @@ class GVoiceBackend(object): return galxToken def _login(self, username, password, token): - loginPostData = urllib.urlencode({ + loginData = { 'Email' : username, 'Passwd' : password, 'service': "grandcentral", @@ -214,13 +185,9 @@ class GVoiceBackend(object): "PersistentCookie": "yes", "GALX": token, "continue": self._forwardURL, - }) + } - try: - loginSuccessOrFailurePage = self._browser.download(self._loginURL, loginPostData) - except urllib2.URLError, e: - _moduleLogger.exception("Translating error: %s" % str(e)) - raise NetworkError("%s is not accesible" % self._loginURL) + loginSuccessOrFailurePage = self._get_page(self._loginURL, loginData) return loginSuccessOrFailurePage def login(self, username, password): @@ -243,21 +210,18 @@ class GVoiceBackend(object): return False _moduleLogger.info("Redirection failed on initial login attempt, auto-corrected for this") - self._browser.cookies.save() + self._browser.save_cookies() self._lastAuthed = time.time() return True def logout(self): + self._browser.clear_cookies() + self._browser.save_cookies() + self._token = None self._lastAuthed = 0.0 - self._browser.cookies.clear() - self._browser.cookies.save() def is_dnd(self): - try: - isDndPage = self._browser.download(self._isDndURL) - except urllib2.URLError, e: - _moduleLogger.exception("Translating error: %s" % str(e)) - raise NetworkError("%s is not accesible" % self._isDndURL) + isDndPage = self._get_page(self._isDndURL) dndGroup = self._isDndRe.search(isDndPage) if dndGroup is None: @@ -267,60 +231,94 @@ class GVoiceBackend(object): return isDnd def set_dnd(self, doNotDisturb): - dndPostData = urllib.urlencode({ + dndPostData = { "doNotDisturb": 1 if doNotDisturb else 0, "_rnr_se": self._token, - }) + } - try: - dndPage = self._browser.download(self._setDndURL, dndPostData) - except urllib2.URLError, e: - _moduleLogger.exception("Translating error: %s" % str(e)) - raise NetworkError("%s is not accesible" % self._setDndURL) + dndPage = self._get_page(self._setDndURL, dndPostData) - def dial(self, number): + def call(self, outgoingNumber): """ This is the main function responsible for initating the callback """ - number = self._send_validation(number) - try: - clickToCallData = urllib.urlencode({ - "number": number, - "phone": self._callbackNumber, - "_rnr_se": self._token, - }) - otherData = { - 'Referer' : 'https://google.com/voice/m/callsms', - } - callSuccessPage = self._browser.download(self._clicktocallURL, clickToCallData, None, otherData) - except urllib2.URLError, e: - _moduleLogger.exception("Translating error: %s" % str(e)) - raise NetworkError("%s is not accesible" % self._clicktocallURL) + outgoingNumber = self._send_validation(outgoingNumber) + subscriberNumber = None + phoneType = guess_phone_type(self._callbackNumber) # @todo Fix this hack + + page = self._get_page_with_token( + self._callUrl, + { + 'outgoingNumber': outgoingNumber, + 'forwardingNumber': self._callbackNumber, + 'subscriberNumber': subscriberNumber or 'undefined', + 'phoneType': phoneType, + 'remember': '1' + }, + ) + self._parse_with_validation(page) + return True - if self._gvDialingStrRe.search(callSuccessPage) is None: - raise RuntimeError("Google Voice returned an error") + def cancel(self, outgoingNumber=None): + """ + Cancels a call matching outgoing and forwarding numbers (if given). + Will raise an error if no matching call is being placed + """ + page = self._get_page_with_token( + self._callCancelURL, + { + 'outgoingNumber': outgoingNumber or 'undefined', + 'forwardingNumber': self._callbackNumber or 'undefined', + 'cancelType': 'C2C', + }, + ) + self._parse_with_validation(page) + + def send_sms(self, phoneNumber, message): + phoneNumber = self._send_validation(phoneNumber) + page = self._get_page_with_token( + self._sendSmsURL, + { + 'phoneNumber': phoneNumber, + 'text': message + }, + ) + self._parse_with_validation(page) - return True + def search(self, query): + """ + Search your Google Voice Account history for calls, voicemails, and sms + Returns ``Folder`` instance containting matching messages + """ + page = self._get_page( + self._XML_SEARCH_URL, + {"q": query}, + ) + json, html = extract_payload(page) + return json - def send_sms(self, number, message): - number = self._send_validation(number) - try: - smsData = urllib.urlencode({ - "number": number, - "smstext": message, - "_rnr_se": self._token, - "id": "undefined", - "c": "undefined", - }) - otherData = { - 'Referer' : 'https://google.com/voice/m/sms', - } - smsSuccessPage = self._browser.download(self._sendSmsURL, smsData, None, otherData) - except urllib2.URLError, e: - _moduleLogger.exception("Translating error: %s" % str(e)) - raise NetworkError("%s is not accesible" % self._sendSmsURL) + def get_feed(self, feed): + actualFeed = "_XML_%s_URL" % feed.upper() + feedUrl = getattr(self, actualFeed) - return True + page = self._get_page(feedUrl) + json, html = extract_payload(page) + + return json + + def download(self, messageId, adir): + """ + Download a voicemail or recorded call MP3 matching the given ``msg`` + which can either be a ``Message`` instance, or a SHA1 identifier. + Saves files to ``adir`` (defaults to current directory). + Message hashes can be found in ``self.voicemail().messages`` for example. + Returns location of saved file. + """ + page = self._get_page(self._downloadVoicemailURL, {"id": messageId}) + fn = os.path.join(adir, '%s.mp3' % messageId) + with open(fn, 'wb') as fo: + fo.write(page) + return fn def is_valid_syntax(self, number): """ @@ -362,15 +360,11 @@ class GVoiceBackend(object): @returns Iterable of (personsName, phoneNumber, exact date, relative date, action) """ for action, url in ( - ("Received", self._receivedCallsURL), - ("Missed", self._missedCallsURL), - ("Placed", self._placedCallsURL), + ("Received", self._XML_RECEIVED_URL), + ("Missed", self._XML_MISSED_URL), + ("Placed", self._XML_PLACED_URL), ): - try: - flatXml = self._browser.download(url) - except urllib2.URLError, e: - _moduleLogger.exception("Translating error: %s" % str(e)) - raise NetworkError("%s is not accesible" % url) + flatXml = self._get_page(url) allRecentHtml = self._grab_html(flatXml) allRecentData = self._parse_voicemail(allRecentHtml) @@ -384,11 +378,7 @@ class GVoiceBackend(object): """ contactsPagesUrls = [self._contactsURL] for contactsPageUrl in contactsPagesUrls: - try: - contactsPage = self._browser.download(contactsPageUrl) - except urllib2.URLError, e: - _moduleLogger.exception("Translating error: %s" % str(e)) - raise NetworkError("%s is not accesible" % contactsPageUrl) + contactsPage = self._get_page(contactsPageUrl) for contact_match in self._contactsRe.finditer(contactsPage): contactId = contact_match.group(1) contactName = saxutils.unescape(contact_match.group(2)) @@ -404,11 +394,7 @@ class GVoiceBackend(object): """ @returns Iterable of (Phone Type, Phone Number) """ - try: - detailPage = self._browser.download(self._contactDetailURL + '/' + contactId) - except urllib2.URLError, e: - _moduleLogger.exception("Translating error: %s" % str(e)) - raise NetworkError("%s is not accesible" % self._contactDetailURL) + detailPage = self._get_page(self._contactDetailURL + '/' + contactId) for detail_match in self._contactDetailPhoneRe.finditer(detailPage): phoneNumber = detail_match.group(1) @@ -416,22 +402,14 @@ class GVoiceBackend(object): yield (phoneType, phoneNumber) def get_messages(self): - try: - voicemailPage = self._browser.download(self._voicemailURL) - except urllib2.URLError, e: - _moduleLogger.exception("Translating error: %s" % str(e)) - raise NetworkError("%s is not accesible" % self._voicemailURL) + voicemailPage = self._get_page(self._XML_VOICEMAIL_URL) voicemailHtml = self._grab_html(voicemailPage) voicemailJson = self._grab_json(voicemailPage) parsedVoicemail = self._parse_voicemail(voicemailHtml) voicemails = self._merge_messages(parsedVoicemail, voicemailJson) decoratedVoicemails = self._decorate_voicemail(voicemails) - try: - smsPage = self._browser.download(self._smsURL) - except urllib2.URLError, e: - _moduleLogger.exception("Translating error: %s" % str(e)) - raise NetworkError("%s is not accesible" % self._smsURL) + smsPage = self._get_page(self._XML_SMS_URL) smsHtml = self._grab_html(smsPage) smsJson = self._grab_json(smsPage) parsedSms = self._parse_sms(smsHtml) @@ -510,7 +488,7 @@ class GVoiceBackend(object): number = numberGroup.group(1).strip() if numberGroup else "" prettyNumberGroup = self._prettyVoicemailNumberRegex.search(messageHtml) prettyNumber = prettyNumberGroup.group(1).strip() if prettyNumberGroup else "" - contactIdGroup = self._messagesContactID.search(messageHtml) + contactIdGroup = self._messagesContactIDRegex.search(messageHtml) contactId = contactIdGroup.group(1).strip() if contactIdGroup else "" messageGroups = self._voicemailMessageRegex.finditer(messageHtml) @@ -565,7 +543,7 @@ class GVoiceBackend(object): number = numberGroup.group(1).strip() if numberGroup else "" prettyNumberGroup = self._prettyVoicemailNumberRegex.search(messageHtml) prettyNumber = prettyNumberGroup.group(1).strip() if prettyNumberGroup else "" - contactIdGroup = self._messagesContactID.search(messageHtml) + contactIdGroup = self._messagesContactIDRegex.search(messageHtml) contactId = contactIdGroup.group(1).strip() if contactIdGroup else "" fromGroups = self._smsFromRegex.finditer(messageHtml) @@ -604,6 +582,120 @@ class GVoiceBackend(object): message["isArchived"] = "inbox" not in jsonItem["labels"] yield message + def _get_page(self, url, data = None, refererUrl = None): + headers = {} + if refererUrl is not None: + headers["Referer"] = refererUrl + + encodedData = urllib.urlencode(data) if data is not None else None + + try: + page = self._browser.download(url, encodedData, None, headers) + except urllib2.URLError, e: + _moduleLogger.error("Translating error: %s" % str(e)) + raise NetworkError("%s is not accesible" % url) + + return page + + def _get_page_with_token(self, url, data = None, refererUrl = None): + if data is None: + data = {} + data['_rnr_se'] = self._token + + page = self._get_page(url, data, refererUrl) + + return page + + def _parse_with_validation(self, page): + json, html = extract_payload(page) + validate_response(json) + return json, html + + +def itergroup(iterator, count, padValue = None): + """ + Iterate in groups of 'count' values. If there + aren't enough values, the last result is padded with + None. + + >>> for val in itergroup([1, 2, 3, 4, 5, 6], 3): + ... print tuple(val) + (1, 2, 3) + (4, 5, 6) + >>> for val in itergroup([1, 2, 3, 4, 5, 6], 3): + ... print list(val) + [1, 2, 3] + [4, 5, 6] + >>> for val in itergroup([1, 2, 3, 4, 5, 6, 7], 3): + ... print tuple(val) + (1, 2, 3) + (4, 5, 6) + (7, None, None) + >>> for val in itergroup("123456", 3): + ... print tuple(val) + ('1', '2', '3') + ('4', '5', '6') + >>> for val in itergroup("123456", 3): + ... print repr("".join(val)) + '123' + '456' + """ + paddedIterator = itertools.chain(iterator, itertools.repeat(padValue, count-1)) + nIterators = (paddedIterator, ) * count + return itertools.izip(*nIterators) + + +def safe_eval(s): + _TRUE_REGEX = re.compile("true") + _FALSE_REGEX = re.compile("false") + s = _TRUE_REGEX.sub("True", s) + s = _FALSE_REGEX.sub("False", s) + return eval(s, {}, {}) + + +def _fake_parse_json(flattened): + return safe_eval(flattened) + + +def _actual_parse_json(flattened): + return simplejson.loads(flattened) + + +if simplejson is None: + parse_json = _fake_parse_json +else: + parse_json = _actual_parse_json + + +def extract_payload(flatXml): + xmlTree = ElementTree.fromstring(flatXml) + + jsonElement = xmlTree.getchildren()[0] + flatJson = jsonElement.text + jsonTree = parse_json(flatJson) + + htmlElement = xmlTree.getchildren()[1] + flatHtml = htmlElement.text + + return jsonTree, flatHtml + + +def validate_response(response): + """ + Validates that the JSON response is A-OK + """ + try: + assert 'ok' in response and response['ok'] + except AssertionError: + raise RuntimeError('There was a problem with GV: %s' % response) + + +def guess_phone_type(number): + if number.startswith("747") or number.startswith("1747"): + return GVDialer.PHONE_TYPE_GIZMO + else: + return GVDialer.PHONE_TYPE_MOBILE + def set_sane_callback(backend): """ @@ -749,13 +841,14 @@ def grab_debug_info(username, password): ("isdnd", backend._isDndURL), ("contacts", backend._contactsURL), - ("voicemail", backend._voicemailURL), - ("sms", backend._smsURL), + ("account", backend._XML_ACCOUNT_URL), + ("voicemail", backend._XML_VOICEMAIL_URL), + ("sms", backend._XML_SMS_URL), - ("recent", backend._recentCallsURL), - ("placed", backend._placedCallsURL), - ("recieved", backend._receivedCallsURL), - ("missed", backend._missedCallsURL), + ("recent", backend._XML_RECENT_URL), + ("placed", backend._XML_PLACED_URL), + ("recieved", backend._XML_RECEIVED_URL), + ("missed", backend._XML_MISSED_URL), ] # Get Pages @@ -811,5 +904,7 @@ def grab_debug_info(username, password): if __name__ == "__main__": import sys logging.basicConfig(level=logging.DEBUG) - #test_backend(sys.argv[1], sys.argv[2]) - grab_debug_info(sys.argv[1], sys.argv[2]) + if True: + grab_debug_info(sys.argv[1], sys.argv[2]) + else: + test_backend(sys.argv[1], sys.argv[2]) diff --git a/src/gvoice/browser_emu.py b/src/gvoice/browser_emu.py index c29d482..108fb5c 100644 --- a/src/gvoice/browser_emu.py +++ b/src/gvoice/browser_emu.py @@ -43,47 +43,40 @@ class MozillaEmulator(object): @param trycount: The download() method will retry the operation if it fails. You can specify -1 for infinite retrying. A value of 0 means no retrying. A value of 1 means one retry. etc.""" - self.cookies = cookielib.LWPCookieJar() self.debug = False self.trycount = trycount - - def build_opener(self, url, postdata = None, extraheaders = None, forbid_redirect = False): - if extraheaders is None: - extraheaders = {} - - txheaders = { - 'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png', - 'Accept-Language': 'en,en-us;q=0.5', - 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', - } - for key, value in extraheaders.iteritems(): - txheaders[key] = value - req = urllib2.Request(url, postdata, txheaders) - self.cookies.add_cookie_header(req) - if forbid_redirect: - redirector = HTTPNoRedirector() + self._cookies = cookielib.LWPCookieJar() + self._loadedFromCookies = False + + def load_cookies(self, path): + assert not self._loadedFromCookies, "Load cookies only once" + if path is None: + return + + self._cookies.filename = path + try: + self._cookies.load() + except cookielib.LoadError: + _moduleLogger.exception("Bad cookie file") + except IOError: + _moduleLogger.exception("No cookie file") + except Exception, e: + _moduleLogger.exception("Unknown error with cookies") else: - redirector = urllib2.HTTPRedirectHandler() + self._loadedFromCookies = True - http_handler = urllib2.HTTPHandler(debuglevel=self.debug) - https_handler = urllib2.HTTPSHandler(debuglevel=self.debug) + return self._loadedFromCookies - u = urllib2.build_opener( - http_handler, - https_handler, - urllib2.HTTPCookieProcessor(self.cookies), - redirector - ) - u.addheaders = [( - 'User-Agent', - 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.8) Gecko/20050511 Firefox/1.0.4' - )] - if not postdata is None: - req.add_data(postdata) - return (req, u) + def save_cookies(self): + if self._loadedFromCookies: + self._cookies.save() + + def clear_cookies(self): + if self._loadedFromCookies: + self._cookies.clear() def download(self, url, - postdata = None, extraheaders = None, forbid_redirect = False, + postdata = None, extraheaders = None, forbidRedirect = False, trycount = None, only_head = False, ): """Download an URL with GET or POST methods. @@ -91,7 +84,7 @@ class MozillaEmulator(object): @param postdata: It can be a string that will be POST-ed to the URL. When None is given, the method will be GET instead. @param extraheaders: You can add/modify HTTP headers with a dict here. - @param forbid_redirect: Set this flag if you do not want to handle + @param forbidRedirect: Set this flag if you do not want to handle HTTP 301 and 302 redirects. @param trycount: Specify the maximum number of retries here. 0 means no retry on error. Using -1 means infinite retring. @@ -101,7 +94,7 @@ class MozillaEmulator(object): @return: The raw HTML page data """ - _moduleLogger.warning("Performing download of %s" % url) + _moduleLogger.info("Performing download of %s" % url) if extraheaders is None: extraheaders = {} @@ -111,13 +104,13 @@ class MozillaEmulator(object): while True: try: - req, u = self.build_opener(url, postdata, extraheaders, forbid_redirect) + req, u = self._build_opener(url, postdata, extraheaders, forbidRedirect) openerdirector = u.open(req) if self.debug: _moduleLogger.info("%r - %r" % (req.get_method(), url)) _moduleLogger.info("%r - %r" % (openerdirector.code, openerdirector.msg)) _moduleLogger.info("%r" % (openerdirector.headers)) - self.cookies.extract_cookies(openerdirector, req) + self._cookies.extract_cookies(openerdirector, req) if only_head: return openerdirector @@ -130,6 +123,43 @@ class MozillaEmulator(object): # Retry :-) _moduleLogger.info("MozillaEmulator: urllib2.URLError, retryting %d" % cnt) + def _build_opener(self, url, postdata = None, extraheaders = None, forbidRedirect = False): + if extraheaders is None: + extraheaders = {} + + txheaders = { + 'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png', + 'Accept-Language': 'en,en-us;q=0.5', + 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', + } + for key, value in extraheaders.iteritems(): + txheaders[key] = value + req = urllib2.Request(url, postdata, txheaders) + self._cookies.add_cookie_header(req) + if forbidRedirect: + redirector = HTTPNoRedirector() + #_moduleLogger.info("Redirection disabled") + else: + redirector = urllib2.HTTPRedirectHandler() + #_moduleLogger.info("Redirection enabled") + + http_handler = urllib2.HTTPHandler(debuglevel=self.debug) + https_handler = urllib2.HTTPSHandler(debuglevel=self.debug) + + u = urllib2.build_opener( + http_handler, + https_handler, + urllib2.HTTPCookieProcessor(self._cookies), + redirector + ) + u.addheaders = [( + 'User-Agent', + 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.8) Gecko/20050511 Firefox/1.0.4' + )] + if not postdata is None: + req.add_data(postdata) + return (req, u) + def _read(self, openerdirector, trycount): chunks = [] @@ -163,4 +193,5 @@ class HTTPNoRedirector(urllib2.HTTPRedirectHandler): elif 'uri' in headers: newurl = headers.getheaders('uri')[0] e.newurl = newurl + _moduleLogger.info("New url: %s" % e.newurl) raise e -- 1.7.9.5