except urllib2.URLError, e:
warnings.warn(traceback.format_exc())
raise RuntimeError("%s is not accesible" % self._voicemailURL)
+ voicemailHtml = self._grab_html(voicemailPage)
+ parsedVoicemail = self._parse_voicemail(voicemailHtml)
+ decoratedVoicemails = self._decorate_voicemail(parsedVoicemail)
try:
smsPage = self._browser.download(self._smsURL)
except urllib2.URLError, e:
warnings.warn(traceback.format_exc())
raise RuntimeError("%s is not accesible" % self._smsURL)
+ smsHtml = self._grab_html(smsPage)
+ parsedSms = self._parse_sms(smsHtml)
+ decoratedSms = self._decorate_sms(parsedSms)
- voicemailHtml = self._grab_html(voicemailPage)
- parsedVoicemail = self._parse_voicemail(voicemailHtml)
- decoratedVoicemails = self._decorated_voicemail(parsedVoicemail)
-
- # @todo Parse this
- # smsHtml = self._grab_html(smsPage)
-
- allMessages = itertools.chain(decoratedVoicemails)
+ allMessages = itertools.chain(decoratedVoicemails, decoratedSms)
sortedMessages = list(allMessages)
for exactDate, header, number, relativeDate, message in sortedMessages:
yield header, number, relativeDate, message
splitVoicemail = self._seperateVoicemailsRegex.split(voicemailHtml)
for id, messageHtml in itergroup(splitVoicemail[1:], 2):
exactTimeGroup = self._exactVoicemailTimeRegex.search(messageHtml)
- exactTime = exactTimeGroup.group(1) if exactTimeGroup else ""
+ exactTime = exactTimeGroup.group(1).strip() if exactTimeGroup else ""
relativeTimeGroup = self._relativeVoicemailTimeRegex.search(messageHtml)
- relativeTime = relativeTimeGroup.group(1) if relativeTimeGroup else ""
+ relativeTime = relativeTimeGroup.group(1).strip() if relativeTimeGroup else ""
locationGroup = self._voicemailLocationRegex.search(messageHtml)
- location = locationGroup.group(1) if locationGroup else ""
+ location = locationGroup.group(1).strip() if locationGroup else ""
+
numberGroup = self._voicemailNumberRegex.search(messageHtml)
- number = numberGroup.group(1) if numberGroup else ""
+ number = numberGroup.group(1).strip() if numberGroup else ""
prettyNumberGroup = self._prettyVoicemailNumberRegex.search(messageHtml)
- prettyNumber = prettyNumberGroup.group(1) if prettyNumberGroup else ""
+ prettyNumber = prettyNumberGroup.group(1).strip() if prettyNumberGroup else ""
+
messageGroups = self._voicemailMessageRegex.finditer(messageHtml)
messageParts = (
- (group.group(1), group.group(2))
+ (group.group(1).strip(), group.group(2).strip())
for group in messageGroups
) if messageGroups else ()
+
yield {
- "id": id,
+ "id": id.strip(),
"time": exactTime,
"relTime": relativeTime,
"prettyNumber": prettyNumber,
"messageParts": messageParts,
}
- def _decorated_voicemail(self, parsedVoicemail):
+ def _decorate_voicemail(self, parsedVoicemail):
messagePartFormat = {
"med1": "<i>%s</i>",
"med2": "%s",
message = "No Transcription"
yield exactTime, header, voicemailData["number"], voicemailData["relTime"], message
+ # Each pattern captures the inner text of one <span> of an SMS entry.
+ # The attribute name matches the CSS class the pattern targets.
+ _smsFromRegex = re.compile(r"""<span class="gc-message-sms-from">(.*?)</span>""", re.MULTILINE | re.DOTALL)
+ _smsTextRegex = re.compile(r"""<span class="gc-message-sms-text">(.*?)</span>""", re.MULTILINE | re.DOTALL)
+ _smsTimeRegex = re.compile(r"""<span class="gc-message-sms-time">(.*?)</span>""", re.MULTILINE | re.DOTALL)
+
+ def _parse_sms(self, smsHtml):
+     """
+     Lazily split the SMS page HTML into one dict per conversation.
+
+     The conversation separator, exact/relative time and number patterns
+     are the same ones used for voicemail; only the per-message spans use
+     the SMS-specific patterns.
+
+     @param smsHtml: HTML text of the SMS page
+     @returns Generator of dicts with the keys "id", "time", "relTime",
+         "prettyNumber", "number" and "messageParts". Every field that is
+         not found in the HTML is reported as "".
+     @note "messageParts" is a one-shot iterator of (from, time, text)
+         tuples; it is truncated to the shortest of the three match
+         streams and can only be consumed once.
+     """
+     splitSms = self._seperateVoicemailsRegex.split(smsHtml)
+     # splitSms[0] is whatever precedes the first separator; the rest
+     # alternates between a captured id and that conversation's HTML.
+     for messageId, messageHtml in itergroup(splitSms[1:], 2):
+         exactTimeGroup = self._exactVoicemailTimeRegex.search(messageHtml)
+         exactTime = exactTimeGroup.group(1).strip() if exactTimeGroup else ""
+         relativeTimeGroup = self._relativeVoicemailTimeRegex.search(messageHtml)
+         relativeTime = relativeTimeGroup.group(1).strip() if relativeTimeGroup else ""
+
+         numberGroup = self._voicemailNumberRegex.search(messageHtml)
+         number = numberGroup.group(1).strip() if numberGroup else ""
+         prettyNumberGroup = self._prettyVoicemailNumberRegex.search(messageHtml)
+         prettyNumber = prettyNumberGroup.group(1).strip() if prettyNumberGroup else ""
+
+         fromGroups = self._smsFromRegex.finditer(messageHtml)
+         fromParts = (group.group(1).strip() for group in fromGroups)
+         timeGroups = self._smsTimeRegex.finditer(messageHtml)
+         timeParts = (group.group(1).strip() for group in timeGroups)
+         textGroups = self._smsTextRegex.finditer(messageHtml)
+         textParts = (group.group(1).strip() for group in textGroups)
+
+         # Order is (from, time, text): _decorate_sms feeds each tuple
+         # straight into "<b>%s (%s)</b>: %s".
+         messageParts = itertools.izip(fromParts, timeParts, textParts)
+
+         yield {
+             "id": messageId.strip(),
+             "time": exactTime,
+             "relTime": relativeTime,
+             "prettyNumber": prettyNumber,
+             "number": number,
+             "messageParts": messageParts,
+         }
+
+ def _decorate_sms(self, parsedSms):
+     """
+     Turn parsed SMS conversations into display-ready message tuples.
+
+     @param parsedSms: iterable of dicts carrying the "time",
+         "prettyNumber", "number", "relTime" and "messageParts" keys;
+         every item of "messageParts" must be a 3-tuple
+     @returns Generator of
+         (exactTime, header, number, relativeTime, message) tuples
+     """
+     for conversation in parsedSms:
+         when = conversation["time"] # @todo Parse This
+         title = "%s" % (conversation["prettyNumber"])
+         phoneNumber = conversation["number"]
+         age = conversation["relTime"]
+
+         # One line per message part, each filling the three format slots
+         renderedLines = [
+             "<b>%s (%s)</b>: %s" % part
+             for part in conversation["messageParts"]
+         ]
+         # A conversation without any parts gets the placeholder text
+         body = "\n".join(renderedLines) or "No Transcription"
+         yield when, title, phoneNumber, age, body
+
def test_backend(username, password):
import pprint