# gsmdecode.py -- module for decoding GSM-encoded strings.
#
# Provenance: ussd-common/src/usr/lib/python2.5/gsmdecode.py, added in commit
# 5915d4f67d732a5fb15e7779f7db5956d7098a5b by pycage (Wed, 6 Jan 2010),
# "added module for decoding GSM-encoded strings"
# (git-svn-id: file:///svnroot/ussd-widget/trunk@4).

import binascii


# Language codes. decode() passes the low nibble of the data coding scheme
# to _decode_language() when the high nibble is 0x0.
LANG_DE = 0x0
LANG_EN = 0x1
LANG_IT = 0x2
LANG_FR = 0x3
LANG_ES = 0x4
LANG_NL = 0x5
LANG_SE = 0x6
LANG_DA = 0x7
LANG_PO = 0x8
LANG_FI = 0x9
LANG_NO = 0xa
LANG_GR = 0xb
LANG_TR = 0xc
LANG_UNSPECIFIED = 0xf


# Lookup table for the 7 bit GSM default alphabet: the list index is the GSM
# character code (0x00 - 0x7f), the value is the matching unicode character.
# Only ASCII escapes are used so that the module needs no source encoding
# declaration.
GSM_DEFAULT_ALPHABET = [
    # 0x00 - 0x0f
    u"@",
    u"\u00a3",
    u"$",
    u"\u00a5",
    u"\u00e8",
    u"\u00e9",
    u"\u00f9",
    u"\u00ec",
    u"\u00f2",
    u"\u00c7",
    u"\n",
    u"\u00d8",
    u"\u00f8",
    u"\r",
    u"\u00c5",
    u"\u00e5",

    # 0x10 - 0x1f
    u"\u0394",
    u"_",
    u"\u03a6",
    u"\u0393",
    u"\u039b",
    u"\u03a9",
    u"\u03a0",
    u"\u03a8",
    u"\u03a3",
    u"\u0398",
    u"\u039e",
    u" ",
    u"\u00c6",
    u"\u00e6",
    u"\u00df",
    u"\u00c9",

    # 0x20 - 0x2f
    u" ",
    u"!",
    u"\"",
    u"#",
    u"\u00a4",
    u"%",
    u"&",
    u"'",
    u"(",
    u")",
    u"*",
    u"+",
    u",",
    u"-",
    u".",
    u"/",

    # 0x30 - 0x3f
    u"0",
    u"1",
    u"2",
    u"3",
    u"4",
    u"5",
    u"6",
    u"7",
    u"8",
    u"9",
    u":",
    u";",
    u"<",
    u"=",
    u">",
    u"?",

    # 0x40 - 0x4f
    u"\u00a1",
    u"A",
    u"B",
    u"C",
    u"D",
    u"E",
    u"F",
    u"G",
    u"H",
    u"I",
    u"J",
    u"K",
    u"L",
    u"M",
    u"N",
    u"O",

    # 0x50 - 0x5f
    u"P",
    u"Q",
    u"R",
    u"S",
    u"T",
    u"U",
    u"V",
    u"W",
    u"X",
    u"Y",
    u"Z",
    u"\u00c4",
    u"\u00d6",
    u"\u00d1",
    u"\u00dc",
    u"\u00a7",

    # 0x60 - 0x6f
    u"\u00bf",
    u"a",
    u"b",
    u"c",
    u"d",
    u"e",
    u"f",
    u"g",
    u"h",
    u"i",
    u"j",
    u"k",
    u"l",
    u"m",
    u"n",
    u"o",

    # 0x70 - 0x7f
    u"p",
    u"q",
    u"r",
    u"s",
    u"t",
    u"u",
    u"v",
    u"w",
    u"x",
    u"y",
    u"z",
    u"\u00e4",
    u"\u00f6",
    u"\u00f1",
    u"\u00fc",
    u"\u00e0",
]

# Substituted for input codes that are not covered by GSM_DEFAULT_ALPHABET.
_REPLACEMENT_CHAR = u"\ufffd"


def decode(s, n):
    """
    Decodes the given string using the given cell broadcast data coding scheme.

    The high nibble of the coding scheme selects the coding group, the low
    nibble carries the group-specific parameters. Strings of coding groups
    that are not handled here are returned unchanged.

    @param s: string to decode
    @param n: GSM cell broadcast data coding scheme
    @return: UTF-8 string
    """

    # separate into nibbles
    hbits = (n & 0xf0) >> 4
    lbits = (n & 0x0f)

    if hbits == 0x0:
        # language
        return _decode_language(s, lbits)

    if 0x4 <= hbits <= 0x7:
        # general data coding indication
        return _decode_general_data_coding(s, hbits, lbits)

    # 0x1 - 0x3: reserved language
    # 0x8 - 0xe: reserved coding group
    # 0xf:       data coding / message handling
    return s


def _decode_language(s, lang):
    """
    Decodes a string of the language coding group.

    @param s: string to decode
    @param lang: language code (one of the LANG_* values); currently not
                 evaluated, the default alphabet is used for every language
    @return: UTF-8 string
    """

    return _decode_default_alphabet(s)


def _decode_default_alphabet(s):
    """
    Maps every character code of the given string through the GSM default
    alphabet table.

    @param s: string of (unpacked) GSM character codes
    @return: UTF-8 string
    """

    # TODO: we really might have to do 7 bit character unpacking here

    chars = []
    for c in s:
        # iterating a byte string yields integers on Python 3 and
        # one-character strings on Python 2
        if isinstance(c, int):
            code = c
        else:
            code = ord(c)

        # ought to be all in the 7 bit GSM character map; substitute anything
        # else instead of failing with an IndexError
        if 0 <= code < len(GSM_DEFAULT_ALPHABET):
            chars.append(GSM_DEFAULT_ALPHABET[code])
        else:
            chars.append(_REPLACEMENT_CHAR)

    return u"".join(chars).encode("utf-8")


def _decode_hex(s):
    """
    Decodes a string of hexadecimal digit pairs into the raw bytes.

    @param s: hex-encoded string
    @return: decoded byte string
    """

    return binascii.unhexlify(s)


def _decode_usc2(s):
    """
    Decodes a hex-encoded UCS2 (16 bit, big endian) string.

    @param s: hex-encoded string
    @return: UTF-8 string
    """

    return binascii.unhexlify(s).decode("utf-16-be").encode("utf-8")


def _decode_general_data_coding(s, h, l):
    """
    Decodes a string of the general data coding indication group.

    @param s: string to decode
    @param h: high nibble of the data coding scheme
    @param l: low nibble of the data coding scheme
    @return: UTF-8 string, or the unchanged input for the reserved alphabet
    """

    # NOTE: (h & 0x2) flags compressed text; decompression is not implemented,
    # so the flag is not evaluated.

    # bits 3..2 of the coding scheme select the alphabet, bits 1..0 must not
    # influence the choice
    alphabet = (l & 0xc) >> 2

    if alphabet == 0x0:
        # default alphabet
        return _decode_default_alphabet(s)

    if alphabet == 0x1:
        # 8 bit
        # actually, encoding is user-defined, but let's assume hex'd ASCII
        # for now
        return _decode_hex(s)

    if alphabet == 0x2:
        # USC2 (16 bit, BE)
        return _decode_usc2(s)

    # 0x3: reserved
    return s