2 ########################################################################
4 ## Copyright (C) 2009 MiM
6 ## Contact: Handspring <xhealer@gmail.com>
8 ## AUTHOR: Alsor Zhou <alsor.zhou@gmail.com>
10 ## This file is part of MiM Pinyin.
12 ## This is free software: you can redistribute it and/or modify
13 ## it under the terms of the GNU General Public License as published by
14 ## the Free Software Foundation, either version 3 of the License, or
15 ## (at your option) any later version.
17 ## This is distributed in the hope that it will be useful,
18 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ## GNU General Public License for more details.
22 ## You should have received a copy of the GNU General Public License
23 ## along with Sigil. If not, see <http://www.gnu.org/licenses/>.
25 ########################################################################
34 # Global ERROR DEFINITION
39 '''Print wrapper with debug function supported
41 Never use this function in production (always output) code '''
47 Copyright (C) 2009 MiM
49 Contact: Handspring <xhealer@gmail.com>
53 This file is part of MiM Pinyin.
55 This is free software: you can redistribute it and/or modify
56 it under the terms of the GNU General Public License as published by
57 the Free Software Foundation, either version 3 of the License, or
58 (at your option) any later version.
60 This is distributed in the hope that it will be useful,
61 but WITHOUT ANY WARRANTY; without even the implied warranty of
62 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
63 GNU General Public License for more details.
65 You should have received a copy of the GNU General Public License
66 along with Sigil. If not, see <http://www.gnu.org/licenses/>.
71 '''converter.py [options] SRC [options...DEST]
72 -s SRC : specify dictionary source
73 -t DEST : save converted binary map into DEST
74 -c SRC : syntax check SRC, without converstion
75 -d DEST : generate dummy dictionary bin map
78 --source SRC : same with '-s SRC'
79 --target DEST: same with '-t DEST'
80 --check SRC : same with '-c SRC'
81 --dummy DEST: same with '-d DEST'
86 '''MiM pinyin dictionary converter version 0.0.1 Handspring <xhealer@gmail.com>'''
89 # Target file segmentation layout
91 tgt_file_start_index = 0
92 tgt_header_start_offset = 0
95 tgt_table_a_offset = 300
96 tgt_table_b_offset = 400
98 tgt_global_position_ind = 0
100 # Example header fields:
101 # Fn:dictionary.bin\n
104 # ActiveChunkTableFlag:A\n
105 # ChunkTableAOffset:300
106 # ChunkTableBOffset:400
109 tgt_header_delemitor_str = ":"
110 tgt_header_fn_str = "Fn"
111 tgt_header_version_str = "Ver"
112 tgt_header_author_str = "Authors"
113 tgt_header_actf_str = "ActiveChunkTableFlag"
114 tgt_header_ctao_str = "ChunkTableAOffset"
115 tgt_header_ctbo_str = "ChunkTableBOffset"
116 tgt_header_chunk_size_str= "ChunkSize"
117 tgt_header_crc_str = "CRC32"
119 # syllable array definition
120 tgt_sa_seperator = "," # symbol between syllable word
121 tgt_sa_delimitor = ":" # symbol between key and value
124 tgt_ctable_flag_offset = 0
125 tgt_ctable_flag_fld_siz = 2 # bytes
126 tgt_ctable_chk_base_offset = 2
127 tgt_ctable_chk_base_fld_size = 2 # bytes, 65535 maximize
128 tgt_ctable_chk_acroyn_fld_size = 2
129 tgt_ctable_chk_offset_fld_size = 2
130 tgt_ctable_chk_size_fld_size = 2
132 # Internal function definition
def _generate_header(fn, ver, authors, actf, ctao, ctbo, csize):
    '''Generate target file header.

    Each field is emitted as "<field name><tgt_header_delemitor_str><value>"
    and consecutive fields are separated by tgt_delimitor.

    @param fn: dictionary file name
    @param ver: dictionary version
    @param authors: dictionary authors
    @param actf: active chunk table flag (A/B)
    @param ctao: chunk table A offset
    @param ctbo: chunk table B offset
    @param csize: chunk size (fixed)

    @return header: header string with crc32 computed
    '''
    def _field(name, value):
        # "%s" formatting accepts both strings and numbers, so callers may
        # pass offsets and sizes as plain integers.
        return name + tgt_header_delemitor_str + "%s" % (value,)

    header = ""
    header += _field(tgt_header_fn_str, fn) + tgt_delimitor
    header += _field(tgt_header_version_str, ver) + tgt_delimitor
    header += _field(tgt_header_author_str, authors) + tgt_delimitor

    # The checksum covers only the Fn/Ver/Authors fields built so far.
    # It is stored under a separate local name: assigning to `crc32` here
    # would make that name local and hide the crc32() function being called.
    # The mask keeps the value unsigned regardless of interpreter version.
    checksum = crc32(header) & 0xffffffff  # FIXME: should we crc the timestamp?

    # Each remaining field is written under its own header name.
    header += _field(tgt_header_actf_str, actf) + tgt_delimitor
    header += _field(tgt_header_ctao_str, ctao) + tgt_delimitor
    header += _field(tgt_header_ctbo_str, ctbo) + tgt_delimitor
    header += _field(tgt_header_chunk_size_str, csize) + tgt_delimitor
    header += _field(tgt_header_crc_str, checksum)

    PRINT(_generate_header.__doc__)
    return header
160 def _generate_st1_sa(safile):
161 '''Generate static table 1 - Syllabale Array.'''
170 # format is {"key1:value1","key2:value2",...,null}
171 sal = saf.split(tgt_sa_seperator)
173 # format is {key:value}
175 dict_obj[item.split(tgt_sa_delimitor)[0]] = item.split(tgt_sa_delimitor)[1]
178 PRINT(_generate_st1_sa.__doc__)
def _generate_st2_cst(cstfile):
    '''Generate static table 2 - Character-Syllable ID Pair Table.'''
    # Placeholder: `cstfile` is not read yet; the function only echoes its
    # own docstring through PRINT.
    summary = _generate_st2_cst.__doc__
    PRINT(summary)
def _generate_ctable_a():
    '''Chunk Table A generation.

    0------------2------------4--------6--------8------10-------12-------14----16
    | Table flag | Chunk Base | Acroyn | Offset | Size | Acroyn | Offset | Size |
    '''
    # Placeholder: no table is built yet; the function only echoes the
    # layout sketch from its docstring through PRINT.
    layout_doc = _generate_ctable_a.__doc__
    PRINT(layout_doc)
192 # FIXME: does chunk table B hold the same contents as A in file storage?
def _generate_ctable_b():
    '''Chunk Table B generation.

    0------------2------------4--------6--------8------10-------12-------14----16
    | Table flag | Chunk Base | Acroyn | Offset | Size | Acroyn | Offset | Size |
    '''
    # Placeholder: no table is built yet; the function only echoes the
    # layout sketch from its docstring through PRINT.
    layout_doc = _generate_ctable_b.__doc__
    PRINT(layout_doc)
201 def _generate_dictionary():
203 Normally, target data file have only one dictionary map. Data integrity is
204 guaranteed by a temp chunk at runtime.
206 PRINT(_generate_dictionary.__doc__)
def gen_dummy_dict_binmap():
    '''Generate dummy dictionary bin map.'''
    # Fixed sample values, in _generate_header's positional order:
    # fn, ver, authors, actf, ctao, ctbo, csize.
    header_args = ("dictionary.bin", "0.2", "Jackson", "A", 300, 400, 65535)
    _generate_header(*header_args)

    # Results of both generators are discarded here; nothing is written out.
    syllable_source = "SyllableArraySource.txt"
    _generate_st1_sa(syllable_source)

    PRINT(gen_dummy_dict_binmap.__doc__)
215 def convert(src, dest):
216 '''Convertion from original text format dictionary to binary map.
218 @param src : text format dictionary
219 @param dest: binary map dictionary
223 PRINT(convert.__doc__)
226 '''Check syntax format of orignal text format dictionary
228 @param src : text format dictionary
230 @return True without syntax error, False else.
235 '''Main business logic
237 @param argv : sys.argv[1:]
238 @return error code if any
241 # handle parameter parse
242 valid_args = "hvVt:c:s:d"
243 valid_long_args = ["help", "version", "source", "target", "check", "dummy"]
248 opts, args = getopt.getopt(argv, valid_args, valid_long_args)
249 except getopt.GetoptError, err:
257 if o in ("-s", "--source"):
259 assert False, "No dictionary source specified"
262 # no dest specified, use same filename as src to store file
264 basename = os.path.basename(src)
265 dest = os.path.splitext(basename)[0]
266 dest = os.path.join(dest, ".bin")
269 elif o in ("-t", "--target"):
271 elif o in ("-d", "--dummy"):
272 gen_dummy_dict_binmap()
273 elif o in ("-c", "--check"):
277 elif o in ("-h", "--help"):
280 elif o in ("-V", "--version"):
285 if __name__ == "__main__":