Imported version 0.4-1
[mstardict] / stardict-plugins / stardict-xdxf-parsedata-plugin / stardict_xdxf_parsedata.cpp
1 #include "stardict_xdxf_parsedata.h"
2 #include <glib/gi18n.h>
3
4 #ifdef _WIN32
5 #include <windows.h>
6 #endif
7
/*
 * Width in bytes of the UTF-8 sequence introduced by `lead`.
 *
 * Uses the same width classes as GLib's g_utf8_next_char(): bytes below
 * 0xC0 and the bytes 0xFE/0xFF count as a single byte.
 */
static size_t xml_utf8_seq_len(unsigned char lead)
{
        if (lead < 0xC0)
                return 1;
        if (lead < 0xE0)
                return 2;
        if (lead < 0xF0)
                return 3;
        if (lead < 0xF8)
                return 4;
        if (lead < 0xFC)
                return 5;
        if (lead < 0xFE)
                return 6;
        return 1;
}

/*
 * Count the characters of `str` that remain once markup is discounted.
 *
 *  - "&lt;", "&gt;", "&amp;", "&apos;" and "&quot;" each count as one
 *    character; any other '&' counts as one character by itself.
 *  - Everything from '<' up to and including the next '>' counts as zero
 *    characters; a '<' with no following '>' is skipped on its own.
 *  - Every other UTF-8 sequence counts as one character.
 *
 * Scanning stops at the first NUL byte of the string.  A multi-byte sequence
 * that is cut short by the terminator is counted as one character and never
 * makes the scan step beyond the terminator.
 */
static size_t xml_strlen(const std::string& str)
{
        static const char* xml_entrs[] = { "lt;", "gt;", "amp;", "apos;", "quot;", 0 };
        static const int xml_ent_len[] = { 3,     3,     4,      5,       5 };

        size_t visible = 0;
        const char *q = str.c_str();

        while (*q) {
                if (*q == '&') {
                        int i;
                        for (i = 0; xml_entrs[i]; ++i)
                                if (strncmp(xml_entrs[i], q + 1,
                                            xml_ent_len[i]) == 0)
                                        break;
                        // Known entity: step over "&name;".  Otherwise only
                        // the '&' itself is consumed.
                        q += xml_entrs[i] ? xml_ent_len[i] + 1 : 1;
                        ++visible;
                } else if (*q == '<') {
                        // Tags contribute nothing to the visible length.
                        const char *close = strchr(q + 1, '>');
                        q = close ? close + 1 : q + 1;
                } else {
                        // Advance over one UTF-8 sequence, but never past the
                        // terminating NUL if the sequence is truncated.
                        size_t width = xml_utf8_seq_len(static_cast<unsigned char>(*q));
                        while (width > 0 && *q) {
                                ++q;
                                --width;
                        }
                        ++visible;
                }
        }

        return visible;
}
39
/*
 * Replace the five predefined XML entities in `str` with the characters they
 * stand for and store the result in `decoded` (its previous content is
 * overwritten).
 *
 * Recognized: "&lt;" "&gt;" "&amp;" "&apos;" "&quot;".  An '&' that does not
 * start one of these is copied through unchanged.
 */
static void xml_decode(const char *str, std::string& decoded)
{
        struct Entity {
                const char *name;       // text after '&', including the ';'
                int name_len;
                char value;             // character the entity stands for
        };
        static const Entity entities[] = {
                { "lt;",   3, '<'  },
                { "gt;",   3, '>'  },
                { "amp;",  4, '&'  },
                { "apos;", 5, '\'' },
                { "quot;", 5, '\"' },
        };
        static const size_t n_entities = sizeof(entities) / sizeof(entities[0]);

        const char *cursor = strchr(str, '&');
        if (cursor == NULL) {
                // Nothing to decode: plain copy.
                decoded = str;
                return;
        }

        // Everything before the first '&' is copied in one go.
        decoded.assign(str, cursor - str);

        while (*cursor != '\0') {
                if (*cursor == '&') {
                        size_t k = 0;
                        while (k < n_entities &&
                               strncmp(cursor + 1, entities[k].name,
                                       entities[k].name_len) != 0)
                                ++k;
                        if (k < n_entities) {
                                decoded += entities[k].value;
                                cursor += entities[k].name_len + 1;
                                continue;
                        }
                        // Unrecognized sequence: fall through, keep the '&'.
                }
                decoded += *cursor++;
        }
}
76
77 static void xdxf2result(const char *p, ParseResult &result)
78 {
79         LinksPosList links_list;
80         std::string res;
81         const char *tag, *next;
82         std::string name;
83         std::string::size_type cur_pos;
84         int i;
85
86         struct ReplaceTag {
87                 const char *match_;
88                 int match_len_;
89                 const char *replace_;
90                 int char_len_;
91         };
92         static const ReplaceTag replace_arr[] = {
93                 { "abr>", 4, "<span foreground=\"green\" style=\"italic\">", 0 },
94                 { "/abr>", 5, "</span>", 0 },
95                 { "b>", 2, "<b>", 0 },
96                 { "/b>", 3, "</b>", 0 },
97                 { "i>", 2, "<i>", 0  },
98                 { "/i>", 3, "</i>", 0 },
99                 { "sub>", 4, "<sub>", 0 },
100                 { "/sub>", 5, "</sub>", 0},
101                 { "sup>", 4, "<sup>", 0},
102                 { "/sup>", 5, "</sup>", 0},
103                 { "tt>", 3, "<tt>", 0},
104                 { "/tt>", 4, "</tt>", 0},
105                 { "big>", 4, "<big>", 0},
106                 { "/big>", 5, "</big>", 0},
107                 { "small>", 6, "<small>", 0},
108                 { "/small>", 7, "</small>", 0},
109                 { "tr>", 3, "<b>[", 1 },
110                 { "/tr>", 4, "]</b>", 1 },
111                 { "ex>", 3, "<span foreground=\"violet\">", 0 },
112                 { "/ex>", 4, "</span>", 0 },
113                 { "/c>", 3, "</span>", 0 },
114                 { NULL, 0, NULL },
115         };
116
117         bool is_first_k = true;
118         for (cur_pos = 0; *p && (tag = strchr(p, '<')) != NULL;) {
119                 //TODO: do not create chunk
120                 std::string chunk(p, tag - p);
121                 res += chunk;
122                 cur_pos += xml_strlen(chunk);
123
124                 p = tag;
125                 for (i = 0; replace_arr[i].match_; ++i)
126                         if (strncmp(replace_arr[i].match_, p + 1,
127                                                 replace_arr[i].match_len_) == 0) {
128                                 res += replace_arr[i].replace_;
129                                 p += 1 + replace_arr[i].match_len_;
130                                 cur_pos += replace_arr[i].char_len_;
131                                 goto cycle_end;
132                         }
133
134                 if (strncmp("k>", p + 1, 2) == 0) {
135                         next = strstr(p + 3, "</k>");
136                         if (next) {
137                                 if (is_first_k) {
138                                         is_first_k = false;
139                                         if (*(next + 4) == '\n')
140                                                 next++;
141                                 } else {
142                                         res += "<span foreground=\"blue\">";
143                                         std::string chunk(p+3, next-(p+3));
144                                         res += chunk;
145                                         size_t xml_len = xml_strlen(chunk);
146                                         cur_pos += xml_len;
147                                         res += "</span>";
148                                 }
149                                 p = next + sizeof("</k>") - 1;
150                         } else
151                                 p += sizeof("<k>") - 1;
152                 } else if (*(p + 1) == 'c' && (*(p + 2) == ' ' || *(p + 2) == '>')) {
153                         next = strchr(p, '>');
154                         if (!next) {
155                                 ++p;
156                                 continue;
157                         }
158                         name.assign(p + 1, next - p - 1);
159                         std::string::size_type pos = name.find("c=\"");
160                         if (pos != std::string::npos) {
161                                 pos += sizeof("c=\"") - 1;
162                                 std::string::size_type end_pos = name.find("\"", pos);
163                                 if (end_pos == std::string::npos)
164                                         end_pos = name.length();
165
166                                 std::string color(name, pos, end_pos - pos);
167                                 if (pango_color_parse(NULL, color.c_str()))
168                                         res += "<span foreground=\"" + color + "\">";
169                                 else
170                                         res += "<span>";
171                         } else
172                                 res += "<span foreground=\"blue\">";
173                         p = next + 1;
174                 } else if (*(p + 1) == 'r' && *(p + 2) == 'r' && *(p + 3) == 'e' && *(p + 4) == 'f' && (*(p + 5) == ' ' || *(p + 5) == '>')) {
175                         next = strchr(p, '>');
176                         if (!next) {
177                                 ++p;
178                                 continue;
179                         }
180                         name.assign(p + 1, next - p - 1);
181                         std::string type;
182                         std::string::size_type pos = name.find("type=\"");
183                         if (pos != std::string::npos) {
184                                 pos += sizeof("type=\"") - 1;
185                                 std::string::size_type end_pos = name.find("\"", pos);
186                                 if (end_pos == std::string::npos)
187                                         end_pos = name.length();
188                                 type.assign(name, pos, end_pos - pos);
189                         }
190                         p = next + 1;
191                         next = strstr(p, "</rref>");
192                         if (!next)
193                                 continue;
194                         std::string chunk(p, next - p);
195                         p = next + sizeof("</rref>") - 1;
196                         if (type.empty()) {
197                                 if (g_str_has_suffix(chunk.c_str(), ".jpg") || g_str_has_suffix(chunk.c_str(), ".png")) {
198                                         type = "image";
199                                 } else if (g_str_has_suffix(chunk.c_str(), ".wav") || g_str_has_suffix(chunk.c_str(), ".mp3") || g_str_has_suffix(chunk.c_str(), ".ogg")) {
200                                         type = "sound";
201                                 } else if (g_str_has_suffix(chunk.c_str(), ".avi") || g_str_has_suffix(chunk.c_str(), ".mpeg")) {
202                                         type = "video";
203                                 } else {
204                                         type = "attach";
205                                 }
206                         }
207                         ParseResultItem item;
208                         item.type = ParseResultItemType_link;
209                         item.link = new ParseResultLinkItem;
210                         item.link->pango = res;
211                         item.link->links_list = links_list;
212                         result.item_list.push_back(item);
213                         res.clear();
214                         cur_pos = 0;
215                         links_list.clear();
216                         item.type = ParseResultItemType_res;
217                         item.res = new ParseResultResItem;
218                         item.res->type = type;
219                         item.res->key = chunk;
220                         result.item_list.push_back(item);
221                 } else if ((*(p + 1) == 'k' || *(p + 1) == 'i') && *(p + 2) == 'r' && *(p + 3) == 'e' && *(p + 4) == 'f' && (*(p + 5) == ' ' || *(p + 5) == '>')) {
222                         bool is_k_or_i = (*(p + 1) == 'k');
223                         next = strchr(p, '>');
224                         if (!next) {
225                                 ++p;
226                                 continue;
227                         }
228                         name.assign(p + 1, next - p - 1);
229                         std::string key;
230                         std::string::size_type pos;
231                         if (is_k_or_i)
232                                 pos = name.find("k=\"");
233                         else
234                                 pos = name.find("href=\"");
235                         if (pos != std::string::npos) {
236                                 if (is_k_or_i)
237                                         pos += sizeof("k=\"") - 1;
238                                 else
239                                         pos += sizeof("href=\"") - 1;
240                                 std::string::size_type end_pos = name.find("\"", pos);
241                                 if (end_pos == std::string::npos)
242                                         end_pos = name.length();
243                                 key.assign(name, pos, end_pos - pos);
244                         }
245
246                         p = next + 1;
247                         if (is_k_or_i)
248                                 next = strstr(p, "</kref>");
249                         else
250                                 next = strstr(p, "</iref>");
251                         if (!next)
252                                 continue;
253
254                         res += "<span foreground=\"blue\" underline=\"single\">";
255                         std::string::size_type link_len = next - p;
256                         std::string chunk(p, link_len);
257                         size_t xml_len = xml_strlen(chunk);
258                         std::string xml_enc;
259                         if (key.empty())
260                                 xml_decode(chunk.c_str(), xml_enc);
261                         else
262                                 xml_decode(key.c_str(), xml_enc);
263                         std::string link;
264                         if (is_k_or_i)
265                                 link = "query://";
266                         link += xml_enc;
267                         links_list.push_back(LinkDesc(cur_pos, xml_len, link));
268                         res += chunk;
269                         cur_pos += xml_len;
270                         res += "</span>";
271                         if (is_k_or_i)
272                                 p = next + sizeof("</kref>") - 1;
273                         else
274                                 p = next + sizeof("</iref>") - 1;
275                 } else {
276                         next = strchr(p+1, '>');
277                         if (!next) {
278                                 p++;
279                                 res += "&lt;";
280                                 cur_pos++;
281                                 continue;
282                         }
283                         p = next + 1;
284                 }
285 cycle_end:
286                 ;
287         }
288         res += p;
289         ParseResultItem item;
290         item.type = ParseResultItemType_link;
291         item.link = new ParseResultLinkItem;
292         item.link->pango = res;
293         item.link->links_list = links_list;
294         result.item_list.push_back(item);
295 }
296
297 static bool parse(const char *p, unsigned int *parsed_size, ParseResult &result, const char *oword)
298 {
299         if (*p != 'x')
300                 return false;
301         p++;
302         size_t len = strlen(p);
303         if (len) {
304                 xdxf2result(p, result);
305         }
306         *parsed_size = 1 + len + 1;
307         return true;
308 }
309
/* Configuration hook registered in stardict_plugin_init(); does nothing. */
static void configure()
{
        /* intentionally empty */
}
313
314 DLLIMPORT bool stardict_plugin_init(StarDictPlugInObject *obj)
315 {
316         if (strcmp(obj->version_str, PLUGIN_SYSTEM_VERSION)!=0) {
317                 g_print("Error: XDXF data parsing plugin version doesn't match!\n");
318                 return true;
319         }
320         obj->type = StarDictPlugInType_PARSEDATA;
321         obj->info_xml = g_strdup_printf("<plugin_info><name>%s</name><version>1.0</version><short_desc>%s</short_desc><long_desc>%s</long_desc><author>Hu Zheng &lt;huzheng_001@163.com&gt;</author><website>http://stardict.sourceforge.net</website></plugin_info>", _("XDXF data parsing"), _("XDXF data parsing engine."), _("Parse the XDXF data."));
322         obj->configure_func = configure;
323         return false;
324 }
325
326 DLLIMPORT void stardict_plugin_exit(void)
327 {
328 }
329
330 DLLIMPORT bool stardict_parsedata_plugin_init(StarDictParseDataPlugInObject *obj)
331 {
332         obj->parse_func = parse;
333         g_print(_("XDXF data parsing plug-in loaded.\n"));
334         return false;
335 }
336
#ifdef _WIN32
/*
 * Windows DLL entry point.  No action is taken for any notification; the
 * function always reports success.
 */
BOOL APIENTRY DllMain (HINSTANCE hInst     /* Library instance handle. */ ,
                       DWORD reason        /* Reason this function is being called. */ ,
                       LPVOID reserved     /* Not used. */ )
{
    switch (reason)
    {
      /* Nothing to set up or tear down for any of these. */
      case DLL_PROCESS_ATTACH:
      case DLL_PROCESS_DETACH:
      case DLL_THREAD_ATTACH:
      case DLL_THREAD_DETACH:
      default:
        break;
    }

    /* Returns TRUE on success, FALSE on failure */
    return TRUE;
}
#endif