/* navit/linguistics.c -- from the navit-package repository on git.maemo.org */

   1 #include <string.h>
   2 #include <stdio.h>
   3 #include <glib.h>
   4 #include "debug.h"
   5 #include "linguistics.h"
   6
   7 static const char *special[][3]={
   8 /* Capital Diacritics */
   9 /* ¨ Diaresis */
  10 {"Ä","A","AE"},
  11 {"Ö","O","OE"},
  12 {"Ü","U","UE"},
  13 /* ˝ Double Acute Accent */
  14 {"Ő","O"},
  15 {"Ű","U"},
  16 /* ´ Acute Accent */
  17 {"Á","A"},
  18 {"Ć","C"},
  19 {"É","E"},
  20 {"Í","I"},
  21 {"Ń","N"},
  22 {"Ó","O"},
  23 {"Ś","S"},
  24 {"Ú","U"},
  25 {"Ý","Y"},
  26 {"Ź","Z"},
  27 /* ˛ Ogonek */
  28 {"Ą","A"},
  29 {"Ę","E"},
  30 /* ˙ Dot */
  31 {"Ż","Z"},
  32 /* – Stroke */
  33 {"Ł","L"},
  34 /* ˚ Ring */
  35 {"Å","A","AA"},
  36 {"Ů","U"},
  37 /* ˇ Caron */
  38 {"Č","C"},
  39 {"Ď","D"},
  40 {"Ě","E"},
  41 {"Ň","N"},
  42 {"Ř","R"},
  43 {"Š","S"},
  44 {"Ť","T"},
  45 {"Ž","Z"},
  46 /* / Slash */
  47 {"Ø","O","OE"},
  48 /* ligatures */
  49 {"Æ","A","AE"},
  50 /* Small Diacritics */
  51 /* ¨ Diaresis */
  52 {"ä","a","ae"},
  53 {"ö","o","oe"},
  54 {"ü","u","ue"},
  55 /* ˝ Double Acute Accent */
  56 {"ő","o"},
  57 {"ű","u"},
  58 /* ´ Acute Accent */
  59 {"á","a"},
  60 {"ć","c"},
  61 {"é","e"},
  62 {"í","i"},
  63 {"ń","n"},
  64 {"ó","o"},
  65 {"ś","s"},
  66 {"ú","u"},
  67 {"ý","y"},
  68 {"ź","z"},
  69 /* ˛ Ogonek */
  70 {"ą","a"},
  71 {"ę","e"},
  72 /* ˙ Dot */
  73 {"ż","z"},
  74 /* – Stroke */
  75 {"ł","l"},
  76 /* ˚ Ring */
  77 {"ů","u"},
  78 {"å","a", "aa"},
  79 /* ˇ Caron */
  80 {"č","c"},
  81 {"ď","d"},
  82 {"ě","e"},
  83 {"Ň","N"},
  84 {"ř","r"},
  85 {"š","s"},
  86 {"ť","t"},
  87 {"ž","z"},
  88 /* / Slash */
  89 {"ø","o", "oe"},
  90 /* ligatures */
  91 {"æ","a","ae"},
  92 {"ß","s","ss"},
  93 };
  94
  95 char *
  96 linguistics_expand_special(char *str, int mode)
  97 {
  98         char *in=str;
  99         char *out,*ret;
 100         int found=0;
 101         out=ret=g_strdup(str);
 102         if (!mode)
 103                 return ret;
 104         while (*in) {
 105                 char *next=g_utf8_find_next_char(in, NULL);
 106                 int i,len=next-in;
 107                 int match=0;
 108                 if (len > 1) {
 109                         for (i = 0 ; i < sizeof(special)/sizeof(special[0]); i++) {
 110                                 const char *search=special[i][0];
 111                                 if (!strncmp(in,search,len)) {
 112                                         const char *replace=special[i][mode];
 113                                         if (replace) {
 114                                                 int replace_len=strlen(replace);
 115                                                 dbg_assert(replace_len <= len);
 116                                                 dbg(1,"found %s %s %d %s %d\n",in,search,len,replace,replace_len);
 117                                                 strcpy(out, replace);
 118                                                 out+=replace_len;
 119                                                 match=1;
 120                                                 break;
 121                                         }
 122                                 }
 123                         }
 124                 }
 125                 if (match) {
 126                         found=1;
 127                         in=next;
 128                 } else {
 129                         while (len-- > 0)
 130                                 *out++=*in++;
 131                 }
 132         }
 133         *out++='\0';
 134         if (!found) {
 135                 g_free(ret);
 136                 ret=NULL;
 137         }
 138         return ret;
 139 }
 140
 141 char *
 142 linguistics_next_word(char *str)
 143 {
 144         int len=strcspn(str, " -/()");
 145         if (!str[len] || !str[len+1])
 146                 return NULL;
 147         return str+len+1;
 148 }
 149
 150 void
 151 linguistics_init(void)
 152 {
 153 }