#include "linguistics.h"
/*
 * Replacement table for accented and special Latin letters.
 *
 * Each row holds up to three UTF-8 strings:
 *   [0] the special character,
 *   [1] its plain single-letter replacement,
 *   [2] an optional multi-letter transliteration (e.g. "AE" for "Ä").
 * Rows that list only two strings leave [2] as NULL.
 *
 * The capital and small halves are meant to mirror each other; keep them
 * in sync when adding characters.
 */
static const char *special[][3]={
	/* Capital Diacritics */
	/* ¨ Diaeresis */
	{"Ä","A","AE"},
	{"Ö","O","OE"},
	{"Ü","U","UE"},
	/* ˝ Double Acute Accent */
	{"Ő","O"},
	{"Ű","U"},
	/* ´ Acute Accent */
	{"Á","A"},
	{"Ć","C"},
	{"É","E"},
	{"Ó","O"},
	{"Ś","S"},
	{"Ú","U"},
	{"Ý","Y"},
	{"Ź","Z"},
	/* ˛ Ogonek */
	{"Ą","A"},
	{"Ę","E"},
	/* ˙ Dot */
	{"Ż","Z"},
	/* – Stroke */
	{"Ł","L"},
	/* ˚ Ring */
	{"Å","A","AA"},
	{"Ů","U"},
	/* ˇ Caron */
	{"Č","C"},
	{"Ď","D"},
	{"Ě","E"},
	{"Ň","N"},
	{"Ř","R"},
	{"Š","S"},
	{"Ť","T"},
	{"Ž","Z"},
	/* / Slash */
	{"Ø","O","OE"},
	/* ligatures */
	{"Æ","A","AE"},
	/* Small Diacritics */
	/* ¨ Diaeresis */
	{"ä","a","ae"},
	{"ö","o","oe"},
	{"ü","u","ue"},
	/* ˝ Double Acute Accent */
	{"ő","o"},
	{"ű","u"},
	/* ´ Acute Accent */
	{"á","a"},
	{"ć","c"},
	{"é","e"},
	{"ó","o"},
	{"ś","s"},
	{"ú","u"},
	{"ý","y"},
	{"ź","z"},
	/* ˛ Ogonek */
	{"ą","a"},
	{"ę","e"},
	/* ˙ Dot */
	{"ż","z"},
	/* – Stroke */
	{"ł","l"},
	/* ˚ Ring */
	{"ů","u"},
	{"å","a","aa"},
	/* ˇ Caron */
	{"č","c"},
	{"ď","d"},
	{"ě","e"},
	/* was a second copy of the capital row {"Ň","N"}; the small letter was missing */
	{"ň","n"},
	{"ř","r"},
	{"š","s"},
	{"ť","t"},
	{"ž","z"},
	/* / Slash */
	{"ø","o","oe"},
	/* ligatures */
	{"æ","a","ae"},
	{"ß","s","ss"},
};
if (replace) {
int replace_len=strlen(replace);
dbg_assert(replace_len <= len);
- dbg(1,"found %s %s %s\n",in,search,replace);
+ dbg(1,"found %s %s %d %s %d\n",in,search,len,replace,replace_len);
strcpy(out, replace);
out+=replace_len;
match=1;
}
}
}
- in=next;
}
- if (match)
+ if (match) {
found=1;
- else {
+ in=next;
+ } else {
while (len-- > 0)
*out++=*in++;
}
}
return ret;
}
+
/**
 * @brief Find the start of the text following the first word separator.
 *
 * Scans str for the first occurrence of one of the separator characters
 * ' ', '-', '/', '(' or ')' and returns a pointer to the character right
 * after it. Only that one separator is skipped, so the returned pointer may
 * itself point at another separator (e.g. "a/(b)" yields "(b)").
 *
 * @param str NUL-terminated string to scan; NULL is tolerated.
 * @return Pointer into str just past the first separator, or NULL if str is
 *         NULL, contains no separator, or the separator is its last character.
 */
char *
linguistics_next_word(char *str)
{
	size_t len;

	if (!str)
		return NULL;
	/* strcspn() returns size_t; keep it unsigned instead of narrowing to int. */
	len=strcspn(str, " -/()");
	if (!str[len] || !str[len+1])
		return NULL;
	return str+len+1;
}
+
/**
 * @brief Initialisation hook for the linguistics module.
 *
 * Currently has nothing to set up; the body is intentionally empty.
 */
void
linguistics_init(void)
{
}