00001 #include "lightmediascanner_charset_conv.h" 00002 #include <iconv.h> 00003 #include <stdio.h> 00004 #include <stdlib.h> 00005 #include <string.h> 00006 #include <errno.h> 00007 00008 struct lms_charset_conv { 00009 iconv_t check; 00010 iconv_t fallback; 00011 unsigned int size; 00012 iconv_t *convs; 00013 char **names; 00014 }; 00015 00025 lms_charset_conv_t * 00026 lms_charset_conv_new(void) 00027 { 00028 lms_charset_conv_t *lcc; 00029 00030 lcc = malloc(sizeof(*lcc)); 00031 if (!lcc) { 00032 perror("malloc"); 00033 return NULL; 00034 } 00035 00036 lcc->check = iconv_open("UTF-8", "UTF-8"); 00037 if (lcc->check == (iconv_t)-1) { 00038 perror("ERROR: could not create conversion checker"); 00039 goto error_check; 00040 } 00041 00042 lcc->fallback = iconv_open("UTF-8//IGNORE", "UTF-8"); 00043 if (lcc->fallback == (iconv_t)-1) { 00044 perror("ERROR: could not create conversion fallback"); 00045 goto error_fallback; 00046 } 00047 00048 lcc->size = 0; 00049 lcc->convs = NULL; 00050 lcc->names = NULL; 00051 return lcc; 00052 00053 error_fallback: 00054 iconv_close(lcc->check); 00055 error_check: 00056 free(lcc); 00057 00058 return NULL; 00059 } 00060 00066 void 00067 lms_charset_conv_free(lms_charset_conv_t *lcc) 00068 { 00069 int i; 00070 00071 if (!lcc) 00072 return; 00073 00074 iconv_close(lcc->check); 00075 iconv_close(lcc->fallback); 00076 00077 for (i = 0; i < lcc->size; i++) { 00078 iconv_close(lcc->convs[i]); 00079 free(lcc->names[i]); 00080 } 00081 00082 if (lcc->convs) 00083 free(lcc->convs); 00084 if (lcc->names) 00085 free(lcc->names); 00086 free(lcc); 00087 } 00088 00097 int 00098 lms_charset_conv_add(lms_charset_conv_t *lcc, const char *charset) 00099 { 00100 iconv_t cd, *convs; 00101 char **names; 00102 int idx, ns; 00103 00104 if (!lcc) 00105 return -1; 00106 00107 if (!charset) 00108 return -2; 00109 00110 cd = iconv_open("UTF-8", charset); 00111 if (cd == (iconv_t)-1) { 00112 fprintf(stderr, "ERROR: could not add conversion charset '%s': %s\n", 00113 charset, strerror(errno)); 00114 return -3; 00115 } 00116 00117 idx = lcc->size; 00118 ns = lcc->size + 1; 00119 00120 convs = realloc(lcc->convs, ns * sizeof(*convs)); 00121 if (!convs) 00122 goto realloc_error; 00123 lcc->convs = convs; 00124 lcc->convs[idx] = cd; 00125 00126 names = realloc(lcc->names, ns * sizeof(*names)); 00127 if (!names) 00128 goto realloc_error; 00129 lcc->names = names; 00130 lcc->names[idx] = strdup(charset); 00131 if (!lcc->names[idx]) 00132 goto realloc_error; 00133 00134 lcc->size = ns; 00135 return 0; 00136 00137 realloc_error: 00138 perror("realloc"); 00139 iconv_close(cd); 00140 return -4; 00141 } 00142 00143 static int 00144 _find(const lms_charset_conv_t *lcc, const char *charset) 00145 { 00146 int i; 00147 00148 for (i = 0; i < lcc->size; i++) 00149 if (strcmp(lcc->names[i], charset) == 0) 00150 return i; 00151 00152 return -1; 00153 } 00154 00163 int 00164 lms_charset_conv_del(lms_charset_conv_t *lcc, const char *charset) 00165 { 00166 iconv_t *convs; 00167 char **names; 00168 int idx; 00169 00170 if (!lcc) 00171 return -1; 00172 00173 if (!charset) 00174 return -2; 00175 00176 idx = _find(lcc, charset); 00177 if (idx < 0) { 00178 fprintf(stderr, "ERROR: could not find charset '%s'\n", charset); 00179 return -3; 00180 } 00181 00182 iconv_close(lcc->convs[idx]); 00183 free(lcc->names[idx]); 00184 00185 lcc->size--; 00186 for (; idx < lcc->size; idx++) { 00187 lcc->convs[idx] = lcc->convs[idx + 1]; 00188 lcc->names[idx] = lcc->names[idx + 1]; 00189 } 00190 00191 convs = realloc(lcc->convs, lcc->size * sizeof(*convs)); 00192 if (convs) 00193 lcc->convs = convs; 00194 else 00195 perror("could not realloc 'convs'"); 00196 00197 names = realloc(lcc->names, lcc->size * sizeof(*names)); 00198 if (names) 00199 lcc->names = names; 00200 else 00201 perror("could not realloc 'names'"); 00202 00203 return 0; 00204 } 00205 00206 static int 00207 _check(lms_charset_conv_t *lcc, const char *istr, unsigned int ilen, char *ostr, unsigned int olen) 00208 { 00209 char *inbuf, *outbuf; 00210 size_t r, inlen, outlen; 00211 00212 inbuf = (char *)istr; 00213 inlen = ilen; 00214 outbuf = ostr; 00215 outlen = olen; 00216 00217 iconv(lcc->check, NULL, NULL, NULL, NULL); 00218 r = iconv(lcc->check, &inbuf, &inlen, &outbuf, &outlen); 00219 if (r == (size_t)-1) 00220 return -1; 00221 else 00222 return 0; 00223 } 00224 00225 static int 00226 _conv(iconv_t cd, char **p_str, unsigned int *p_len, char *ostr, unsigned int olen) 00227 { 00228 char *inbuf, *outbuf; 00229 size_t r, inlen, outlen; 00230 00231 inbuf = *p_str; 00232 inlen = *p_len; 00233 outbuf = ostr; 00234 outlen = olen; 00235 00236 iconv(cd, NULL, NULL, NULL, NULL); 00237 r = iconv(cd, &inbuf, &inlen, &outbuf, &outlen); 00238 if (r == (size_t)-1) 00239 return -1; 00240 00241 *p_len = olen - outlen; 00242 free(*p_str); 00243 *p_str = ostr; 00244 00245 outbuf = realloc(*p_str, *p_len + 1); 00246 if (!outbuf) 00247 perror("realloc"); 00248 else 00249 *p_str = outbuf; 00250 00251 (*p_str)[*p_len] = '\0'; 00252 00253 return 0; 00254 } 00255 00265 int 00266 lms_charset_conv(lms_charset_conv_t *lcc, char **p_str, unsigned int *p_len) 00267 { 00268 char *outstr; 00269 int i, outlen; 00270 00271 if (!lcc) 00272 return -1; 00273 if (!p_str) 00274 return -2; 00275 if (!p_len) 00276 return -3; 00277 if (!*p_str || !*p_len) 00278 return 0; 00279 00280 outlen = 2 * *p_len; 00281 outstr = malloc(outlen + 1); 00282 if (!outstr) { 00283 perror("malloc"); 00284 return -4; 00285 } 00286 00287 if (_check(lcc, *p_str, *p_len, outstr, outlen) == 0) { 00288 free(outstr); 00289 return 0; 00290 } 00291 00292 for (i = 0; i < lcc->size; i++) 00293 if (_conv(lcc->convs[i], p_str, p_len, outstr, outlen) == 0) 00294 return 0; 00295 00296 fprintf(stderr, 00297 "WARNING: could not convert '%*s' to any charset, use fallback\n", 00298 *p_len, *p_str); 00299 i = _conv(lcc->fallback, p_str, p_len, outstr, outlen); 00300 if (i < 0) { 00301 memset(*p_str, '?', *p_len); 00302 free(outstr); 00303 } 00304 return i; 00305 } 00306 00316 int 00317 lms_charset_conv_check(lms_charset_conv_t *lcc, const char *str, unsigned int len) 00318 { 00319 char *outstr; 00320 int r, outlen; 00321 00322 if (!lcc) 00323 return -1; 00324 if (!str || !len) 00325 return 0; 00326 00327 outlen = 2 * len; 00328 outstr = malloc(outlen); 00329 if (!outstr) { 00330 perror("malloc"); 00331 return -2; 00332 } 00333 00334 r = _check(lcc, str, len, outstr, outlen); 00335 free(outstr); 00336 return r; 00337 }