src/lib/lightmediascanner_charset_conv.c

Go to the documentation of this file.
00001 #include "lightmediascanner_charset_conv.h"
00002 #include <iconv.h>
00003 #include <stdio.h>
00004 #include <stdlib.h>
00005 #include <string.h>
00006 #include <errno.h>
00007 
/**
 * Charset conversion context.
 *
 * Bundles the iconv descriptors used to validate strings as UTF-8 and to
 * convert strings from a list of registered charsets to UTF-8.
 */
struct lms_charset_conv {
    iconv_t check;      /* "UTF-8" -> "UTF-8" descriptor used to validate input */
    iconv_t fallback;   /* "UTF-8" -> "UTF-8//IGNORE" descriptor, last resort */
    unsigned int size;  /* number of valid entries in convs and names */
    iconv_t *convs;     /* one descriptor per registered charset (to UTF-8) */
    char **names;       /* heap copies of the charset names, parallel to convs */
};
00015 
00025 lms_charset_conv_t *
00026 lms_charset_conv_new(void)
00027 {
00028     lms_charset_conv_t *lcc;
00029 
00030     lcc = malloc(sizeof(*lcc));
00031     if (!lcc) {
00032         perror("malloc");
00033         return NULL;
00034     }
00035 
00036     lcc->check = iconv_open("UTF-8", "UTF-8");
00037     if (lcc->check == (iconv_t)-1) {
00038         perror("ERROR: could not create conversion checker");
00039         goto error_check;
00040     }
00041 
00042     lcc->fallback = iconv_open("UTF-8//IGNORE", "UTF-8");
00043     if (lcc->fallback == (iconv_t)-1) {
00044         perror("ERROR: could not create conversion fallback");
00045         goto error_fallback;
00046     }
00047 
00048     lcc->size = 0;
00049     lcc->convs = NULL;
00050     lcc->names = NULL;
00051     return lcc;
00052 
00053   error_fallback:
00054     iconv_close(lcc->check);
00055   error_check:
00056     free(lcc);
00057 
00058     return NULL;
00059 }
00060 
00066 void
00067 lms_charset_conv_free(lms_charset_conv_t *lcc)
00068 {
00069     int i;
00070 
00071     if (!lcc)
00072         return;
00073 
00074     iconv_close(lcc->check);
00075     iconv_close(lcc->fallback);
00076 
00077     for (i = 0; i < lcc->size; i++) {
00078         iconv_close(lcc->convs[i]);
00079         free(lcc->names[i]);
00080     }
00081 
00082     if (lcc->convs)
00083         free(lcc->convs);
00084     if (lcc->names)
00085         free(lcc->names);
00086     free(lcc);
00087 }
00088 
00097 int
00098 lms_charset_conv_add(lms_charset_conv_t *lcc, const char *charset)
00099 {
00100     iconv_t cd, *convs;
00101     char **names;
00102     int idx, ns;
00103 
00104     if (!lcc)
00105         return -1;
00106 
00107     if (!charset)
00108         return -2;
00109 
00110     cd = iconv_open("UTF-8", charset);
00111     if (cd == (iconv_t)-1) {
00112         fprintf(stderr, "ERROR: could not add conversion charset '%s': %s\n",
00113                 charset, strerror(errno));
00114         return -3;
00115     }
00116 
00117     idx = lcc->size;
00118     ns = lcc->size + 1;
00119 
00120     convs = realloc(lcc->convs, ns * sizeof(*convs));
00121     if (!convs)
00122         goto realloc_error;
00123     lcc->convs = convs;
00124     lcc->convs[idx] = cd;
00125 
00126     names = realloc(lcc->names, ns * sizeof(*names));
00127     if (!names)
00128         goto realloc_error;
00129     lcc->names = names;
00130     lcc->names[idx] = strdup(charset);
00131     if (!lcc->names[idx])
00132         goto realloc_error;
00133 
00134     lcc->size = ns;
00135     return 0;
00136 
00137   realloc_error:
00138     perror("realloc");
00139     iconv_close(cd);
00140     return -4;
00141 }
00142 
00143 static int
00144 _find(const lms_charset_conv_t *lcc, const char *charset)
00145 {
00146     int i;
00147 
00148     for (i = 0; i < lcc->size; i++)
00149         if (strcmp(lcc->names[i], charset) == 0)
00150             return i;
00151 
00152     return -1;
00153 }
00154 
00163 int
00164 lms_charset_conv_del(lms_charset_conv_t *lcc, const char *charset)
00165 {
00166     iconv_t *convs;
00167     char **names;
00168     int idx;
00169 
00170     if (!lcc)
00171         return -1;
00172 
00173     if (!charset)
00174         return -2;
00175 
00176     idx = _find(lcc, charset);
00177     if (idx < 0) {
00178         fprintf(stderr, "ERROR: could not find charset '%s'\n", charset);
00179         return -3;
00180     }
00181 
00182     iconv_close(lcc->convs[idx]);
00183     free(lcc->names[idx]);
00184 
00185     lcc->size--;
00186     for (; idx < lcc->size; idx++) {
00187         lcc->convs[idx] = lcc->convs[idx + 1];
00188         lcc->names[idx] = lcc->names[idx + 1];
00189     }
00190 
00191     convs = realloc(lcc->convs, lcc->size * sizeof(*convs));
00192     if (convs)
00193         lcc->convs = convs;
00194     else
00195         perror("could not realloc 'convs'");
00196 
00197     names = realloc(lcc->names, lcc->size * sizeof(*names));
00198     if (names)
00199         lcc->names = names;
00200     else
00201         perror("could not realloc 'names'");
00202 
00203     return 0;
00204 }
00205 
00206 static int
00207 _check(lms_charset_conv_t *lcc, const char *istr, unsigned int ilen, char *ostr, unsigned int olen)
00208 {
00209     char *inbuf, *outbuf;
00210     size_t r, inlen, outlen;
00211 
00212     inbuf = (char *)istr;
00213     inlen = ilen;
00214     outbuf = ostr;
00215     outlen = olen;
00216 
00217     iconv(lcc->check, NULL, NULL, NULL, NULL);
00218     r = iconv(lcc->check, &inbuf, &inlen, &outbuf, &outlen);
00219     if (r == (size_t)-1)
00220         return -1;
00221     else
00222         return 0;
00223 }
00224 
/*
 * Convert *p_str (*p_len bytes) with descriptor cd into ostr (olen bytes).
 *
 * On success the original *p_str is freed and replaced by the converted,
 * NUL-terminated string, *p_len is set to the converted length, and 0 is
 * returned. The function takes ownership of ostr in that case: it is
 * shrunk with realloc() to *p_len + 1 bytes, so ostr must be a heap block
 * of at least olen + 1 bytes.
 *
 * On failure -1 is returned and *p_str, *p_len and the ownership of ostr
 * are left untouched.
 */
static int
_conv(iconv_t cd, char **p_str, unsigned int *p_len, char *ostr, unsigned int olen)
{
    char *src = *p_str;
    char *dst = ostr;
    size_t src_left = *p_len;
    size_t dst_left = olen;
    char *shrunk;

    /* Reset the descriptor to its initial state before reusing it. */
    iconv(cd, NULL, NULL, NULL, NULL);

    if (iconv(cd, &src, &src_left, &dst, &dst_left) == (size_t)-1)
        return -1;

    /* Swap the caller's string for the converted one. */
    free(*p_str);
    *p_len = olen - dst_left;
    *p_str = ostr;

    /* Trim the buffer to fit; if that fails the larger block is kept. */
    shrunk = realloc(ostr, *p_len + 1);
    if (shrunk != NULL)
        *p_str = shrunk;
    else
        perror("realloc");

    (*p_str)[*p_len] = '\0';
    return 0;
}
00255 
00265 int
00266 lms_charset_conv(lms_charset_conv_t *lcc, char **p_str, unsigned int *p_len)
00267 {
00268     char *outstr;
00269     int i, outlen;
00270 
00271     if (!lcc)
00272         return -1;
00273     if (!p_str)
00274         return -2;
00275     if (!p_len)
00276         return -3;
00277     if (!*p_str || !*p_len)
00278         return 0;
00279 
00280     outlen = 2 * *p_len;
00281     outstr = malloc(outlen + 1);
00282     if (!outstr) {
00283         perror("malloc");
00284         return -4;
00285     }
00286 
00287     if (_check(lcc, *p_str, *p_len, outstr, outlen) == 0) {
00288         free(outstr);
00289         return 0;
00290     }
00291 
00292     for (i = 0; i < lcc->size; i++)
00293         if (_conv(lcc->convs[i], p_str, p_len, outstr, outlen) == 0)
00294             return 0;
00295 
00296     fprintf(stderr,
00297             "WARNING: could not convert '%*s' to any charset, use fallback\n",
00298             *p_len, *p_str);
00299     i = _conv(lcc->fallback, p_str, p_len, outstr, outlen);
00300     if (i < 0) {
00301         memset(*p_str, '?', *p_len);
00302         free(outstr);
00303     }
00304     return i;
00305 }
00306 
00316 int
00317 lms_charset_conv_check(lms_charset_conv_t *lcc, const char *str, unsigned int len)
00318 {
00319     char *outstr;
00320     int r, outlen;
00321 
00322     if (!lcc)
00323         return -1;
00324     if (!str || !len)
00325         return 0;
00326 
00327     outlen = 2 * len;
00328     outstr = malloc(outlen);
00329     if (!outstr) {
00330         perror("malloc");
00331         return -2;
00332     }
00333 
00334     r = _check(lcc, str, len, outstr, outlen);
00335     free(outstr);
00336     return r;
00337 }

Generated on Thu Dec 13 02:04:03 2007 for Light Media Scanner by  doxygen 1.5.2