git.maemo.org Git - modest/blob - src/modest-text-utils.c

   1 /* Copyright (c) 2006, Nokia Corporation
   2  * All rights reserved.
   3  *
   4  * Redistribution and use in source and binary forms, with or without
   5  * modification, are permitted provided that the following conditions are
   6  * met:
   7  *
   8  * * Redistributions of source code must retain the above copyright
   9  *   notice, this list of conditions and the following disclaimer.
  10  * * Redistributions in binary form must reproduce the above copyright
  11  *   notice, this list of conditions and the following disclaimer in the
  12  *   documentation and/or other materials provided with the distribution.
  13  * * Neither the name of the Nokia Corporation nor the names of its
  14  *   contributors may be used to endorse or promote products derived from
  15  *   this software without specific prior written permission.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
  18  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  19  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
  20  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
  21  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  22  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  23  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  24  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  25  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  26  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  27  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28  */
  29
  30
  31 #include <glib.h>
  32 #include <string.h>
  33 #include <stdlib.h>
  34 #include <glib/gi18n.h>
  35 #include <regex.h>
  36 #include "modest-text-utils.h"
  37
  38
  39 #ifdef HAVE_CONFIG_H
  40 #include <config.h>
  41 #endif /*HAVE_CONFIG_H */
  42
  43 /* private */
  44 static GString *get_next_line (const char *b, const gsize blen, const gchar * iter);
  45 static int get_indent_level (const char *l);
  46 static void unquote_line (GString * l);
  47 static void append_quoted (GString * buf, const int indent,
  48                            const GString * str, const int cutpoint);
  49 static int get_breakpoint_utf8 (const gchar * s, const gint indent,
  50                                 const gint limit);
  51 static int get_breakpoint_ascii (const gchar * s, const gint indent,
  52                                  const gint limit);
  53 static int get_breakpoint (const gchar * s, const gint indent,
  54                            const gint limit);
  55
  56 static GString *
  57 get_next_line (const gchar * b, const gsize blen, const gchar * iter)
  58 {
  59         GString *gs;
  60         const gchar *i0;
  61
  62         if (iter > b + blen)
  63                 return g_string_new("");
  64
  65         i0 = iter;
  66         while (iter[0]) {
  67                 if (iter[0] == '\n')
  68                         break;
  69                 iter++;
  70         }
  71         gs = g_string_new_len (i0, iter - i0);
  72         return gs;
  73 }
  74 static int
  75 get_indent_level (const char *l)
  76 {
  77         int indent = 0;
  78
  79         while (l[0]) {
  80                 if (l[0] == '>') {
  81                         indent++;
  82                         if (l[1] == ' ') {
  83                                 l++;
  84                         }
  85                 } else {
  86                         break;
  87                 }
  88                 l++;
  89
  90         }
  91
  92         /*      if we hit the signature marker "-- ", we return -(indent + 1). This
  93          *      stops reformatting.
  94          */
  95         if (strcmp (l, "-- ") == 0) {
  96                 return -1 - indent;
  97         } else {
  98                 return indent;
  99         }
 100 }
 101
 102 static void
 103 unquote_line (GString * l)
 104 {
 105         gchar *p;
 106
 107         p = l->str;
 108         while (p[0]) {
 109                 if (p[0] == '>') {
 110                         if (p[1] == ' ') {
 111                                 p++;
 112                         }
 113                 } else {
 114                         break;
 115                 }
 116                 p++;
 117         }
 118         g_string_erase (l, 0, p - l->str);
 119 }
 120
 121 static void
 122 append_quoted (GString * buf, int indent, const GString * str,
 123                const int cutpoint)
 124 {
 125         int i;
 126
 127         indent = indent < 0 ? abs (indent) - 1 : indent;
 128         for (i = 0; i <= indent; i++) {
 129                 g_string_append (buf, "> ");
 130         }
 131         if (cutpoint > 0) {
 132                 g_string_append_len (buf, str->str, cutpoint);
 133         } else {
 134                 g_string_append (buf, str->str);
 135         }
 136         g_string_append (buf, "\n");
 137 }
 138
 139 static int
 140 get_breakpoint_utf8 (const gchar * s, gint indent, const gint limit)
 141 {
 142         gint index = 0;
 143         const gchar *pos, *last;
 144         gunichar *uni;
 145
 146         indent = indent < 0 ? abs (indent) - 1 : indent;
 147
 148         last = NULL;
 149         pos = s;
 150         uni = g_utf8_to_ucs4_fast (s, -1, NULL);
 151         while (pos[0]) {
 152                 if ((index + 2 * indent > limit) && last) {
 153                         g_free (uni);
 154                         return last - s;
 155                 }
 156                 if (g_unichar_isspace (uni[index])) {
 157                         last = pos;
 158                 }
 159                 pos = g_utf8_next_char (pos);
 160                 index++;
 161         }
 162         g_free (uni);
 163         return strlen (s);
 164 }
 165
 166 static int
 167 get_breakpoint_ascii (const gchar * s, const gint indent, const gint limit)
 168 {
 169         gint i, last;
 170
 171         last = strlen (s);
 172         if (last + 2 * indent < limit)
 173                 return last;
 174
 175         for (i = strlen (s); i > 0; i--) {
 176                 if (s[i] == ' ') {
 177                         if (i + 2 * indent <= limit) {
 178                                 return i;
 179                         } else {
 180                                 last = i;
 181                         }
 182                 }
 183         }
 184         return last;
 185 }
 186
 187 static int
 188 get_breakpoint (const gchar * s, const gint indent, const gint limit)
 189 {
 190
 191         if (g_utf8_validate (s, -1, NULL)) {
 192                 return get_breakpoint_utf8 (s, indent, limit);
 193         } else {                /* assume ASCII */
 194                 //g_warning("invalid UTF-8 in msg");
 195                 return get_breakpoint_ascii (s, indent, limit);
 196         }
 197 }
 198
 199
 200
 201 /* just to prevent warnings:
 202  * warning: `%x' yields only last 2 digits of year in some locales
 203  */
 204 size_t
 205 modest_text_utils_strftime(char *s, size_t max, const char  *fmt, const  struct tm *tm)
 206 {
 207         return strftime(s, max, fmt, tm);
 208 }
 209
 210 static gchar *
 211 cite (const time_t sent_date, const gchar *from)
 212 {
 213         gchar sent_str[101];
 214
 215         /* format sent_date */
 216         modest_text_utils_strftime (sent_str, 100, "%c", localtime (&sent_date));
 217         return g_strdup_printf (N_("On %s, %s wrote:\n"), sent_str, from);
 218 }
 219
 220
 221 gchar *
 222 modest_text_utils_quote (const gchar * to_quote, const gchar * from,
 223                          const time_t sent_date, const int limit)
 224 {
 225         const gchar *iter;
 226         gint indent, breakpoint, rem_indent = 0;
 227         GString *q, *l, *remaining;
 228         gsize len;
 229         gchar *tmp;
 230
 231         /* format sent_date */
 232         tmp = cite (sent_date, from);
 233         q = g_string_new (tmp);
 234         g_free (tmp);
 235
 236         /* remaining will store the rest of the line if we have to break it */
 237         remaining = g_string_new ("");
 238
 239         iter = to_quote;
 240         len = strlen(to_quote);
 241         do {
 242                 l = get_next_line (to_quote, len, iter);
 243                 iter = iter + l->len + 1;
 244                 indent = get_indent_level (l->str);
 245                 unquote_line (l);
 246
 247                 if (remaining->len) {
 248                         if (l->len && indent == rem_indent) {
 249                                 g_string_prepend (l, " ");
 250                                 g_string_prepend (l, remaining->str);
 251                         } else {
 252                                 do {
 253                                         breakpoint =
 254                                                 get_breakpoint (remaining->     str,
 255                                                                 rem_indent,
 256                                                                 limit);
 257                                         append_quoted (q, rem_indent,
 258                                                        remaining, breakpoint);
 259                                         g_string_erase (remaining, 0,
 260                                                         breakpoint);
 261                                         if (remaining->str[0] == ' ') {
 262                                                 g_string_erase (remaining, 0,
 263                                                                 1);
 264                                         }
 265                                 } while (remaining->len);
 266                         }
 267                 }
 268                 g_string_free (remaining, TRUE);
 269                 breakpoint = get_breakpoint (l->str, indent, limit);
 270                 remaining = g_string_new (l->str + breakpoint);
 271                 if (remaining->str[0] == ' ') {
 272                         g_string_erase (remaining, 0, 1);
 273                 }
 274                 rem_indent = indent;
 275                 append_quoted (q, indent, l, breakpoint);
 276                 g_string_free (l, TRUE);
 277         } while ((iter < to_quote + len) || (remaining->str[0]));
 278
 279         return g_string_free (q, FALSE);
 280 }
 281
 282
 283 gchar *
 284 modest_text_utils_derived_subject (const gchar *subject, const gchar *prefix)
 285 {
 286         gchar *tmp;
 287
 288         if (!subject)
 289                 return g_strdup_printf ("%s ", prefix);
 290
 291         tmp = g_strchug (g_strdup (subject));
 292
 293         if (!strncmp (tmp, prefix, strlen (prefix))) {
 294                 return tmp;
 295         } else {
 296                 g_free (tmp);
 297                 return g_strdup_printf ("%s %s", prefix, subject);
 298         }
 299 }
 300
 301
 302 gchar *
 303 modest_text_utils_cited_text (const gchar *from,
 304                                      time_t sent_date,
 305                                      const gchar *text)
 306 {
 307         gchar *tmp, *retval;
 308
 309         tmp = cite (sent_date, from);
 310         retval = g_strdup_printf ("%s%s\n", tmp, text);
 311         g_free (tmp);
 312
 313         return retval;
 314 }
 315
 316
 317 gchar *
 318 modest_text_utils_inlined_text (const gchar *from, time_t sent_date,
 319                                 const gchar *to, const gchar *subject,
 320                                 const gchar *text)
 321 {
 322         gchar sent_str[101];
 323
 324         modest_text_utils_strftime (sent_str, 100, "%c", localtime (&sent_date));
 325
 326         return g_strdup_printf ("%s\n%s %s\n%s %s\n%s %s\n%s %s\n\n%s",
 327                                 _("-----Forwarded Message-----"),
 328                                 _("From:"), from,
 329                                 _("Sent:"), sent_str,
 330                                 _("To:"), to,
 331                                 _("Subject:"), subject,
 332                                 text);
 333 }
 334
 335 gchar *
 336 modest_text_utils_remove_address (const gchar *address, const gchar *address_list)
 337 {
 338         char *dup, *token, *ptr, *result;
 339         GString *filtered_emails;
 340
 341         if (!address_list)
 342                 return NULL;
 343
 344         /* Search for substring */
 345         if (!strstr ((const char *) address_list, (const char *) address))
 346                 return g_strdup (address_list);
 347
 348         dup = g_strdup (address_list);
 349         filtered_emails = g_string_new (NULL);
 350
 351         token = strtok_r (dup, ",", &ptr);
 352
 353         while (token != NULL) {
 354                 /* Add to list if not found */
 355                 if (!strstr ((const char *) token, (const char *) address)) {
 356                         if (filtered_emails->len == 0)
 357                                 g_string_append_printf (filtered_emails, "%s", token);
 358                         else
 359                                 g_string_append_printf (filtered_emails, ",%s", token);
 360                 }
 361                 token = strtok_r (NULL, ",", &ptr);
 362         }
 363         result = filtered_emails->str;
 364
 365         /* Clean */
 366         g_free (dup);
 367         g_string_free (filtered_emails, FALSE);
 368
 369         return result;
 370 }
 371
 372
 373
 374
 375 /*
 376  * we need these regexps to find URLs in plain text e-mails
 377  */
 378 typedef struct _url_match_pattern_t url_match_pattern_t;
 379 struct _url_match_pattern_t {
 380         gchar   *regex;
 381         regex_t *preg;
 382         gchar   *prefix;
 383 };
 384
 385 typedef struct _url_match_t url_match_t;
 386 struct _url_match_t {
 387         guint offset;
 388         guint len;
 389         const gchar* prefix;
 390 };
 391
 392
/*
 * Initializer for the table of url_match_pattern_t entries. Each entry is
 * { regex source, compiled regex (always NULL here), prefix }.
 *
 * A NULL prefix means the matched text is used as the link target as is;
 * otherwise the prefix is put in front of the match when the link is built.
 * Patterns are tried in the order listed, and a later match that starts
 * inside an earlier match is dropped, so more specific patterns come first.
 * The patterns are compiled with REG_ICASE|REG_EXTENDED|REG_NEWLINE.
 *
 * NOTE(review): the trailing "[^...]?" in the www. and ftp. patterns looks
 * like it accepts any one character outside the listed punctuation, which
 * would include a blank or '<' -- confirm whether that is intended.
 */
#define MAIL_VIEWER_URL_MATCH_PATTERNS  {                               \
        { "(file|rtsp|http|ftp|https)://[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]+[-A-Za-z0-9_$%&=?/~#]",\
          NULL, NULL },\
        { "www\\.[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]*[^]}\\),?!;:\"]?)?",\
          NULL, "http://" },\
        { "ftp\\.[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]*[^]}\\),?!;:\"]?)?",\
          NULL, "ftp://" },\
        { "(voipto|callto|chatto|jabberto|xmpp):[-_a-z@0-9.\\+]+", \
           NULL, NULL},                                             \
        { "mailto:[-_a-z0-9.\\+]+@[-_a-z0-9.]+",                    \
          NULL, NULL},\
        { "[-_a-z0-9.\\+]+@[-_a-z0-9.]+",\
          NULL, "mailto:"}\
        }
 407
 408
 409 static gint
 410 cmp_offsets_reverse (const url_match_t *match1, const url_match_t *match2)
 411 {
 412         return match2->offset - match1->offset;
 413 }
 414
 415
 416
 417 /*
 418  * check if the match is inside an existing match... */
 419 static void
 420 chk_partial_match (const url_match_t *match, guint* offset)
 421 {
 422         if (*offset >= match->offset && *offset < match->offset + match->len)
 423                 *offset = -1;
 424 }
 425
 426 static GSList*
 427 get_url_matches (GString *txt)
 428 {
 429         regmatch_t rm;
 430         guint rv, i, offset = 0;
 431         GSList *match_list = NULL;
 432
 433         static url_match_pattern_t patterns[] = MAIL_VIEWER_URL_MATCH_PATTERNS;
 434         const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
 435
 436         /* initalize the regexps */
 437         for (i = 0; i != pattern_num; ++i) {
 438                 patterns[i].preg = g_new0 (regex_t,1);
 439                 g_assert(regcomp (patterns[i].preg, patterns[i].regex,
 440                                   REG_ICASE|REG_EXTENDED|REG_NEWLINE) == 0);
 441         }
 442         /* find all the matches */
 443         for (i = 0; i != pattern_num; ++i) {
 444                 offset     = 0;
 445                 while (1) {
 446                         int test_offset;
 447                         if ((rv = regexec (patterns[i].preg, txt->str + offset, 1, &rm, 0)) != 0) {
 448                                 g_assert (rv == REG_NOMATCH); /* this should not happen */
 449                                 break; /* try next regexp */
 450                         }
 451                         if (rm.rm_so == -1)
 452                                 break;
 453
 454                         /* FIXME: optimize this */
 455                         /* to avoid partial matches on something that was already found... */
 456                         /* check_partial_match will put -1 in the data ptr if that is the case */
 457                         test_offset = offset + rm.rm_so;
 458                         g_slist_foreach (match_list, (GFunc)chk_partial_match, &test_offset);
 459
 460                         /* make a list of our matches (<offset, len, prefix> tupels)*/
 461                         if (test_offset != -1) {
 462                                 url_match_t *match = g_new (url_match_t,1);
 463                                 match->offset = offset + rm.rm_so;
 464                                 match->len    = rm.rm_eo - rm.rm_so;
 465                                 match->prefix = patterns[i].prefix;
 466                                 match_list = g_slist_prepend (match_list, match);
 467                         }
 468                         offset += rm.rm_eo;
 469                 }
 470         }
 471
 472         for (i = 0; i != pattern_num; ++i) {
 473                 regfree (patterns[i].preg);
 474                 g_free  (patterns[i].preg);
 475         } /* don't free patterns itself -- it's static */
 476
 477         /* now sort the list, so the matches are in reverse order of occurence.
 478          * that way, we can do the replacements starting from the end, so we don't need
 479          * to recalculate the offsets
 480          */
 481         match_list = g_slist_sort (match_list,
 482                                    (GCompareFunc)cmp_offsets_reverse);
 483         return match_list;
 484 }
 485
 486
 487
 488 static void
 489 hyperlinkify_plain_text (GString *txt)
 490 {
 491         GSList *cursor;
 492         GSList *match_list = get_url_matches (txt);
 493
 494         /* we will work backwards, so the offsets stay valid */
 495         for (cursor = match_list; cursor; cursor = cursor->next) {
 496
 497                 url_match_t *match = (url_match_t*) cursor->data;
 498                 gchar *url  = g_strndup (txt->str + match->offset, match->len);
 499                 gchar *repl = NULL; /* replacement  */
 500
 501                 /* the prefix is NULL: use the one that is already there */
 502                 repl = g_strdup_printf ("<a href=\"%s%s\">%s</a>",
 503                                         match->prefix ? match->prefix : "", url, url);
 504
 505                 /* replace the old thing with our hyperlink
 506                  * replacement thing */
 507                 g_string_erase  (txt, match->offset, match->len);
 508                 g_string_insert (txt, match->offset, repl);
 509
 510                 g_free (url);
 511                 g_free (repl);
 512
 513                 g_free (cursor->data);
 514         }
 515
 516         g_slist_free (match_list);
 517 }
 518
 519
 520
 521 gchar*
 522 modest_text_utils_convert_to_html (const gchar *data)
 523 {
 524         guint            i;
 525         gboolean         first_space = TRUE;
 526         GString         *html;
 527         gsize           len;
 528
 529         if (!data)
 530                 return NULL;
 531
 532         len = strlen (data);
 533         html = g_string_sized_new (len + 100);  /* just a  guess... */
 534
 535         g_string_append_printf (html,
 536                                 "<html>"
 537                                 "<head>"
 538                                 "<meta http-equiv=\"content-type\""
 539                                 " content=\"text/html; charset=utf8\">"
 540                                 "</head>"
 541                                 "<body><tt>");
 542
 543         /* replace with special html chars where needed*/
 544         for (i = 0; i != len; ++i)  {
 545                 char    kar = data[i];
 546                 switch (kar) {
 547
 548                 case 0:  break; /* ignore embedded \0s */
 549                 case '<' : g_string_append   (html, "&lt;"); break;
 550                 case '>' : g_string_append   (html, "&gt;"); break;
 551                 case '&' : g_string_append   (html, "&quot;"); break;
 552                 case '\n': g_string_append   (html, "<br>\n"); break;
 553                 default:
 554                         if (kar == ' ') {
 555                                 g_string_append (html, first_space ? " " : "&nbsp;");
 556                                 first_space = FALSE;
 557                         } else  if (kar == '\t')
 558                                 g_string_append (html, "&nbsp; &nbsp;&nbsp;");
 559                         else {
 560                                 int charnum = 0;
 561                                 first_space = TRUE;
 562                                 /* optimization trick: accumulate 'normal' chars, then copy */
 563                                 do {
 564                                         kar = data [++charnum + i];
 565
 566                                 } while ((i + charnum < len) &&
 567                                          (kar > '>' || (kar != '<' && kar != '>'
 568                                                         && kar != '&' && kar !=  ' '
 569                                                         && kar != '\n' && kar != '\t')));
 570                                 g_string_append_len (html, &data[i], charnum);
 571                                 i += (charnum  - 1);
 572                         }
 573                 }
 574         }
 575
 576         g_string_append (html, "</tt></body></html>");
 577         hyperlinkify_plain_text (html);
 578
 579         return g_string_free (html, FALSE);
 580 }
 581
 582
 583