git.maemo.org Git - modest/blob - src/modest-text-utils.c

   1 /* Copyright (c) 2006, Nokia Corporation
   2  * All rights reserved.
   3  *
   4  * Redistribution and use in source and binary forms, with or without
   5  * modification, are permitted provided that the following conditions are
   6  * met:
   7  *
   8  * * Redistributions of source code must retain the above copyright
   9  *   notice, this list of conditions and the following disclaimer.
  10  * * Redistributions in binary form must reproduce the above copyright
  11  *   notice, this list of conditions and the following disclaimer in the
  12  *   documentation and/or other materials provided with the distribution.
  13  * * Neither the name of the Nokia Corporation nor the names of its
  14  *   contributors may be used to endorse or promote products derived from
  15  *   this software without specific prior written permission.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
  18  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  19  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
  20  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
  21  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  22  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  23  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  24  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  25  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  26  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  27  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28  */
  29
  30
  31 #include <glib.h>
  32 #include <string.h>
  33 #include <stdlib.h>
  34 #include <glib/gi18n.h>
  35 #include <regex.h>
  36 #include "modest-text-utils.h"
  37
  38
  39 #ifdef HAVE_CONFIG_H
  40 #include <config.h>
  41 #endif /*HAVE_CONFIG_H */
  42
  43 /* defines */
  44 #define FORWARD_STRING _("-----Forwarded Message-----")
  45 #define FROM_STRING _("From:")
  46 #define SENT_STRING _("Sent:")
  47 #define TO_STRING _("To:")
  48 #define SUBJECT_STRING _("Subject:")
  49
  50 /*
  51  * we need these regexps to find URLs in plain text e-mails
  52  */
  53 typedef struct _url_match_pattern_t url_match_pattern_t;
  54 struct _url_match_pattern_t {
  55         gchar   *regex;
  56         regex_t *preg;
  57         gchar   *prefix;
  58 };
  59
  60 typedef struct _url_match_t url_match_t;
  61 struct _url_match_t {
  62         guint offset;
  63         guint len;
  64         const gchar* prefix;
  65 };
  66
  67 #define MAIL_VIEWER_URL_MATCH_PATTERNS  {                               \
  68         { "(file|rtsp|http|ftp|https)://[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]+[-A-Za-z0-9_$%&=?/~#]",\
  69           NULL, NULL },\
  70         { "www\\.[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]*[^]}\\),?!;:\"]?)?",\
  71           NULL, "http://" },\
  72         { "ftp\\.[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]*[^]}\\),?!;:\"]?)?",\
  73           NULL, "ftp://" },\
  74         { "(voipto|callto|chatto|jabberto|xmpp):[-_a-z@0-9.\\+]+", \
  75            NULL, NULL},                                             \
  76         { "mailto:[-_a-z0-9.\\+]+@[-_a-z0-9.]+",                    \
  77           NULL, NULL},\
  78         { "[-_a-z0-9.\\+]+@[-_a-z0-9.]+",\
  79           NULL, "mailto:"}\
  80         }
  81
  82 /* private */
  83 static gchar*   cite                    (const time_t sent_date, const gchar *from);
  84 static void     hyperlinkify_plain_text (GString *txt);
  85 static gint     cmp_offsets_reverse     (const url_match_t *match1, const url_match_t *match2);
  86 static void     chk_partial_match       (const url_match_t *match, guint* offset);
  87 static GSList*  get_url_matches         (GString *txt);
  88
  89 static GString* get_next_line           (const char *b, const gsize blen, const gchar * iter);
  90 static int      get_indent_level        (const char *l);
  91 static void     unquote_line            (GString * l);
  92 static void     append_quoted           (GString * buf, const int indent, const GString * str,
  93                                          const int cutpoint);
  94 static int      get_breakpoint_utf8     (const gchar * s, const gint indent, const gint limit);
  95 static int      get_breakpoint_ascii    (const gchar * s, const gint indent, const gint limit);
  96 static int      get_breakpoint          (const gchar * s, const gint indent, const gint limit);
  97
  98 static gchar*   modest_text_utils_quote_plain_text (const gchar *text,
  99                                                     const gchar *cite,
 100                                                     int limit);
 101
 102 static gchar*   modest_text_utils_quote_html       (const gchar *text,
 103                                                     const gchar *cite,
 104                                                     int limit);
 105
 106
 107 /* ******************************************************************* */
 108 /* ************************* PUBLIC FUNCTIONS ************************ */
 109 /* ******************************************************************* */
 110
 111 gchar *
 112 modest_text_utils_quote (const gchar *text,
 113                          const gchar *content_type,
 114                          const gchar *from,
 115                          const time_t sent_date,
 116                          int limit)
 117 {
 118         gchar *retval, *cited;
 119
 120         cited = cite (sent_date, from);
 121
 122         if (!strcmp (content_type, "text/html"))
 123                 /* TODO: extract the <body> of the HTML and pass it to
 124                    the function */
 125                 retval = modest_text_utils_quote_html (text, cited, limit);
 126         else
 127                 retval = modest_text_utils_quote_plain_text (text, cited, limit);
 128
 129         g_free (cited);
 130
 131         return retval;
 132 }
 133
 134
 135 gchar *
 136 modest_text_utils_cite (const gchar *text,
 137                         const gchar *content_type,
 138                         const gchar *from,
 139                         time_t sent_date)
 140 {
 141         gchar *tmp, *retval;
 142
 143         tmp = cite (sent_date, from);
 144         retval = g_strdup_printf ("%s%s\n", tmp, text);
 145         g_free (tmp);
 146
 147         return retval;
 148 }
 149
 150 gchar *
 151 modest_text_utils_inline (const gchar *text,
 152                           const gchar *content_type,
 153                           const gchar *from,
 154                           time_t sent_date,
 155                           const gchar *to,
 156                           const gchar *subject)
 157 {
 158         gchar sent_str[101];
 159         const gchar *plain_format = "%s\n%s %s\n%s %s\n%s %s\n%s %s\n\n%s";
 160         const gchar *html_format = \
 161                 "%s<br>\n<table width=\"100%\" border=\"0\" cellspacing=\"2\" cellpadding=\"2\">\n" \
 162                 "<tr><td>%s</td><td>%s</td></tr>\n" \
 163                 "<tr><td>%s</td><td>%s</td></tr>\n" \
 164                 "<tr><td>%s</td><td>%s</td></tr>\n" \
 165                 "<tr><td>%s</td><td>%s</td></tr>\n" \
 166                 "<br><br>%s";
 167         const gchar *format;
 168
 169         modest_text_utils_strftime (sent_str, 100, "%c", localtime (&sent_date));
 170
 171         if (!strcmp (content_type, "text/html"))
 172                 /* TODO: extract the <body> of the HTML and pass it to
 173                    the function */
 174                 format = html_format;
 175         else
 176                 format = plain_format;
 177
 178         return g_strdup_printf (format,
 179                                 FORWARD_STRING,
 180                                 FROM_STRING, from,
 181                                 SENT_STRING, sent_str,
 182                                 TO_STRING, to,
 183                                 SUBJECT_STRING, subject,
 184                                 text);
 185 }
 186
 187 /* just to prevent warnings:
 188  * warning: `%x' yields only last 2 digits of year in some locales
 189  */
 190 size_t
 191 modest_text_utils_strftime(char *s, size_t max, const char  *fmt, const  struct tm *tm)
 192 {
 193         return strftime(s, max, fmt, tm);
 194 }
 195
 196 gchar *
 197 modest_text_utils_derived_subject (const gchar *subject, const gchar *prefix)
 198 {
 199         gchar *tmp;
 200
 201         if (!subject)
 202                 return g_strdup_printf ("%s ", prefix);
 203
 204         tmp = g_strchug (g_strdup (subject));
 205
 206         if (!strncmp (tmp, prefix, strlen (prefix))) {
 207                 return tmp;
 208         } else {
 209                 g_free (tmp);
 210                 return g_strdup_printf ("%s %s", prefix, subject);
 211         }
 212 }
 213
 214 gchar *
 215 modest_text_utils_remove_address (const gchar *address_list, const gchar *address)
 216 {
 217         char *dup, *token, *ptr, *result;
 218         GString *filtered_emails;
 219
 220         if (!address_list)
 221                 return NULL;
 222
 223         /* Search for substring */
 224         if (!strstr ((const char *) address_list, (const char *) address))
 225                 return g_strdup (address_list);
 226
 227         dup = g_strdup (address_list);
 228         filtered_emails = g_string_new (NULL);
 229
 230         token = strtok_r (dup, ",", &ptr);
 231
 232         while (token != NULL) {
 233                 /* Add to list if not found */
 234                 if (!strstr ((const char *) token, (const char *) address)) {
 235                         if (filtered_emails->len == 0)
 236                                 g_string_append_printf (filtered_emails, "%s", token);
 237                         else
 238                                 g_string_append_printf (filtered_emails, ",%s", token);
 239                 }
 240                 token = strtok_r (NULL, ",", &ptr);
 241         }
 242         result = filtered_emails->str;
 243
 244         /* Clean */
 245         g_free (dup);
 246         g_string_free (filtered_emails, FALSE);
 247
 248         return result;
 249 }
 250
 251 gchar*
 252 modest_text_utils_convert_to_html (const gchar *data)
 253 {
 254         guint            i;
 255         gboolean         first_space = TRUE;
 256         GString         *html;
 257         gsize           len;
 258
 259         if (!data)
 260                 return NULL;
 261
 262         len = strlen (data);
 263         html = g_string_sized_new (len + 100);  /* just a  guess... */
 264
 265         g_string_append_printf (html,
 266                                 "<html>"
 267                                 "<head>"
 268                                 "<meta http-equiv=\"content-type\""
 269                                 " content=\"text/html; charset=utf8\">"
 270                                 "</head>"
 271                                 "<body><tt>");
 272
 273         /* replace with special html chars where needed*/
 274         for (i = 0; i != len; ++i)  {
 275                 char    kar = data[i];
 276                 switch (kar) {
 277
 278                 case 0:  break; /* ignore embedded \0s */
 279                 case '<' : g_string_append   (html, "&lt;"); break;
 280                 case '>' : g_string_append   (html, "&gt;"); break;
 281                 case '&' : g_string_append   (html, "&quot;"); break;
 282                 case '\n': g_string_append   (html, "<br>\n"); break;
 283                 default:
 284                         if (kar == ' ') {
 285                                 g_string_append (html, first_space ? " " : "&nbsp;");
 286                                 first_space = FALSE;
 287                         } else  if (kar == '\t')
 288                                 g_string_append (html, "&nbsp; &nbsp;&nbsp;");
 289                         else {
 290                                 int charnum = 0;
 291                                 first_space = TRUE;
 292                                 /* optimization trick: accumulate 'normal' chars, then copy */
 293                                 do {
 294                                         kar = data [++charnum + i];
 295
 296                                 } while ((i + charnum < len) &&
 297                                          (kar > '>' || (kar != '<' && kar != '>'
 298                                                         && kar != '&' && kar !=  ' '
 299                                                         && kar != '\n' && kar != '\t')));
 300                                 g_string_append_len (html, &data[i], charnum);
 301                                 i += (charnum  - 1);
 302                         }
 303                 }
 304         }
 305
 306         g_string_append (html, "</tt></body></html>");
 307         hyperlinkify_plain_text (html);
 308
 309         return g_string_free (html, FALSE);
 310 }
 311
/* ******************************************************************* */
/* ************************ UTILITY FUNCTIONS ************************ */
/* ******************************************************************* */
 315
 316 static GString *
 317 get_next_line (const gchar * b, const gsize blen, const gchar * iter)
 318 {
 319         GString *gs;
 320         const gchar *i0;
 321
 322         if (iter > b + blen)
 323                 return g_string_new("");
 324
 325         i0 = iter;
 326         while (iter[0]) {
 327                 if (iter[0] == '\n')
 328                         break;
 329                 iter++;
 330         }
 331         gs = g_string_new_len (i0, iter - i0);
 332         return gs;
 333 }
 334 static int
 335 get_indent_level (const char *l)
 336 {
 337         int indent = 0;
 338
 339         while (l[0]) {
 340                 if (l[0] == '>') {
 341                         indent++;
 342                         if (l[1] == ' ') {
 343                                 l++;
 344                         }
 345                 } else {
 346                         break;
 347                 }
 348                 l++;
 349
 350         }
 351
 352         /*      if we hit the signature marker "-- ", we return -(indent + 1). This
 353          *      stops reformatting.
 354          */
 355         if (strcmp (l, "-- ") == 0) {
 356                 return -1 - indent;
 357         } else {
 358                 return indent;
 359         }
 360 }
 361
 362 static void
 363 unquote_line (GString * l)
 364 {
 365         gchar *p;
 366
 367         p = l->str;
 368         while (p[0]) {
 369                 if (p[0] == '>') {
 370                         if (p[1] == ' ') {
 371                                 p++;
 372                         }
 373                 } else {
 374                         break;
 375                 }
 376                 p++;
 377         }
 378         g_string_erase (l, 0, p - l->str);
 379 }
 380
 381 static void
 382 append_quoted (GString * buf, int indent, const GString * str,
 383                const int cutpoint)
 384 {
 385         int i;
 386
 387         indent = indent < 0 ? abs (indent) - 1 : indent;
 388         for (i = 0; i <= indent; i++) {
 389                 g_string_append (buf, "> ");
 390         }
 391         if (cutpoint > 0) {
 392                 g_string_append_len (buf, str->str, cutpoint);
 393         } else {
 394                 g_string_append (buf, str->str);
 395         }
 396         g_string_append (buf, "\n");
 397 }
 398
 399 static int
 400 get_breakpoint_utf8 (const gchar * s, gint indent, const gint limit)
 401 {
 402         gint index = 0;
 403         const gchar *pos, *last;
 404         gunichar *uni;
 405
 406         indent = indent < 0 ? abs (indent) - 1 : indent;
 407
 408         last = NULL;
 409         pos = s;
 410         uni = g_utf8_to_ucs4_fast (s, -1, NULL);
 411         while (pos[0]) {
 412                 if ((index + 2 * indent > limit) && last) {
 413                         g_free (uni);
 414                         return last - s;
 415                 }
 416                 if (g_unichar_isspace (uni[index])) {
 417                         last = pos;
 418                 }
 419                 pos = g_utf8_next_char (pos);
 420                 index++;
 421         }
 422         g_free (uni);
 423         return strlen (s);
 424 }
 425
 426 static int
 427 get_breakpoint_ascii (const gchar * s, const gint indent, const gint limit)
 428 {
 429         gint i, last;
 430
 431         last = strlen (s);
 432         if (last + 2 * indent < limit)
 433                 return last;
 434
 435         for (i = strlen (s); i > 0; i--) {
 436                 if (s[i] == ' ') {
 437                         if (i + 2 * indent <= limit) {
 438                                 return i;
 439                         } else {
 440                                 last = i;
 441                         }
 442                 }
 443         }
 444         return last;
 445 }
 446
 447 static int
 448 get_breakpoint (const gchar * s, const gint indent, const gint limit)
 449 {
 450
 451         if (g_utf8_validate (s, -1, NULL)) {
 452                 return get_breakpoint_utf8 (s, indent, limit);
 453         } else {                /* assume ASCII */
 454                 //g_warning("invalid UTF-8 in msg");
 455                 return get_breakpoint_ascii (s, indent, limit);
 456         }
 457 }
 458
 459 static gchar *
 460 cite (const time_t sent_date, const gchar *from)
 461 {
 462         gchar sent_str[101];
 463
 464         /* format sent_date */
 465         modest_text_utils_strftime (sent_str, 100, "%c", localtime (&sent_date));
 466         return g_strdup_printf (N_("On %s, %s wrote:\n"), sent_str, from);
 467 }
 468
 469
 470 static gchar *
 471 modest_text_utils_quote_plain_text (const gchar *text,
 472                                     const gchar *cite,
 473                                     int limit)
 474 {
 475         const gchar *iter;
 476         gint indent, breakpoint, rem_indent = 0;
 477         GString *q, *l, *remaining;
 478         gsize len;
 479         gchar *tmp;
 480
 481         /* remaining will store the rest of the line if we have to break it */
 482         q = g_string_new (cite);
 483         remaining = g_string_new ("");
 484
 485         iter = text;
 486         len = strlen(text);
 487         do {
 488                 l = get_next_line (text, len, iter);
 489                 iter = iter + l->len + 1;
 490                 indent = get_indent_level (l->str);
 491                 unquote_line (l);
 492
 493                 if (remaining->len) {
 494                         if (l->len && indent == rem_indent) {
 495                                 g_string_prepend (l, " ");
 496                                 g_string_prepend (l, remaining->str);
 497                         } else {
 498                                 do {
 499                                         breakpoint =
 500                                                 get_breakpoint (remaining->     str,
 501                                                                 rem_indent,
 502                                                                 limit);
 503                                         append_quoted (q, rem_indent,
 504                                                        remaining, breakpoint);
 505                                         g_string_erase (remaining, 0,
 506                                                         breakpoint);
 507                                         if (remaining->str[0] == ' ') {
 508                                                 g_string_erase (remaining, 0,
 509                                                                 1);
 510                                         }
 511                                 } while (remaining->len);
 512                         }
 513                 }
 514                 g_string_free (remaining, TRUE);
 515                 breakpoint = get_breakpoint (l->str, indent, limit);
 516                 remaining = g_string_new (l->str + breakpoint);
 517                 if (remaining->str[0] == ' ') {
 518                         g_string_erase (remaining, 0, 1);
 519                 }
 520                 rem_indent = indent;
 521                 append_quoted (q, indent, l, breakpoint);
 522                 g_string_free (l, TRUE);
 523         } while ((iter < text + len) || (remaining->str[0]));
 524
 525         return g_string_free (q, FALSE);
 526 }
 527
 528 static gchar*
 529 modest_text_utils_quote_html (const gchar *text,
 530                               const gchar *cite,
 531                               int limit)
 532 {
 533         const gchar *format = \
 534                 "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n" \
 535                 "<html>\n" \
 536                 "<body>\n" \
 537                 "%s" \
 538                 "<blockquote type=\"cite\">\n%s\n</blockquote>\n" \
 539                 "</body>\n" \
 540                 "</html>\n";
 541
 542         return g_strdup_printf (format, cite, text);
 543 }
 544
 545 static gint
 546 cmp_offsets_reverse (const url_match_t *match1, const url_match_t *match2)
 547 {
 548         return match2->offset - match1->offset;
 549 }
 550
 551
 552
 553 /*
 554  * check if the match is inside an existing match... */
 555 static void
 556 chk_partial_match (const url_match_t *match, guint* offset)
 557 {
 558         if (*offset >= match->offset && *offset < match->offset + match->len)
 559                 *offset = -1;
 560 }
 561
 562 static GSList*
 563 get_url_matches (GString *txt)
 564 {
 565         regmatch_t rm;
 566         guint rv, i, offset = 0;
 567         GSList *match_list = NULL;
 568
 569         static url_match_pattern_t patterns[] = MAIL_VIEWER_URL_MATCH_PATTERNS;
 570         const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
 571
 572         /* initalize the regexps */
 573         for (i = 0; i != pattern_num; ++i) {
 574                 patterns[i].preg = g_new0 (regex_t,1);
 575                 g_assert(regcomp (patterns[i].preg, patterns[i].regex,
 576                                   REG_ICASE|REG_EXTENDED|REG_NEWLINE) == 0);
 577         }
 578         /* find all the matches */
 579         for (i = 0; i != pattern_num; ++i) {
 580                 offset     = 0;
 581                 while (1) {
 582                         int test_offset;
 583                         if ((rv = regexec (patterns[i].preg, txt->str + offset, 1, &rm, 0)) != 0) {
 584                                 g_assert (rv == REG_NOMATCH); /* this should not happen */
 585                                 break; /* try next regexp */
 586                         }
 587                         if (rm.rm_so == -1)
 588                                 break;
 589
 590                         /* FIXME: optimize this */
 591                         /* to avoid partial matches on something that was already found... */
 592                         /* check_partial_match will put -1 in the data ptr if that is the case */
 593                         test_offset = offset + rm.rm_so;
 594                         g_slist_foreach (match_list, (GFunc)chk_partial_match, &test_offset);
 595
 596                         /* make a list of our matches (<offset, len, prefix> tupels)*/
 597                         if (test_offset != -1) {
 598                                 url_match_t *match = g_new (url_match_t,1);
 599                                 match->offset = offset + rm.rm_so;
 600                                 match->len    = rm.rm_eo - rm.rm_so;
 601                                 match->prefix = patterns[i].prefix;
 602                                 match_list = g_slist_prepend (match_list, match);
 603                         }
 604                         offset += rm.rm_eo;
 605                 }
 606         }
 607
 608         for (i = 0; i != pattern_num; ++i) {
 609                 regfree (patterns[i].preg);
 610                 g_free  (patterns[i].preg);
 611         } /* don't free patterns itself -- it's static */
 612
 613         /* now sort the list, so the matches are in reverse order of occurence.
 614          * that way, we can do the replacements starting from the end, so we don't need
 615          * to recalculate the offsets
 616          */
 617         match_list = g_slist_sort (match_list,
 618                                    (GCompareFunc)cmp_offsets_reverse);
 619         return match_list;
 620 }
 621
 622
 623
 624 static void
 625 hyperlinkify_plain_text (GString *txt)
 626 {
 627         GSList *cursor;
 628         GSList *match_list = get_url_matches (txt);
 629
 630         /* we will work backwards, so the offsets stay valid */
 631         for (cursor = match_list; cursor; cursor = cursor->next) {
 632
 633                 url_match_t *match = (url_match_t*) cursor->data;
 634                 gchar *url  = g_strndup (txt->str + match->offset, match->len);
 635                 gchar *repl = NULL; /* replacement  */
 636
 637                 /* the prefix is NULL: use the one that is already there */
 638                 repl = g_strdup_printf ("<a href=\"%s%s\">%s</a>",
 639                                         match->prefix ? match->prefix : "", url, url);
 640
 641                 /* replace the old thing with our hyperlink
 642                  * replacement thing */
 643                 g_string_erase  (txt, match->offset, match->len);
 644                 g_string_insert (txt, match->offset, repl);
 645
 646                 g_free (url);
 647                 g_free (repl);
 648
 649                 g_free (cursor->data);
 650         }
 651
 652         g_slist_free (match_list);
 653 }
 654
 655
 656
 657 gchar*
 658 modest_text_utils_display_address (gchar *address)
 659 {
 660         gchar *cursor;
 661
 662         if (!address)
 663                 return NULL;
 664
 665         g_return_val_if_fail (g_utf8_validate (address, -1, NULL), NULL);
 666
 667         /* simplistic --> remove <email@address> from display name */
 668         cursor = g_strstr_len (address, strlen(address), "<");
 669         if (cursor)
 670                 cursor[0]='\0';
 671
 672         /* simplistic --> remove (bla bla) from display name */
 673         cursor = g_strstr_len (address, strlen(address), "(");
 674         if (cursor)
 675                 cursor[0]='\0';
 676
 677         return address;
 678 }
 679