git.maemo.org Git - modest/blob - src/modest-text-utils.c

   1 /* Copyright (c) 2006, Nokia Corporation
   2  * All rights reserved.
   3  *
   4  * Redistribution and use in source and binary forms, with or without
   5  * modification, are permitted provided that the following conditions are
   6  * met:
   7  *
   8  * * Redistributions of source code must retain the above copyright
   9  *   notice, this list of conditions and the following disclaimer.
  10  * * Redistributions in binary form must reproduce the above copyright
  11  *   notice, this list of conditions and the following disclaimer in the
  12  *   documentation and/or other materials provided with the distribution.
  13  * * Neither the name of the Nokia Corporation nor the names of its
  14  *   contributors may be used to endorse or promote products derived from
  15  *   this software without specific prior written permission.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
  18  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  19  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
  20  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
  21  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  22  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  23  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  24  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  25  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  26  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  27  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28  */
  29
  30
  31
  32 #ifndef _GNU_SOURCE
  33 #define _GNU_SOURCE
  34 #endif /*_GNU_SOURCE*/
  35 #include <string.h> /* for strcasestr */
  36
  37
  38 #include <glib.h>
  39 #include <stdlib.h>
  40 #include <glib/gi18n.h>
  41 #include <regex.h>
  42 #include <modest-tny-platform-factory.h>
  43 #include <modest-text-utils.h>
  44 #include <modest-runtime.h>
  45 #include <ctype.h>
  46
  47 #ifdef HAVE_CONFIG_H
  48 #include <config.h>
  49 #endif /*HAVE_CONFIG_H */
  50
  51 /* defines */
  52 #define FORWARD_STRING _("mcen_ia_editor_original_message")
  53 #define FROM_STRING _("mail_va_from")
  54 #define SENT_STRING _("mcen_fi_message_properties_sent")
  55 #define TO_STRING _("mail_va_to")
  56 #define SUBJECT_STRING _("mail_va_subject")
  57 #define EMPTY_STRING ""
  58
/*
 * Do the hyperlinkification only for texts < 50 Kb, as it is quite slow.
 * Without this limit, e.g. a mail with an uuencoded part (which is not
 * recognized as an attachment) will hang modest.
 */
  65 #define HYPERLINKIFY_MAX_LENGTH (1024*50)
  66
  67 /*
  68  * we need these regexps to find URLs in plain text e-mails
  69  */
  70 typedef struct _url_match_pattern_t url_match_pattern_t;
  71 struct _url_match_pattern_t {
  72         gchar   *regex;
  73         regex_t *preg;
  74         gchar   *prefix;
  75 };
  76
  77 typedef struct _url_match_t url_match_t;
  78 struct _url_match_t {
  79         guint offset;
  80         guint len;
  81         const gchar* prefix;
  82 };
  83
  84
  85 /*
  86  * we mark the ampersand with \007 when converting text->html
  87  * because after text->html we do hyperlink detecting, which
  88  * could be screwed up by the ampersand.
  89  * ie. 1<3 ==> 1\007lt;3
  90  */
  91 #define MARK_AMP '\007'
  92 #define MARK_AMP_STR "\007"
  93
  94 /* mark &amp; separately, because they are parts of urls.
  95  * ie. a&b => a\006amp;b, but a>b => a\007gt;b
  96  *
  97  * we need to handle '&' separately, because it can be part of URIs
  98  * (as in href="http://foo.bar?a=1&b=1"), so inside those URIs
  99  * we need to re-replace \006amp; with '&' again, while outside uri's
 100  * it will be '&amp;'
 101  *
 102  * yes, it's messy, but a consequence of doing text->html first, then hyperlinkify
 103  */
 104 #define MARK_AMP_URI '\006'
 105 #define MARK_AMP_URI_STR "\006"
 106
 107
 108 /* note: match MARK_AMP_URI_STR as well, because after txt->html, a '&' will look like $(MARK_AMP_URI_STR)"amp;" */
 109 #define MAIL_VIEWER_URL_MATCH_PATTERNS  {                               \
 110         { "(feed:|)(file|rtsp|http|ftp|https|mms|mmsh|webcal|feed|rtsp|rdp|lastfm|sip)://[-a-z0-9_$.+!*(),;:@%=\?/~#&" MARK_AMP_URI_STR \
 111                         "]+[-a-z0-9_$%&" MARK_AMP_URI_STR "=?/~#]",     \
 112           NULL, NULL },\
 113         { "www\\.[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]",\
 114                         NULL, "http://" },                              \
 115         { "ftp\\.[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]",\
 116           NULL, "ftp://" },\
 117         { "(jabberto|voipto|sipto|sip|chatto|skype|xmpp):[-_a-z@0-9.+]+", \
 118            NULL, NULL},                                             \
 119         { "mailto:[-_a-z0-9.\\+]+@[-_a-z0-9.]+",                    \
 120           NULL, NULL},\
 121         { "[-_a-z0-9.\\+]+@[-_a-z0-9.]+",\
 122           NULL, "mailto:"}\
 123         }
 124
 125 const gchar account_title_forbidden_chars[] = {
 126         '\\', '/', ':', '*', '?', '\'', '<', '>', '|', '^'
 127 };
 128 const gchar folder_name_forbidden_chars[] = {
 129         '<', '>', ':', '\'', '/', '\\', '|', '?', '*', '^', '%', '$', '#', '&'
 130 };
 131 const gchar user_name_forbidden_chars[] = {
 132         '<', '>'
 133 };
 134 const guint ACCOUNT_TITLE_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (account_title_forbidden_chars);
 135 const guint FOLDER_NAME_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (folder_name_forbidden_chars);
 136 const guint USER_NAME_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (user_name_forbidden_chars);
 137
 138 /* private */
 139 static gchar*   cite                    (const time_t sent_date, const gchar *from);
 140 static void     hyperlinkify_plain_text (GString *txt, gint offset);
 141 static gint     cmp_offsets_reverse     (const url_match_t *match1, const url_match_t *match2);
 142 static GSList*  get_url_matches         (GString *txt, gint offset);
 143
 144 static GString* get_next_line           (const char *b, const gsize blen, const gchar * iter);
 145 static int      get_indent_level        (const char *l);
 146 static void     unquote_line            (GString * l, const gchar *quote_symbol);
 147 static void     append_quoted           (GString * buf, const gchar *quote_symbol,
 148                                          const int indent, const GString * str,
 149                                          const int cutpoint);
 150 static int      get_breakpoint_utf8     (const gchar * s, const gint indent, const gint limit);
 151 static int      get_breakpoint_ascii    (const gchar * s, const gint indent, const gint limit);
 152 static int      get_breakpoint          (const gchar * s, const gint indent, const gint limit);
 153
 154 static gchar*   modest_text_utils_quote_plain_text (const gchar *text,
 155                                                     const gchar *cite,
 156                                                     const gchar *signature,
 157                                                     GList *attachments,
 158                                                     int limit);
 159
 160 static gchar*   modest_text_utils_quote_html       (const gchar *text,
 161                                                     const gchar *cite,
 162                                                     const gchar *signature,
 163                                                     GList *attachments,
 164                                                     int limit);
 165 static gchar*   get_email_from_address (const gchar *address);
 166
 167
 168 /* ******************************************************************* */
 169 /* ************************* PUBLIC FUNCTIONS ************************ */
 170 /* ******************************************************************* */
 171
 172 gchar *
 173 modest_text_utils_quote (const gchar *text,
 174                          const gchar *content_type,
 175                          const gchar *signature,
 176                          const gchar *from,
 177                          const time_t sent_date,
 178                          GList *attachments,
 179                          int limit)
 180 {
 181         gchar *retval, *cited;
 182
 183         g_return_val_if_fail (text, NULL);
 184         g_return_val_if_fail (content_type, NULL);
 185
 186         cited = cite (sent_date, from);
 187
 188         if (content_type && strcmp (content_type, "text/html") == 0)
 189                 /* TODO: extract the <body> of the HTML and pass it to
 190                    the function */
 191                 retval = modest_text_utils_quote_html (text, cited, signature, attachments, limit);
 192         else
 193                 retval = modest_text_utils_quote_plain_text (text, cited, signature, attachments, limit);
 194
 195         g_free (cited);
 196
 197         return retval;
 198 }
 199
 200
 201 gchar *
 202 modest_text_utils_cite (const gchar *text,
 203                         const gchar *content_type,
 204                         const gchar *signature,
 205                         const gchar *from,
 206                         time_t sent_date)
 207 {
 208         gchar *retval;
 209         gchar *tmp_sig;
 210
 211         g_return_val_if_fail (text, NULL);
 212         g_return_val_if_fail (content_type, NULL);
 213
 214         if (!signature) {
 215                 tmp_sig = g_strdup (text);
 216         } else {
 217                 tmp_sig = g_strconcat (text, "\n", MODEST_TEXT_UTILS_SIGNATURE_MARKER, "\n", signature, NULL);
 218         }
 219
 220         if (strcmp (content_type, "text/html") == 0) {
 221                 retval = modest_text_utils_convert_to_html_body (tmp_sig, -1, TRUE);
 222                 g_free (tmp_sig);
 223         } else {
 224                 retval = tmp_sig;
 225         }
 226
 227         return retval;
 228 }
 229
 230 static gchar *
 231 forward_cite (const gchar *from,
 232               const gchar *sent,
 233               const gchar *to,
 234               const gchar *subject)
 235 {
 236         g_return_val_if_fail (sent, NULL);
 237
 238         return g_strdup_printf ("%s\n%s %s\n%s %s\n%s %s\n%s %s\n",
 239                                 FORWARD_STRING,
 240                                 FROM_STRING, (from)?from:"",
 241                                 SENT_STRING, sent,
 242                                 TO_STRING, (to)?to:"",
 243                                 SUBJECT_STRING, (subject)?subject:"");
 244 }
 245
 246 gchar *
 247 modest_text_utils_inline (const gchar *text,
 248                           const gchar *content_type,
 249                           const gchar *signature,
 250                           const gchar *from,
 251                           time_t sent_date,
 252                           const gchar *to,
 253                           const gchar *subject)
 254 {
 255         gchar sent_str[101];
 256         gchar *cited;
 257         gchar *retval;
 258
 259         g_return_val_if_fail (text, NULL);
 260         g_return_val_if_fail (content_type, NULL);
 261
 262         modest_text_utils_strftime (sent_str, 100, "%c", sent_date);
 263
 264         cited = forward_cite (from, sent_str, to, subject);
 265
 266         if (content_type && strcmp (content_type, "text/html") == 0)
 267                 retval = modest_text_utils_quote_html (text, cited, signature, NULL, 80);
 268         else
 269                 retval = modest_text_utils_quote_plain_text (text, cited, signature, NULL, 80);
 270
 271         g_free (cited);
 272         return retval;
 273 }
 274
 275 /* just to prevent warnings:
 276  * warning: `%x' yields only last 2 digits of year in some locales
 277  */
 278 gsize
 279 modest_text_utils_strftime(char *s, gsize max, const char *fmt, time_t timet)
 280 {
 281         struct tm tm;
 282
 283         /* To prevent possible problems in strftime that could leave
 284            garbage in the s variable */
 285         if (s)
 286                 s[0] = '\0';
 287         else
 288                 return 0;
 289
 290         /* does not work on old maemo glib:
 291          *   g_date_set_time_t (&date, timet);
 292          */
 293         localtime_r (&timet, &tm);
 294         return strftime(s, max, fmt, &tm);
 295 }
 296
 297 gchar *
 298 modest_text_utils_derived_subject (const gchar *subject, const gchar *prefix)
 299 {
 300         gchar *tmp, *subject_dup, *retval;
 301         gint prefix_len;
 302
 303         g_return_val_if_fail (prefix, NULL);
 304
 305         if (!subject || subject[0] == '\0')
 306                 subject = _("mail_va_no_subject");
 307
 308         subject_dup = g_strdup (subject);
 309         tmp = g_strchug (subject_dup);
 310
 311         /* We do not want things like "Re: Re: Re:" or "Fw: Fw:" so
 312            delete the previous ones */
 313         prefix_len = strlen (prefix);
 314         do {
 315                 if (g_str_has_prefix (tmp, prefix)) {
 316                         tmp += prefix_len;
 317                         tmp = g_strchug (tmp);
 318                 } else {
 319                         gchar *prefix_down, *tmp_down;
 320
 321                         /* We need this to properly check the cases of
 322                            some clients adding FW: instead of Fw: for
 323                            example */
 324                         prefix_down = g_utf8_strdown (prefix, -1);
 325                         tmp_down = g_utf8_strdown (tmp, -1);
 326                         if (g_str_has_prefix (tmp_down, prefix_down)) {
 327                                 tmp += prefix_len;
 328                                 tmp = g_strchug (tmp);
 329                                 g_free (prefix_down);
 330                                 g_free (tmp_down);
 331                         } else {
 332                                 g_free (prefix_down);
 333                                 g_free (tmp_down);
 334                                 break;
 335                         }
 336                 }
 337         } while (tmp);
 338
 339         retval = g_strdup_printf ("%s %s", prefix, tmp);
 340         g_free (subject_dup);
 341
 342         return retval;
 343 }
 344
 345 gchar*
 346 modest_text_utils_remove_address (const gchar *address_list, const gchar *address)
 347 {
 348         gchar *dup, *token, *ptr = NULL, *result;
 349         GString *filtered_emails;
 350         gchar *email_address;
 351
 352         g_return_val_if_fail (address_list, NULL);
 353
 354         if (!address)
 355                 return g_strdup (address_list);
 356
 357         email_address = get_email_from_address (address);
 358
 359         /* search for substring */
 360         if (!strstr ((const char *) address_list, (const char *) email_address)) {
 361                 g_free (email_address);
 362                 return g_strdup (address_list);
 363         }
 364
 365         dup = g_strdup (address_list);
 366         filtered_emails = g_string_new (NULL);
 367
 368         token = strtok_r (dup, ",", &ptr);
 369
 370         while (token != NULL) {
 371                 /* Add to list if not found */
 372                 if (!strstr ((const char *) token, (const char *) email_address)) {
 373                         if (filtered_emails->len == 0)
 374                                 g_string_append_printf (filtered_emails, "%s", g_strstrip (token));
 375                         else
 376                                 g_string_append_printf (filtered_emails, ",%s", g_strstrip (token));
 377                 }
 378                 token = strtok_r (NULL, ",", &ptr);
 379         }
 380         result = filtered_emails->str;
 381
 382         /* Clean */
 383         g_free (email_address);
 384         g_free (dup);
 385         g_string_free (filtered_emails, FALSE);
 386
 387         return result;
 388 }
 389
 390
 391 gchar*
 392 modest_text_utils_remove_duplicate_addresses (const gchar *address_list)
 393 {
 394         GSList *addresses, *cursor;
 395         GHashTable *table;
 396         gchar *new_list = NULL;
 397
 398         g_return_val_if_fail (address_list, NULL);
 399
 400         table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
 401         addresses = modest_text_utils_split_addresses_list (address_list);
 402
 403         cursor = addresses;
 404         while (cursor) {
 405                 const gchar* address = (const gchar*)cursor->data;
 406
 407                 /* We need only the email to just compare it and not
 408                    the full address which would make "a <a@a.com>"
 409                    different from "a@a.com" */
 410                 const gchar *email = get_email_from_address (address);
 411
 412                 /* ignore the address if already seen */
 413                 if (g_hash_table_lookup (table, email) == 0) {
 414                         gchar *tmp;
 415
 416                         /* Include the full address and not only the
 417                            email in the returned list */
 418                         if (!new_list) {
 419                                 tmp = g_strdup (address);
 420                         } else {
 421                                 tmp = g_strjoin (",", new_list, address, NULL);
 422                                 g_free (new_list);
 423                         }
 424                         new_list = tmp;
 425
 426                         g_hash_table_insert (table, (gchar*)email, GINT_TO_POINTER(1));
 427                 }
 428                 cursor = g_slist_next (cursor);
 429         }
 430
 431         g_hash_table_unref (table);
 432         g_slist_foreach (addresses, (GFunc)g_free, NULL);
 433         g_slist_free (addresses);
 434
 435         if (new_list == NULL)
 436                 new_list = g_strdup ("");
 437
 438         return new_list;
 439 }
 440
 441
 442 static void
 443 modest_text_utils_convert_buffer_to_html_start (GString *html, const gchar *data, gssize n)
 444 {
 445         guint           i;
 446         gboolean        space_seen = FALSE;
 447         guint           break_dist = 0; /* distance since last break point */
 448
 449         if (n == -1)
 450                 n = strlen (data);
 451
 452         /* replace with special html chars where needed*/
 453         for (i = 0; i != n; ++i)  {
 454                 guchar kar = data[i];
 455
 456                 if (space_seen && kar != ' ') {
 457                         g_string_append (html, "&nbsp;");
 458                         space_seen = FALSE;
 459                 }
 460
 461                 /* we artificially insert a breakpoint (newline)
 462                  * after 256, to make sure our lines are not so long
 463                  * they will DOS the regexping later
 464                  * Also, check that kar is ASCII to make sure that we
 465                  * don't break a UTF8 char in two
 466                  */
 467                 if (++break_dist >= 256 && kar < 127) {
 468                         g_string_append_c (html, '\n');
 469                         break_dist = 0;
 470                 }
 471
 472                 switch (kar) {
 473                 case 0:
 474                 case MARK_AMP:
 475                 case MARK_AMP_URI:
 476                         /* this is a temp place holder for '&'; we can only
 477                                 * set the real '&' after hyperlink translation, otherwise
 478                                 * we might screw that up */
 479                         break; /* ignore embedded \0s and MARK_AMP */
 480                 case '<'  : g_string_append (html, MARK_AMP_STR "lt;");   break;
 481                 case '>'  : g_string_append (html, MARK_AMP_STR "gt;");   break;
 482                 case '&'  : g_string_append (html, MARK_AMP_URI_STR "amp;");  break; /* special case */
 483                 case '"'  : g_string_append (html, MARK_AMP_STR "quot;");  break;
 484
 485                 /* don't convert &apos; --> wpeditor will try to re-convert it... */
 486                 //case '\'' : g_string_append (html, "&apos;"); break;
 487                 case '\n' : g_string_append (html, "<br>\n");break_dist= 0; break;
 488                 case '\t' : g_string_append (html, MARK_AMP_STR "nbsp;" MARK_AMP_STR "nbsp;" MARK_AMP_STR "nbsp; ");
 489                         break_dist=0; break; /* note the space at the end*/
 490                 case ' ':
 491                         break_dist = 0;
 492                         if (space_seen) { /* second space in a row */
 493                                 g_string_append (html, "&nbsp; ");
 494                                 space_seen = FALSE;
 495                         } else
 496                                 space_seen = TRUE;
 497                         break;
 498                 default:
 499                         g_string_append_c (html, kar);
 500                 }
 501         }
 502 }
 503
 504
 505 static void
 506 modest_text_utils_convert_buffer_to_html_finish (GString *html)
 507 {
 508         int i;
 509         /* replace all our MARK_AMPs with real ones */
 510         for (i = 0; i != html->len; ++i)
 511                 if ((html->str)[i] == MARK_AMP || (html->str)[i] == MARK_AMP_URI)
 512                         (html->str)[i] = '&';
 513 }
 514
 515
 516 gchar*
 517 modest_text_utils_convert_to_html (const gchar *data)
 518 {
 519         GString         *html;
 520         gsize           len;
 521
 522         g_return_val_if_fail (data, NULL);
 523
 524         if (!data)
 525                 return NULL;
 526
 527         len = strlen (data);
 528         html = g_string_sized_new (1.5 * len);  /* just a  guess... */
 529
 530         g_string_append_printf (html,
 531                                 "<html><head>"
 532                                 "<meta http-equiv=\"content-type\" content=\"text/html; charset=utf8\">"
 533                                 "</head>"
 534                                 "<body>");
 535
 536         modest_text_utils_convert_buffer_to_html_start (html, data, -1);
 537
 538         g_string_append (html, "</body></html>");
 539
 540         if (len <= HYPERLINKIFY_MAX_LENGTH)
 541                 hyperlinkify_plain_text (html, 0);
 542
 543         modest_text_utils_convert_buffer_to_html_finish (html);
 544
 545         return g_string_free (html, FALSE);
 546 }
 547
 548 gchar *
 549 modest_text_utils_convert_to_html_body (const gchar *data, gssize n, gboolean hyperlinkify)
 550 {
 551         GString         *html;
 552
 553         g_return_val_if_fail (data, NULL);
 554
 555         if (!data)
 556                 return NULL;
 557
 558         if (n == -1)
 559                 n = strlen (data);
 560         html = g_string_sized_new (1.5 * n);    /* just a  guess... */
 561
 562         modest_text_utils_convert_buffer_to_html_start (html, data, n);
 563
 564         if (hyperlinkify && (n < HYPERLINKIFY_MAX_LENGTH))
 565                 hyperlinkify_plain_text (html, 0);
 566
 567         modest_text_utils_convert_buffer_to_html_finish (html);
 568
 569         return g_string_free (html, FALSE);
 570 }
 571
 572 void
 573 modest_text_utils_get_addresses_indexes (const gchar *addresses, GSList **start_indexes, GSList **end_indexes)
 574 {
 575         gchar *current, *start, *last_blank;
 576         gint start_offset = 0, current_offset = 0;
 577
 578         g_return_if_fail (start_indexes != NULL);
 579         g_return_if_fail (end_indexes != NULL);
 580
 581         start = (gchar *) addresses;
 582         current = start;
 583         last_blank = start;
 584
 585         while (*current != '\0') {
 586                 if ((start == current)&&((*current == ' ')||(*current == ',')||(*current == ';'))) {
 587                         start = g_utf8_next_char (start);
 588                         start_offset++;
 589                         last_blank = current;
 590                 } else if ((*current == ',')||(*current == ';')) {
 591                         gint *start_index, *end_index;
 592                         start_index = g_new0(gint, 1);
 593                         end_index = g_new0(gint, 1);
 594                         *start_index = start_offset;
 595                         *end_index = current_offset;
 596                         *start_indexes = g_slist_prepend (*start_indexes, start_index);
 597                         *end_indexes = g_slist_prepend (*end_indexes, end_index);
 598                         start = g_utf8_next_char (current);
 599                         start_offset = current_offset + 1;
 600                         last_blank = start;
 601                 } else if (*current == '"') {
 602                         current = g_utf8_next_char (current);
 603                         current_offset ++;
 604                         while ((*current != '"')&&(*current != '\0')) {
 605                                 current = g_utf8_next_char (current);
 606                                 current_offset ++;
 607                         }
 608                 }
 609
 610                 current = g_utf8_next_char (current);
 611                 current_offset ++;
 612         }
 613
 614         if (start != current) {
 615                         gint *start_index, *end_index;
 616                         start_index = g_new0(gint, 1);
 617                         end_index = g_new0(gint, 1);
 618                         *start_index = start_offset;
 619                         *end_index = current_offset;
 620                         *start_indexes = g_slist_prepend (*start_indexes, start_index);
 621                         *end_indexes = g_slist_prepend (*end_indexes, end_index);
 622         }
 623
 624         *start_indexes = g_slist_reverse (*start_indexes);
 625         *end_indexes = g_slist_reverse (*end_indexes);
 626
 627         return;
 628 }
 629
 630
 631 GSList *
 632 modest_text_utils_split_addresses_list (const gchar *addresses)
 633 {
 634         GSList *head;
 635         const gchar *my_addrs = addresses;
 636         const gchar *end;
 637         gchar *addr;
 638         gboolean after_at = FALSE;
 639
 640         g_return_val_if_fail (addresses, NULL);
 641
 642         /* skip any space, ',', ';' at the start */
 643         while (my_addrs && (my_addrs[0] == ' ' || my_addrs[0] == ',' || my_addrs[0] == ';'))
 644                ++my_addrs;
 645
 646         /* are we at the end of addresses list? */
 647         if (!my_addrs[0])
 648                 return NULL;
 649
 650         /* nope, we are at the start of some address
 651          * now, let's find the end of the address */
 652         end = my_addrs + 1;
 653         while (end[0] && end[0] != ';' && !(after_at && end[0] == ',')) {
 654                 if (end[0] == '\"') {
 655                         while (end[0] && end[0] != '\"')
 656                                 ++end;
 657                 }
 658                 if (end[0] == '@') {
 659                         after_at = TRUE;
 660                 }
 661                 if ((end[0] && end[0] == '>')&&(end[1] && end[1] == ',')) {
 662                         ++end;
 663                         break;
 664                 }
 665                 ++end;
 666         }
 667
 668         /* we got the address; copy it and remove trailing whitespace */
 669         addr = g_strndup (my_addrs, end - my_addrs);
 670         g_strchomp (addr);
 671
 672         head = g_slist_append (NULL, addr);
 673         head->next = modest_text_utils_split_addresses_list (end); /* recurse */
 674
 675         return head;
 676 }
 677
 678
 679 void
 680 modest_text_utils_address_range_at_position (const gchar *recipients_list,
 681                                              guint position,
 682                                              guint *start,
 683                                              guint *end)
 684 {
 685         gchar *current = NULL;
 686         gint range_start = 0;
 687         gint range_end = 0;
 688         gint index;
 689         gboolean is_quoted = FALSE;
 690
 691         g_return_if_fail (recipients_list);
 692         g_return_if_fail (position < g_utf8_strlen(recipients_list, -1));
 693
 694         index = 0;
 695         for (current = (gchar *) recipients_list; *current != '\0';
 696              current = g_utf8_find_next_char (current, NULL)) {
 697                 gunichar c = g_utf8_get_char (current);
 698
 699                 if ((c == ',') && (!is_quoted)) {
 700                         if (index < position) {
 701                                 range_start = index + 1;
 702                         } else {
 703                                 break;
 704                         }
 705                 } else if (c == '\"') {
 706                         is_quoted = !is_quoted;
 707                 } else if ((c == ' ') &&(range_start == index)) {
 708                         range_start ++;
 709                 }
 710                 index ++;
 711                 range_end = index;
 712         }
 713
 714         if (start)
 715                 *start = range_start;
 716         if (end)
 717                 *end = range_end;
 718 }
 719
 720 gchar *
 721 modest_text_utils_address_with_standard_length (const gchar *recipients_list)
 722 {
 723         gchar ** splitted;
 724         gchar ** current;
 725         GString *buffer = g_string_new ("");
 726
 727         splitted = g_strsplit (recipients_list, "\n", 0);
 728         current = splitted;
 729         while (*current) {
 730                 gchar *line;
 731                 if (current != splitted)
 732                         buffer = g_string_append_c (buffer, '\n');
 733                 line = g_strndup (*splitted, 1000);
 734                 buffer = g_string_append (buffer, line);
 735                 g_free (line);
 736                 current++;
 737         }
 738
 739         g_strfreev (splitted);
 740
 741         return g_string_free (buffer, FALSE);
 742 }
 743
 744
 745 /* ******************************************************************* */
/* ************************ UTILITY FUNCTIONS ************************ */
 747 /* ******************************************************************* */
 748
 749 static GString *
 750 get_next_line (const gchar * b, const gsize blen, const gchar * iter)
 751 {
 752         GString *gs;
 753         const gchar *i0;
 754
 755         if (iter > b + blen)
 756                 return g_string_new("");
 757
 758         i0 = iter;
 759         while (iter[0]) {
 760                 if (iter[0] == '\n')
 761                         break;
 762                 iter++;
 763         }
 764         gs = g_string_new_len (i0, iter - i0);
 765         return gs;
 766 }
 767 static int
 768 get_indent_level (const char *l)
 769 {
 770         int indent = 0;
 771
 772         while (l[0]) {
 773                 if (l[0] == '>') {
 774                         indent++;
 775                         if (l[1] == ' ') {
 776                                 l++;
 777                         }
 778                 } else {
 779                         break;
 780                 }
 781                 l++;
 782
 783         }
 784
 785         /*      if we hit the signature marker "-- ", we return -(indent + 1). This
 786          *      stops reformatting.
 787          */
 788         if (strcmp (l, MODEST_TEXT_UTILS_SIGNATURE_MARKER) == 0) {
 789                 return -1 - indent;
 790         } else {
 791                 return indent;
 792         }
 793 }
 794
 795 static void
 796 unquote_line (GString * l, const gchar *quote_symbol)
 797 {
 798         gchar *p;
 799         gint quote_len;
 800
 801         p = l->str;
 802         quote_len = strlen (quote_symbol);
 803         while (p[0]) {
 804                 if (g_str_has_prefix (p, quote_symbol)) {
 805                         if (p[quote_len] == ' ') {
 806                                 p += quote_len;
 807                         }
 808                 } else {
 809                         break;
 810                 }
 811                 p++;
 812         }
 813         g_string_erase (l, 0, p - l->str);
 814 }
 815
 816 static void
 817 append_quoted (GString * buf, const gchar *quote_symbol,
 818                int indent, const GString * str,
 819                const int cutpoint)
 820 {
 821         int i;
 822         gchar *quote_concat;
 823
 824         indent = indent < 0 ? abs (indent) - 1 : indent;
 825         quote_concat = g_strconcat (quote_symbol, " ", NULL);
 826         for (i = 0; i <= indent; i++) {
 827                 g_string_append (buf, quote_concat);
 828         }
 829         g_free (quote_concat);
 830         if (cutpoint > 0) {
 831                 g_string_append_len (buf, str->str, cutpoint);
 832         } else {
 833                 g_string_append (buf, str->str);
 834         }
 835         g_string_append (buf, "\n");
 836 }
 837
 838 static int
 839 get_breakpoint_utf8 (const gchar * s, gint indent, const gint limit)
 840 {
 841         gint index = 0;
 842         const gchar *pos, *last;
 843         gunichar *uni;
 844
 845         indent = indent < 0 ? abs (indent) - 1 : indent;
 846
 847         last = NULL;
 848         pos = s;
 849         uni = g_utf8_to_ucs4_fast (s, -1, NULL);
 850         while (pos[0]) {
 851                 if ((index + 2 * indent > limit) && last) {
 852                         g_free (uni);
 853                         return last - s;
 854                 }
 855                 if (g_unichar_isspace (uni[index])) {
 856                         last = pos;
 857                 }
 858                 pos = g_utf8_next_char (pos);
 859                 index++;
 860         }
 861         g_free (uni);
 862         return strlen (s);
 863 }
 864
 865 static int
 866 get_breakpoint_ascii (const gchar * s, const gint indent, const gint limit)
 867 {
 868         gint i, last;
 869
 870         last = strlen (s);
 871         if (last + 2 * indent < limit)
 872                 return last;
 873
 874         for (i = strlen (s); i > 0; i--) {
 875                 if (s[i] == ' ') {
 876                         if (i + 2 * indent <= limit) {
 877                                 return i;
 878                         } else {
 879                                 last = i;
 880                         }
 881                 }
 882         }
 883         return last;
 884 }
 885
 886 static int
 887 get_breakpoint (const gchar * s, const gint indent, const gint limit)
 888 {
 889
 890         if (g_utf8_validate (s, -1, NULL)) {
 891                 return get_breakpoint_utf8 (s, indent, limit);
 892         } else {                /* assume ASCII */
 893                 //g_warning("invalid UTF-8 in msg");
 894                 return get_breakpoint_ascii (s, indent, limit);
 895         }
 896 }
 897
 898 static gchar *
 899 cite (const time_t sent_date, const gchar *from)
 900 {
 901         return g_strdup (_("mcen_ia_editor_original_message"));
 902 }
 903
 904 static gchar *
 905 quoted_attachments (GList *attachments)
 906 {
 907         GList *node = NULL;
 908         GString *result = g_string_new ("");
 909         for (node = attachments; node != NULL; node = g_list_next (node)) {
 910                 gchar *filename = (gchar *) node->data;
 911                 g_string_append_printf ( result, "%s %s\n", _("mcen_ia_editor_attach_filename"), filename);
 912         }
 913
 914         return g_string_free (result, FALSE);
 915
 916 }
 917
 918 static GString *
 919 modest_text_utils_quote_body (GString *output, const gchar *text,
 920                               const gchar *quote_symbol,
 921                               int limit)
 922 {
 923
 924         const gchar *iter;
 925         gsize len;
 926         gint indent, breakpoint, rem_indent = 0;
 927         GString *l, *remaining;
 928
 929         iter = text;
 930         len = strlen(text);
 931         remaining = g_string_new ("");
 932         do {
 933                 l = get_next_line (text, len, iter);
 934                 iter = iter + l->len + 1;
 935                 indent = get_indent_level (l->str);
 936                 unquote_line (l, quote_symbol);
 937
 938                 if (remaining->len) {
 939                         if (l->len && indent == rem_indent) {
 940                                 g_string_prepend (l, " ");
 941                                 g_string_prepend (l, remaining->str);
 942                         } else {
 943                                 do {
 944                                         breakpoint =
 945                                                 get_breakpoint (remaining->str,
 946                                                                 rem_indent,
 947                                                                 limit);
 948                                         append_quoted (output, quote_symbol, rem_indent,
 949                                                        remaining, breakpoint);
 950                                         g_string_erase (remaining, 0,
 951                                                         breakpoint);
 952                                         if (remaining->str[0] == ' ') {
 953                                                 g_string_erase (remaining, 0,
 954                                                                 1);
 955                                         }
 956                                 } while (remaining->len);
 957                         }
 958                 }
 959                 g_string_free (remaining, TRUE);
 960                 breakpoint = get_breakpoint (l->str, indent, limit);
 961                 remaining = g_string_new (l->str + breakpoint);
 962                 if (remaining->str[0] == ' ') {
 963                         g_string_erase (remaining, 0, 1);
 964                 }
 965                 rem_indent = indent;
 966                 append_quoted (output, quote_symbol, indent, l, breakpoint);
 967                 g_string_free (l, TRUE);
 968         } while ((iter < text + len) || (remaining->str[0]));
 969
 970         return output;
 971 }
 972
 973 static gchar *
 974 modest_text_utils_quote_plain_text (const gchar *text,
 975                                     const gchar *cite,
 976                                     const gchar *signature,
 977                                     GList *attachments,
 978                                     int limit)
 979 {
 980         GString *q;
 981         gchar *attachments_string = NULL;
 982
 983         q = g_string_new ("");
 984
 985         if (signature != NULL) {
 986                 g_string_append_printf (q, "\n%s\n", MODEST_TEXT_UTILS_SIGNATURE_MARKER);
 987                 q = g_string_append (q, signature);
 988         }
 989
 990         q = g_string_append (q, "\n");
 991         q = g_string_append (q, cite);
 992         q = g_string_append_c (q, '\n');
 993
 994         q = modest_text_utils_quote_body (q, text, ">", limit);
 995
 996         attachments_string = quoted_attachments (attachments);
 997         q = g_string_append (q, attachments_string);
 998         g_free (attachments_string);
 999
1000         return g_string_free (q, FALSE);
1001 }
1002
1003 static void
1004 quote_html_add_to_gstring (GString *string,
1005                            const gchar *text)
1006 {
1007         if (text && strcmp (text, "")) {
1008                 gchar *html_text = modest_text_utils_convert_to_html_body (text, -1, TRUE);
1009                 g_string_append_printf (string, "%s<br/>", html_text);
1010                 g_free (html_text);
1011         }
1012 }
1013
1014 static gchar*
1015 modest_text_utils_quote_html (const gchar *text,
1016                               const gchar *cite,
1017                               const gchar *signature,
1018                               GList *attachments,
1019                               int limit)
1020 {
1021         GString *result_string;
1022
1023         result_string =
1024                 g_string_new ( \
1025                               "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n" \
1026                               "<html>\n"                                \
1027                               "<body>\n<br/>\n");
1028
1029         if (text || cite || signature) {
1030                 GString *quoted_text;
1031                 g_string_append (result_string, "<pre>\n");
1032                 if (signature) {
1033                         quote_html_add_to_gstring (result_string, MODEST_TEXT_UTILS_SIGNATURE_MARKER);
1034                         quote_html_add_to_gstring (result_string, signature);
1035                 }
1036                 quote_html_add_to_gstring (result_string, cite);
1037                 quoted_text = g_string_new ("");
1038                 quoted_text = modest_text_utils_quote_body (quoted_text, (text) ? text : "", ">", limit);
1039                 quote_html_add_to_gstring (result_string, quoted_text->str);
1040                 g_string_free (quoted_text, TRUE);
1041                 if (attachments) {
1042                         gchar *attachments_string = quoted_attachments (attachments);
1043                         quote_html_add_to_gstring (result_string, attachments_string);
1044                         g_free (attachments_string);
1045                 }
1046                 g_string_append (result_string, "</pre>");
1047         }
1048         g_string_append (result_string, "</body>");
1049         g_string_append (result_string, "</html>");
1050
1051         return g_string_free (result_string, FALSE);
1052 }
1053
1054 static gint
1055 cmp_offsets_reverse (const url_match_t *match1, const url_match_t *match2)
1056 {
1057         return match2->offset - match1->offset;
1058 }
1059
/* number of modest_text_utils_hyperlinkify_begin() calls that have not yet
 * been matched by an _end(); the patterns are compiled when it is 0 on
 * entry to _begin() and freed when _end() brings it back to 0 */
static gint url_matches_block = 0;
/* table of url patterns; the preg member of each entry is filled in by
 * compile_patterns() and released by free_patterns() */
static url_match_pattern_t patterns[] = MAIL_VIEWER_URL_MATCH_PATTERNS;
/* held while url_matches_block and the compiled patterns are modified;
 * created on the first call to modest_text_utils_hyperlinkify_begin() */
static GMutex *url_patterns_mutex = NULL;
1063
1064
1065 static gboolean
1066 compile_patterns ()
1067 {
1068         guint i;
1069         const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
1070         for (i = 0; i != pattern_num; ++i) {
1071                 patterns[i].preg = g_slice_new0 (regex_t);
1072
1073                 /* this should not happen */
1074                 if (regcomp (patterns[i].preg, patterns[i].regex,
1075                              REG_ICASE|REG_EXTENDED|REG_NEWLINE) != 0) {
1076                         g_warning ("%s: error in regexp:\n%s\n", __FUNCTION__, patterns[i].regex);
1077                         return FALSE;
1078                 }
1079         }
1080         return TRUE;
1081 }
1082
1083 static void
1084 free_patterns ()
1085 {
1086         guint i;
1087         const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
1088         for (i = 0; i != pattern_num; ++i) {
1089                 regfree (patterns[i].preg);
1090                 g_slice_free  (regex_t, patterns[i].preg);
1091         } /* don't free patterns itself -- it's static */
1092 }
1093
1094 void
1095 modest_text_utils_hyperlinkify_begin (void)
1096 {
1097
1098         if (url_patterns_mutex == NULL) {
1099                 url_patterns_mutex = g_mutex_new ();
1100         }
1101         g_mutex_lock (url_patterns_mutex);
1102         if (url_matches_block == 0)
1103                 compile_patterns ();
1104         url_matches_block ++;
1105         g_mutex_unlock (url_patterns_mutex);
1106 }
1107
1108 void
1109 modest_text_utils_hyperlinkify_end (void)
1110 {
1111         g_mutex_lock (url_patterns_mutex);
1112         url_matches_block--;
1113         if (url_matches_block <= 0)
1114                 free_patterns ();
1115         g_mutex_unlock (url_patterns_mutex);
1116 }
1117
1118
1119 static GSList*
1120 get_url_matches (GString *txt, gint offset)
1121 {
1122         regmatch_t rm;
1123         guint rv, i, tmp_offset = 0;
1124         GSList *match_list = NULL;
1125
1126         const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
1127
1128         /* initalize the regexps */
1129         modest_text_utils_hyperlinkify_begin ();
1130
1131         /* find all the matches */
1132         for (i = 0; i != pattern_num; ++i) {
1133                 tmp_offset     = offset;
1134                 while (1) {
1135                         url_match_t *match;
1136                         gboolean is_submatch;
1137                         GSList *cursor;
1138
1139                         if ((rv = regexec (patterns[i].preg, txt->str + tmp_offset, 1, &rm, 0)) != 0) {
1140                                 g_return_val_if_fail (rv == REG_NOMATCH, NULL); /* this should not happen */
1141                                 break; /* try next regexp */
1142                         }
1143                         if (rm.rm_so == -1)
1144                                 break;
1145
1146                         is_submatch = FALSE;
1147                         /* check  old matches to see if this has already been matched */
1148                         cursor = match_list;
1149                         while (cursor && !is_submatch) {
1150                                 const url_match_t *old_match =
1151                                         (const url_match_t *) cursor->data;
1152                                 guint new_offset = tmp_offset + rm.rm_so;
1153                                 is_submatch = (new_offset >  old_match->offset &&
1154                                                new_offset <  old_match->offset + old_match->len);
1155                                 cursor = g_slist_next (cursor);
1156                         }
1157
1158                         if (!is_submatch) {
1159                                 /* make a list of our matches (<offset, len, prefix> tupels)*/
1160                                 match = g_slice_new (url_match_t);
1161                                 match->offset = tmp_offset + rm.rm_so;
1162                                 match->len    = rm.rm_eo - rm.rm_so;
1163                                 match->prefix = patterns[i].prefix;
1164                                 match_list = g_slist_prepend (match_list, match);
1165                         }
1166                         tmp_offset += rm.rm_eo;
1167                 }
1168         }
1169
1170         modest_text_utils_hyperlinkify_end ();
1171
1172         /* now sort the list, so the matches are in reverse order of occurence.
1173          * that way, we can do the replacements starting from the end, so we don't need
1174          * to recalculate the offsets
1175          */
1176         match_list = g_slist_sort (match_list,
1177                                    (GCompareFunc)cmp_offsets_reverse);
1178         return match_list;
1179 }
1180
1181
1182
1183 /* replace all occurences of needle in haystack with repl*/
1184 static gchar*
1185 replace_string (const gchar *haystack, const gchar *needle, gchar repl)
1186 {
1187         gchar *str, *cursor;
1188
1189         if (!haystack || !needle || strlen(needle) == 0)
1190                 return haystack ? g_strdup(haystack) : NULL;
1191
1192         str = g_strdup (haystack);
1193
1194         for (cursor = str; cursor && *cursor; ++cursor) {
1195                 if (g_str_has_prefix (cursor, needle)) {
1196                         cursor[0] = repl;
1197                         memmove (cursor + 1,
1198                                  cursor + strlen (needle),
1199                                  strlen (cursor + strlen (needle)) + 1);
1200                 }
1201         }
1202
1203         return str;
1204 }
1205
1206 static void
1207 hyperlinkify_plain_text (GString *txt, gint offset)
1208 {
1209         GSList *cursor;
1210         GSList *match_list = get_url_matches (txt, offset);
1211
1212         /* we will work backwards, so the offsets stay valid */
1213         for (cursor = match_list; cursor; cursor = cursor->next) {
1214
1215                 url_match_t *match = (url_match_t*) cursor->data;
1216                 gchar *url  = g_strndup (txt->str + match->offset, match->len);
1217                 gchar *repl = NULL; /* replacement  */
1218
1219                 /* the string still contains $(MARK_AMP_URI_STR)"amp;" for each
1220                  * '&' in the original, because of the text->html conversion.
1221                  * in the href-URL (and only there), we must convert that back to
1222                  * '&'
1223                  */
1224                 gchar *href_url = replace_string (url, MARK_AMP_URI_STR "amp;", '&');
1225
1226                 /* the prefix is NULL: use the one that is already there */
1227                 repl = g_strdup_printf ("<a href=\"%s%s\">%s</a>",
1228                                         match->prefix ? match->prefix : EMPTY_STRING,
1229                                         href_url, url);
1230
1231                 /* replace the old thing with our hyperlink
1232                  * replacement thing */
1233                 g_string_erase  (txt, match->offset, match->len);
1234                 g_string_insert (txt, match->offset, repl);
1235
1236                 g_free (url);
1237                 g_free (repl);
1238                 g_free (href_url);
1239
1240                 g_slice_free (url_match_t, match);
1241         }
1242
1243         g_slist_free (match_list);
1244 }
1245
1246 void
1247 modest_text_utils_hyperlinkify (GString *string_buffer)
1248 {
1249         gchar *after_body;
1250         gint offset = 0;
1251
1252         after_body = strstr (string_buffer->str, "<body>");
1253         if (after_body != NULL)
1254                 offset = after_body - string_buffer->str;
1255         hyperlinkify_plain_text (string_buffer, offset);
1256 }
1257
1258
1259 /* for optimization reasons, we change the string in-place */
1260 void
1261 modest_text_utils_get_display_address (gchar *address)
1262 {
1263         int i;
1264
1265         g_return_if_fail (address);
1266
1267         if (!address)
1268                 return;
1269
1270         /* should not be needed, and otherwise, we probably won't screw up the address
1271          * more than it already is :)
1272          * g_return_val_if_fail (g_utf8_validate (address, -1, NULL), NULL);
1273          * */
1274
1275         /* remove leading whitespace */
1276         if (address[0] == ' ')
1277                 g_strchug (address);
1278
1279         for (i = 0; address[i]; ++i) {
1280                 if (address[i] == '<') {
1281                         if (G_UNLIKELY(i == 0)) {
1282                                 break; /* there's nothing else, leave it */
1283                         }else {
1284                                 address[i] = '\0'; /* terminate the string here */
1285                                 break;
1286                         }
1287                 }
1288         }
1289
1290         g_strchomp (address);
1291 }
1292
1293
1294 gchar *
1295 modest_text_utils_get_display_addresses (const gchar *recipients)
1296 {
1297         gchar *addresses;
1298         GSList *recipient_list;
1299
1300         addresses = NULL;
1301         recipient_list = modest_text_utils_split_addresses_list (recipients);
1302         if (recipient_list) {
1303                 GString *add_string = g_string_sized_new (strlen (recipients));
1304                 GSList *iter = recipient_list;
1305                 gboolean first = TRUE;
1306
1307                 while (iter) {
1308                         /* Strings are changed in place */
1309                         modest_text_utils_get_display_address ((gchar *) iter->data);
1310                         if (G_UNLIKELY (first)) {
1311                                 g_string_append_printf (add_string, "%s", (gchar *) iter->data);
1312                                 first = FALSE;
1313                         } else {
1314                                 g_string_append_printf (add_string, ", %s", (gchar *) iter->data);
1315                         }
1316                         iter = g_slist_next (iter);
1317                 }
1318                 g_slist_foreach (recipient_list, (GFunc) g_free, NULL);
1319                 g_slist_free (recipient_list);
1320                 addresses = g_string_free (add_string, FALSE);
1321         }
1322
1323         return addresses;
1324 }
1325
1326
1327 gchar *
1328 modest_text_utils_get_email_address (const gchar *full_address)
1329 {
1330         const gchar *left, *right;
1331
1332         g_return_val_if_fail (full_address, NULL);
1333
1334         if (!full_address)
1335                 return NULL;
1336
1337         g_return_val_if_fail (g_utf8_validate (full_address, -1, NULL), NULL);
1338
1339         left = g_strrstr_len (full_address, strlen(full_address), "<");
1340         if (left == NULL)
1341                 return g_strdup (full_address);
1342
1343         right = g_strstr_len (left, strlen(left), ">");
1344         if (right == NULL)
1345                 return g_strdup (full_address);
1346
1347         return g_strndup (left + 1, right - left - 1);
1348 }
1349
1350 gint
1351 modest_text_utils_get_subject_prefix_len (const gchar *sub)
1352 {
1353         gint prefix_len = 0;
1354
1355         g_return_val_if_fail (sub, 0);
1356
1357         if (!sub)
1358                 return 0;
1359
1360         /* optimization: "Re", "RE", "re","Fwd", "FWD", "fwd","FW","Fw", "fw" */
1361         if (sub[0] != 'R' && sub[0] != 'F' && sub[0] != 'r' && sub[0] != 'f')
1362                 return 0;
1363         else if (sub[0] && sub[1] != 'e' && sub[1] != 'E' && sub[1] != 'w' && sub[1] != 'W')
1364                 return 0;
1365
1366         prefix_len = 2;
1367         if (sub[2] == 'd')
1368                 ++prefix_len;
1369
1370         /* skip over a [...] block */
1371         if (sub[prefix_len] == '[') {
1372                 int c = prefix_len + 1;
1373                 while (sub[c] && sub[c] != ']')
1374                         ++c;
1375                 if (!sub[c])
1376                         return 0; /* no end to the ']' found */
1377                 else
1378                         prefix_len = c + 1;
1379         }
1380
1381         /* did we find the ':' ? */
1382         if (sub[prefix_len] == ':') {
1383                 ++prefix_len;
1384                 if (sub[prefix_len] == ' ')
1385                         ++prefix_len;
1386                 prefix_len += modest_text_utils_get_subject_prefix_len (sub + prefix_len);
1387 /*              g_warning ("['%s','%s']", sub, (char*) sub + prefix_len); */
1388                 return prefix_len;
1389         } else
1390                 return 0;
1391 }
1392
1393
1394 gint
1395 modest_text_utils_utf8_strcmp (const gchar* s1, const gchar *s2, gboolean insensitive)
1396 {
1397
1398 /* work even when s1 and/or s2 == NULL */
1399         if (G_UNLIKELY(s1 == s2))
1400                 return 0;
1401         if (G_UNLIKELY(!s1))
1402                 return -1;
1403         if (G_UNLIKELY(!s2))
1404                 return 1;
1405
1406         /* if it's not case sensitive */
1407         if (!insensitive) {
1408
1409                 /* optimization: shortcut if first char is ascii */
1410                 if (((s1[0] & 0x80)== 0) && ((s2[0] & 0x80) == 0) &&
1411                     (s1[0] != s2[0]))
1412                         return s1[0] - s2[0];
1413
1414                 return g_utf8_collate (s1, s2);
1415
1416         } else {
1417                 gint result;
1418                 gchar *n1, *n2;
1419
1420                 /* optimization: shortcut if first char is ascii */
1421                 if (((s1[0] & 0x80) == 0) && ((s2[0] & 0x80) == 0) &&
1422                     (tolower(s1[0]) != tolower (s2[0])))
1423                         return tolower(s1[0]) - tolower(s2[0]);
1424
1425                 n1 = g_utf8_strdown (s1, -1);
1426                 n2 = g_utf8_strdown (s2, -1);
1427
1428                 result = g_utf8_collate (n1, n2);
1429
1430                 g_free (n1);
1431                 g_free (n2);
1432
1433                 return result;
1434         }
1435 }
1436
1437
1438 const gchar*
1439 modest_text_utils_get_display_date (time_t date)
1440 {
1441 #define DATE_BUF_SIZE 64
1442         static gchar date_buf[DATE_BUF_SIZE];
1443
1444         /* calculate the # of days since epoch for
1445          * for today and for the date provided
1446          * based on idea from pvanhoof */
1447         int day      = time(NULL) / (24 * 60 * 60);
1448         int date_day = date       / (24 * 60 * 60);
1449
1450         /* if it's today, show the time, if it's not today, show the date instead */
1451
1452         /* TODO: take into account the system config for 24/12h */
1453 #ifdef MODEST_TOOLKIT_HILDON2
1454         if (day == date_day) /* is the date today? */
1455                 modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, _HL("wdgt_va_24h_time"), date);
1456         else
1457                 modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, _HL("wdgt_va_date"), date);
1458 #else
1459         if (day == date_day) /* is the date today? */
1460                 modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, "%X", date);
1461         else
1462                 modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, "%x", date);
1463 #endif
1464
1465         return date_buf; /* this is a static buffer, don't free! */
1466 }
1467
1468
1469
1470 gboolean
1471 modest_text_utils_validate_folder_name (const gchar *folder_name)
1472 {
1473         /* based on http://msdn2.microsoft.com/en-us/library/aa365247.aspx,
1474          * with some extras */
1475
1476         guint len;
1477         gint i;
1478         const gchar **cursor = NULL;
1479         const gchar *forbidden_names[] = { /* windows does not like these */
1480                 "CON", "PRN", "AUX", "NUL", ".", "..", "cur", "tmp", "new",
1481                 NULL /* cur, tmp, new are reserved for Maildir */
1482         };
1483
1484         /* cannot be NULL */
1485         if (!folder_name)
1486                 return FALSE;
1487
1488         /* cannot be empty */
1489         len = strlen(folder_name);
1490         if (len == 0)
1491                 return FALSE;
1492
1493         /* cannot start with a dot, vfat does not seem to like that */
1494         if (folder_name[0] == '.')
1495                 return FALSE;
1496
1497         /* cannot start or end with a space */
1498         if (g_ascii_isspace(folder_name[0]) || g_ascii_isspace(folder_name[len - 1]))
1499                 return FALSE;
1500
1501         /* cannot contain a forbidden char */
1502         for (i = 0; i < len; i++)
1503                 if (modest_text_utils_is_forbidden_char (folder_name[i], FOLDER_NAME_FORBIDDEN_CHARS))
1504                         return FALSE;
1505
1506         /* Cannot contain Windows port numbers. I'd like to use GRegex
1507            but it's still not available in Maemo. sergio */
1508         if (!g_ascii_strncasecmp (folder_name, "LPT", 3) ||
1509             !g_ascii_strncasecmp (folder_name, "COM", 3)) {
1510                 glong val;
1511                 gchar *endptr;
1512
1513                 /* We skip the first 3 characters for the
1514                    comparison */
1515                 val = strtol(folder_name+3, &endptr, 10);
1516
1517                 /* If the conversion to long succeeded then the string
1518                    is not valid for us */
1519                 if (*endptr == '\0')
1520                         return FALSE;
1521                 else
1522                         return TRUE;
1523         }
1524
1525         /* cannot contain a forbidden word */
1526         if (len <= 4) {
1527                 for (cursor = forbidden_names; cursor && *cursor; ++cursor) {
1528                         if (g_ascii_strcasecmp (folder_name, *cursor) == 0)
1529                                 return FALSE;
1530                 }
1531         }
1532
1533         return TRUE; /* it's valid! */
1534 }
1535
1536
1537
1538 gboolean
1539 modest_text_utils_validate_domain_name (const gchar *domain)
1540 {
1541         gboolean valid = FALSE;
1542         regex_t rx;
1543         const gchar* domain_regex = "^([a-z0-9-]*[a-z0-9]\\.)+[a-z0-9-]*[a-z0-9]$";
1544
1545         g_return_val_if_fail (domain, FALSE);
1546
1547         if (!domain)
1548                 return FALSE;
1549
1550         memset (&rx, 0, sizeof(regex_t)); /* coverity wants this... */
1551
1552         /* domain name: all alphanum or '-' or '.',
1553          * but beginning/ending in alphanum */
1554         if (regcomp (&rx, domain_regex, REG_ICASE|REG_EXTENDED|REG_NOSUB)) {
1555                 g_warning ("BUG: error in regexp");
1556                 return FALSE;
1557         }
1558
1559         valid = (regexec (&rx, domain, 1, NULL, 0) == 0);
1560         regfree (&rx);
1561
1562         return valid;
1563 }
1564
1565
1566
/*
 * Check whether email_address has the form name@domain.
 *
 * The address must contain exactly one '@'.  The name part may contain
 * double-quoted sections; outside of them, and in the whole domain part,
 * control characters, spaces, non-ASCII bytes and the characters listed in
 * rfc822_specials are rejected.  The domain part must contain at least one
 * dot and must not have a leading, trailing or doubled dot.
 *
 * If invalid_char_position is not NULL it is first set to NULL; when the
 * address is rejected because of one of the rfc822_specials characters it
 * is set to point at that character inside email_address.
 *
 * Returns TRUE when the address is valid.
 */
gboolean
modest_text_utils_validate_email_address (const gchar *email_address,
                                          const gchar **invalid_char_position)
{
        int count = 0; /* number of dots seen in the domain part */
        const gchar *c = NULL, *domain = NULL;
        static gchar *rfc822_specials = "()<>@,;:\\\"[]&";

        if (invalid_char_position)
                *invalid_char_position = NULL;

        g_return_val_if_fail (email_address, FALSE);

        /* check that the email address contains exactly one @:
         * the first and the last occurrence must be the same one */
        if (!strstr(email_address, "@") ||
                        (strstr(email_address, "@") != g_strrstr(email_address, "@")))
                return FALSE;

        /* first we validate the name portion (name@domain) */
        for (c = email_address;  *c;  c++) {
                /* a quoted section may only begin at the very start, after
                 * a dot or after another quote */
                if (*c == '\"' &&
                    (c == email_address ||
                     *(c - 1) == '.' ||
                     *(c - 1) == '\"')) {
                        /* scan up to the closing quote */
                        while (*++c) {
                                if (*c == '\"')
                                        break;
                                /* a backslash followed by a space is
                                 * accepted; note that *++c moves on to the
                                 * next character even when it is no space */
                                if (*c == '\\' && (*++c == ' '))
                                        continue;
                                if (*c <= ' ' || *c >= 127)
                                        return FALSE;
                        }
                        /* the string ended before the closing quote; if it
                         * did not, step past the quote */
                        if (!*c++)
                                return FALSE;
                        if (*c == '@')
                                break;
                        /* only '@' or '.' may follow a quoted section */
                        if (*c != '.')
                                return FALSE;
                        /* the for loop's c++ steps over the '.' */
                        continue;
                }
                if (*c == '@')
                        break;
                if (*c <= ' ' || *c >= 127)
                        return FALSE;
                if (strchr(rfc822_specials, *c)) {
                        if (invalid_char_position)
                                *invalid_char_position = c;
                        return FALSE;
                }
        }
        /* the name part must not be empty and must not end in a dot */
        if (c == email_address || *(c - 1) == '.')
                return FALSE;

        /* next we validate the domain portion (name@domain);
         * c is moved past the '@' and the domain must not be empty */
        if (!*(domain = ++c))
                return FALSE;
        do {
                if (*c == '.') {
                        /* no leading, doubled or trailing dot */
                        if (c == domain || *(c - 1) == '.' || *(c + 1) == '\0')
                                return FALSE;
                        count++;
                }
                if (*c <= ' ' || *c >= 127)
                        return FALSE;
                if (strchr(rfc822_specials, *c)) {
                        if (invalid_char_position)
                                *invalid_char_position = c;
                        return FALSE;
                }
        } while (*++c);

        /* the domain needs at least one dot */
        return (count >= 1) ? TRUE : FALSE;
}
1640
1641 gboolean
1642 modest_text_utils_validate_recipient (const gchar *recipient, const gchar **invalid_char_position)
1643 {
1644         gchar *stripped, *current;
1645         gchar *right_part;
1646         gboolean has_error = FALSE;
1647
1648         if (invalid_char_position)
1649                 *invalid_char_position = NULL;
1650
1651         g_return_val_if_fail (recipient, FALSE);
1652
1653         if (modest_text_utils_validate_email_address (recipient, invalid_char_position))
1654                 return TRUE;
1655
1656         stripped = g_strdup (recipient);
1657         stripped = g_strstrip (stripped);
1658         current = stripped;
1659
1660         if (*current == '\0') {
1661                 g_free (stripped);
1662                 return FALSE;
1663         }
1664
1665         /* quoted string */
1666         if (*current == '\"') {
1667                 current = g_utf8_next_char (current);
1668                 has_error = TRUE;
1669                 for (; *current != '\0'; current = g_utf8_next_char (current)) {
1670                         if (*current == '\\') {
1671                                 /* TODO: This causes a warning, which breaks the build,
1672                                  * because a gchar cannot be < 0.
1673                                  * murrayc.
1674                                 if (current[1] <0) {
1675                                         has_error = TRUE;
1676                                         break;
1677                                 }
1678                                 */
1679                         } else if (*current == '\"') {
1680                                 has_error = FALSE;
1681                                 current = g_utf8_next_char (current);
1682                                 break;
1683                         }
1684                 }
1685         } else {
1686                 has_error = TRUE;
1687                 for (current = stripped ; *current != '\0'; current = g_utf8_next_char (current)) {
1688                         if (*current == '<') {
1689                                 has_error = FALSE;
1690                                 break;
1691                         }
1692                 }
1693         }
1694
1695         if (has_error) {
1696                 g_free (stripped);
1697                 return FALSE;
1698         }
1699
1700         right_part = g_strdup (current);
1701         g_free (stripped);
1702         right_part = g_strstrip (right_part);
1703
1704         if (g_str_has_prefix (right_part, "<") &&
1705             g_str_has_suffix (right_part, ">")) {
1706                 gchar *address;
1707                 gboolean valid;
1708
1709                 address = g_strndup (right_part+1, strlen (right_part) - 2);
1710                 g_free (right_part);
1711                 valid = modest_text_utils_validate_email_address (address, invalid_char_position);
1712                 g_free (address);
1713                 return valid;
1714         } else {
1715                 g_free (right_part);
1716                 return FALSE;
1717         }
1718 }
1719
1720
1721 gchar *
1722 modest_text_utils_get_display_size (guint64 size)
1723 {
1724         const guint KB=1024;
1725         const guint MB=1024 * KB;
1726         const guint GB=1024 * MB;
1727
1728         if (size == 0)
1729                 return g_strdup_printf (_FM("sfil_li_size_kb"), (int) 0);
1730         if (0 <= size && size < KB)
1731                 return g_strdup_printf (_FM("sfil_li_size_1kb_99kb"), (int) 1);
1732         else if (KB <= size && size < 100 * KB)
1733                 return g_strdup_printf (_FM("sfil_li_size_1kb_99kb"), (int) size / KB);
1734         else if (100*KB <= size && size < MB)
1735                 return g_strdup_printf (_FM("sfil_li_size_100kb_1mb"), (int) size / KB);
1736         else if (MB <= size && size < 10*MB)
1737                 return g_strdup_printf (_FM("sfil_li_size_1mb_10mb"), (float) size / MB);
1738         else if (10*MB <= size && size < GB)
1739                 return g_strdup_printf (_FM("sfil_li_size_10mb_1gb"), (float) size / MB);
1740         else
1741                 return g_strdup_printf (_FM("sfil_li_size_1gb_or_greater"), (float) size / GB);
1742 }
1743
1744 static gchar *
1745 get_email_from_address (const gchar * address)
1746 {
1747         gchar *left_limit, *right_limit;
1748
1749         left_limit = strstr (address, "<");
1750         right_limit = g_strrstr (address, ">");
1751
1752         if ((left_limit == NULL)||(right_limit == NULL)|| (left_limit > right_limit))
1753                 return g_strdup (address);
1754         else
1755                 return g_strndup (left_limit + 1, (right_limit - left_limit) - 1);
1756 }
1757
1758 gchar *
1759 modest_text_utils_get_color_string (GdkColor *color)
1760 {
1761         g_return_val_if_fail (color, NULL);
1762
1763         return g_strdup_printf ("#%x%x%x%x%x%x%x%x%x%x%x%x",
1764                                 (color->red >> 12)   & 0xf, (color->red >> 8)   & 0xf,
1765                                 (color->red >>  4)   & 0xf, (color->red)        & 0xf,
1766                                 (color->green >> 12) & 0xf, (color->green >> 8) & 0xf,
1767                                 (color->green >>  4) & 0xf, (color->green)      & 0xf,
1768                                 (color->blue >> 12)  & 0xf, (color->blue >> 8)  & 0xf,
1769                                 (color->blue >>  4)  & 0xf, (color->blue)       & 0xf);
1770 }
1771
1772 gchar *
1773 modest_text_utils_text_buffer_get_text (GtkTextBuffer *buffer)
1774 {
1775         GtkTextIter start, end;
1776         gchar *slice, *current;
1777         GString *result = g_string_new ("");
1778
1779         g_return_val_if_fail (buffer && GTK_IS_TEXT_BUFFER (buffer), NULL);
1780
1781         gtk_text_buffer_get_start_iter (buffer, &start);
1782         gtk_text_buffer_get_end_iter (buffer, &end);
1783
1784         slice = gtk_text_buffer_get_slice (buffer, &start, &end, FALSE);
1785         current = slice;
1786
1787         while (current && current != '\0') {
1788                 if (g_utf8_get_char (current) == 0xFFFC) {
1789                         result = g_string_append_c (result, ' ');
1790                         current = g_utf8_next_char (current);
1791                 } else {
1792                         gchar *next = g_utf8_strchr (current, -1, 0xFFFC);
1793                         if (next == NULL) {
1794                                 result = g_string_append (result, current);
1795                         } else {
1796                                 result = g_string_append_len (result, current, next - current);
1797                         }
1798                         current = next;
1799                 }
1800         }
1801         g_free (slice);
1802
1803         return g_string_free (result, FALSE);
1804
1805 }
1806
1807 gboolean
1808 modest_text_utils_is_forbidden_char (const gchar character,
1809                                      ModestTextUtilsForbiddenCharType type)
1810 {
1811         gint i, len;
1812         const gchar *forbidden_chars = NULL;
1813
1814         /* We need to get the length in the switch because the
1815            compiler needs to know the size at compile time */
1816         switch (type) {
1817         case ACCOUNT_TITLE_FORBIDDEN_CHARS:
1818                 forbidden_chars = account_title_forbidden_chars;
1819                 len = G_N_ELEMENTS (account_title_forbidden_chars);
1820                 break;
1821         case FOLDER_NAME_FORBIDDEN_CHARS:
1822                 forbidden_chars = folder_name_forbidden_chars;
1823                 len = G_N_ELEMENTS (folder_name_forbidden_chars);
1824                 break;
1825         case USER_NAME_FORBIDDEN_NAMES:
1826                 forbidden_chars = user_name_forbidden_chars;
1827                 len = G_N_ELEMENTS (user_name_forbidden_chars);
1828                 break;
1829         default:
1830                 g_return_val_if_reached (TRUE);
1831         }
1832
1833         for (i = 0; i < len ; i++)
1834                 if (forbidden_chars[i] == character)
1835                         return TRUE;
1836
1837         return FALSE; /* it's valid! */
1838 }
1839
1840 gchar *
1841 modest_text_utils_label_get_selection (GtkLabel *label)
1842 {
1843         gint start, end;
1844         gchar *selection;
1845
1846         if (gtk_label_get_selection_bounds (GTK_LABEL (label), &start, &end)) {
1847                 const gchar *start_offset;
1848                 const gchar *end_offset;
1849                 start_offset = gtk_label_get_text (GTK_LABEL (label));
1850                 start_offset = g_utf8_offset_to_pointer (start_offset, start);
1851                 end_offset = gtk_label_get_text (GTK_LABEL (label));
1852                 end_offset = g_utf8_offset_to_pointer (end_offset, end);
1853                 selection = g_strndup (start_offset, end_offset - start_offset);
1854                 return selection;
1855         } else {
1856                 return g_strdup ("");
1857         }
1858 }
1859
1860 static gboolean
1861 _forward_search_image_char (gunichar ch,
1862                             gpointer userdata)
1863 {
1864         return (ch == 0xFFFC);
1865 }
1866
1867 gboolean
1868 modest_text_utils_buffer_selection_is_valid (GtkTextBuffer *buffer)
1869 {
1870         gboolean result;
1871         GtkTextIter start, end;
1872
1873         g_return_val_if_fail (GTK_IS_TEXT_BUFFER (buffer), FALSE);
1874
1875         result = gtk_text_buffer_get_has_selection (GTK_TEXT_BUFFER (buffer));
1876
1877         /* check there are no images in selection */
1878         if (result) {
1879                 gtk_text_buffer_get_selection_bounds (buffer, &start, &end);
1880                 if (gtk_text_iter_get_char (&start)== 0xFFFC)
1881                         result = FALSE;
1882                 else {
1883                         gtk_text_iter_backward_char (&end);
1884                         if (gtk_text_iter_forward_find_char (&start, _forward_search_image_char,
1885                                                              NULL, &end))
1886                                 result = FALSE;
1887                 }
1888
1889         }
1890
1891         return result;
1892 }
1893
1894 static void
1895 remove_quotes (gchar **quotes)
1896 {
1897         if (g_str_has_prefix (*quotes, "\"") && g_str_has_suffix (*quotes, "\"")) {
1898                 gchar *result;
1899                 result = g_strndup ((*quotes)+1, strlen (*quotes) - 2);
1900                 g_free (*quotes);
1901                 *quotes = result;
1902         }
1903 }
1904
1905 gchar *
1906 modest_text_utils_escape_mnemonics (const gchar *text)
1907 {
1908         const gchar *p;
1909         GString *result = NULL;
1910
1911         if (text == NULL)
1912                 return NULL;
1913
1914         result = g_string_new ("");
1915         for (p = text; *p != '\0'; p++) {
1916                 if (*p == '_')
1917                         result = g_string_append (result, "__");
1918                 else
1919                         result = g_string_append_c (result, *p);
1920         }
1921
1922         return g_string_free (result, FALSE);
1923 }
1924
1925 gchar *
1926 modest_text_utils_simplify_recipients (const gchar *recipients)
1927 {
1928         GSList *addresses, *node;
1929         GString *result;
1930         gboolean is_first = TRUE;
1931
1932         if (recipients == NULL)
1933                 return g_strdup ("");
1934
1935         addresses = modest_text_utils_split_addresses_list (recipients);
1936         result = g_string_new ("");
1937
1938         for (node = addresses; node != NULL; node = g_slist_next (node)) {
1939                 const gchar *address = (const gchar *) node->data;
1940                 gchar *left_limit, *right_limit;
1941
1942                 left_limit = strstr (address, "<");
1943                 right_limit = g_strrstr (address, ">");
1944
1945                 if (is_first)
1946                         is_first = FALSE;
1947                 else
1948                         result = g_string_append (result, ", ");
1949
1950                 if ((left_limit == NULL)||(right_limit == NULL)|| (left_limit > right_limit)) {
1951                         result = g_string_append (result, address);
1952                 } else {
1953                         gchar *name_side;
1954                         gchar *email_side;
1955                         name_side = g_strndup (address, left_limit - address);
1956                         name_side = g_strstrip (name_side);
1957                         remove_quotes (&name_side);
1958                         email_side = get_email_from_address (address);
1959                         if (name_side && email_side && !strcmp (name_side, email_side)) {
1960                                 result = g_string_append (result, email_side);
1961                         } else {
1962                                 result = g_string_append (result, address);
1963                         }
1964                         g_free (name_side);
1965                         g_free (email_side);
1966                 }
1967
1968         }
1969         g_slist_foreach (addresses, (GFunc)g_free, NULL);
1970         g_slist_free (addresses);
1971
1972         return g_string_free (result, FALSE);
1973
1974 }
1975
1976 GSList *
1977 modest_text_utils_remove_duplicate_addresses_list (GSList *address_list)
1978 {
1979         GSList *new_list, *iter;
1980         GHashTable *table;
1981
1982         g_return_val_if_fail (address_list, NULL);
1983
1984         table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
1985
1986         new_list = address_list;
1987         iter = address_list;
1988         while (iter) {
1989                 const gchar* address = (const gchar*)iter->data;
1990
1991                 /* We need only the email to just compare it and not
1992                    the full address which would make "a <a@a.com>"
1993                    different from "a@a.com" */
1994                 const gchar *email = get_email_from_address (address);
1995
1996                 /* ignore the address if already seen */
1997                 if (g_hash_table_lookup (table, email) == 0) {
1998                         g_hash_table_insert (table, (gchar*)email, GINT_TO_POINTER(1));
1999                         iter = g_slist_next (iter);
2000                 } else {
2001                         GSList *tmp = g_slist_next (iter);
2002                         new_list = g_slist_delete_link (new_list, iter);
2003                         iter = tmp;
2004                 }
2005         }
2006
2007         g_hash_table_unref (table);
2008
2009         return new_list;
2010 }
2011
2012 gchar *
2013 modest_text_utils_get_secure_header (const gchar *value,
2014                                      const gchar *header)
2015 {
2016         const gint max_len = 128;
2017         gchar *new_value = NULL;
2018         gchar *needle = g_strrstr (value, header);
2019
2020         if (needle && value != needle)
2021                 new_value = g_strdup (needle + strlen (header));
2022
2023         if (!new_value)
2024                 new_value = g_strdup (value);
2025
2026         /* Do a max length check to prevent DoS attacks caused by huge
2027            malformed headers */
2028         if (g_utf8_validate (new_value, -1, NULL)) {
2029                 if (g_utf8_strlen (new_value, -1) > max_len) {
2030                         gchar *tmp = g_malloc0 (max_len * 4);
2031                         g_utf8_strncpy (tmp, (const gchar *) new_value, max_len);
2032                         g_free (new_value);
2033                         new_value = tmp;
2034                 }
2035         } else {
2036                 if (strlen (new_value) > max_len) {
2037                         gchar *tmp = g_malloc0 (max_len);
2038                         strncpy (new_value, tmp, max_len);
2039                         g_free (new_value);
2040                         new_value = tmp;
2041                 }
2042         }
2043
2044         return new_value;
2045 }