Fixes NB#123378, Pango hangs when checking the message details of a malformed message...
[modest] / src / modest-text-utils.c
1 /* Copyright (c) 2006, Nokia Corporation
2  * All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are
6  * met:
7  *
8  * * Redistributions of source code must retain the above copyright
9  *   notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  *   notice, this list of conditions and the following disclaimer in the
12  *   documentation and/or other materials provided with the distribution.
13  * * Neither the name of the Nokia Corporation nor the names of its
14  *   contributors may be used to endorse or promote products derived from
15  *   this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
18  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
20  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
21  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30
31
32 #ifndef _GNU_SOURCE
33 #define _GNU_SOURCE
34 #endif /*_GNU_SOURCE*/
35 #include <string.h> /* for strcasestr */
36
37
38 #include <glib.h>
39 #include <stdlib.h>
40 #include <glib/gi18n.h>
41 #include <regex.h>
42 #include <modest-tny-platform-factory.h>
43 #include <modest-text-utils.h>
44 #include <modest-runtime.h>
45 #include <ctype.h>
46
47 #ifdef HAVE_CONFIG_H
48 #include <config.h>
49 #endif /*HAVE_CONFIG_H */
50
51 /* defines */
52 #define FORWARD_STRING _("mcen_ia_editor_original_message")
53 #define FROM_STRING _("mail_va_from")
54 #define SENT_STRING _("mcen_fi_message_properties_sent")
55 #define TO_STRING _("mail_va_to")
56 #define SUBJECT_STRING _("mail_va_subject")
57 #define EMPTY_STRING ""
58
/*
 * do the hyperlinkification only for texts < 50 Kb,
 * as it's quite slow. Without this limit, e.g. a mail with
 * a uuencoded part (which is not recognized as an attachment)
 * will hang modest
 */
65 #define HYPERLINKIFY_MAX_LENGTH (1024*50)
66
67 /*
68  * we need these regexps to find URLs in plain text e-mails
69  */
70 typedef struct _url_match_pattern_t url_match_pattern_t;
71 struct _url_match_pattern_t {
72         gchar   *regex;
73         regex_t *preg;
74         gchar   *prefix;
75 };
76
77 typedef struct _url_match_t url_match_t;
78 struct _url_match_t {
79         guint offset;
80         guint len;
81         const gchar* prefix;
82 };
83
84
85 /*
86  * we mark the ampersand with \007 when converting text->html
87  * because after text->html we do hyperlink detecting, which
88  * could be screwed up by the ampersand.
89  * ie. 1<3 ==> 1\007lt;3
90  */
91 #define MARK_AMP '\007'
92 #define MARK_AMP_STR "\007"
93
94 /* mark &amp; separately, because they are parts of urls.
95  * ie. a&b => a\006amp;b, but a>b => a\007gt;b
96  *
97  * we need to handle '&' separately, because it can be part of URIs
98  * (as in href="http://foo.bar?a=1&b=1"), so inside those URIs
99  * we need to re-replace \006amp; with '&' again, while outside uri's
100  * it will be '&amp;'
101  * 
102  * yes, it's messy, but a consequence of doing text->html first, then hyperlinkify
103  */
104 #define MARK_AMP_URI '\006'
105 #define MARK_AMP_URI_STR "\006"
106
107
108 /* note: match MARK_AMP_URI_STR as well, because after txt->html, a '&' will look like $(MARK_AMP_URI_STR)"amp;" */
#define MAIL_VIEWER_URL_MATCH_PATTERNS  {                                       \
        /* explicit "scheme://" URLs, optionally preceded by "feed:" */         \
        { "(feed:|)(file|rtsp|http|ftp|https|mms|mmsh|webcal|feed|rtsp|rdp|lastfm|sip)://[-a-z0-9_$.+!*(),;:@%=\?/~#&" MARK_AMP_URI_STR \
          "]+[-a-z0-9_$%&" MARK_AMP_URI_STR "=?/~#]",                           \
          NULL, NULL },                                                         \
        /* bare "www." names; prefix field is "http://" */                      \
        { "www\\.[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]", \
          NULL, "http://" },                                                    \
        /* bare "ftp." names; prefix field is "ftp://" */                       \
        { "ftp\\.[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]", \
          NULL, "ftp://" },                                                     \
        /* chat / voip style "scheme:address" URIs */                           \
        { "(jabberto|voipto|sipto|sip|chatto|skype|xmpp):[-_a-z@0-9.+]+",       \
          NULL, NULL },                                                         \
        /* explicit mailto: links */                                            \
        { "mailto:[-_a-z0-9.\\+]+@[-_a-z0-9.]+",                                \
          NULL, NULL },                                                         \
        /* bare e-mail addresses; prefix field is "mailto:" */                  \
        { "[-_a-z0-9.\\+]+@[-_a-z0-9.]+",                                       \
          NULL, "mailto:" }                                                     \
        }
124
125 const gchar account_title_forbidden_chars[] = {
126         '\\', '/', ':', '*', '?', '\'', '<', '>', '|', '^'
127 };
128 const gchar folder_name_forbidden_chars[] = {
129         '<', '>', ':', '\'', '/', '\\', '|', '?', '*', '^', '%', '$', '#', '&'
130 };
131 const gchar user_name_forbidden_chars[] = {
132         '<', '>'
133 };
134 const guint ACCOUNT_TITLE_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (account_title_forbidden_chars);
135 const guint FOLDER_NAME_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (folder_name_forbidden_chars);
136 const guint USER_NAME_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (user_name_forbidden_chars);
137
138 /* private */
139 static gchar*   cite                    (const time_t sent_date, const gchar *from);
140 static void     hyperlinkify_plain_text (GString *txt, gint offset);
141 static gint     cmp_offsets_reverse     (const url_match_t *match1, const url_match_t *match2);
142 static GSList*  get_url_matches         (GString *txt, gint offset);
143
144 static GString* get_next_line           (const char *b, const gsize blen, const gchar * iter);
145 static int      get_indent_level        (const char *l);
146 static void     unquote_line            (GString * l, const gchar *quote_symbol);
147 static void     append_quoted           (GString * buf, const gchar *quote_symbol,
148                                          const int indent, const GString * str, 
149                                          const int cutpoint);
150 static int      get_breakpoint_utf8     (const gchar * s, const gint indent, const gint limit);
151 static int      get_breakpoint_ascii    (const gchar * s, const gint indent, const gint limit);
152 static int      get_breakpoint          (const gchar * s, const gint indent, const gint limit);
153
154 static gchar*   modest_text_utils_quote_plain_text (const gchar *text, 
155                                                     const gchar *cite, 
156                                                     const gchar *signature,
157                                                     GList *attachments, 
158                                                     int limit);
159
160 static gchar*   modest_text_utils_quote_html       (const gchar *text, 
161                                                     const gchar *cite,
162                                                     const gchar *signature,
163                                                     GList *attachments,
164                                                     int limit);
165 static gchar*   get_email_from_address (const gchar *address);
166
167
168 /* ******************************************************************* */
169 /* ************************* PUBLIC FUNCTIONS ************************ */
170 /* ******************************************************************* */
171
172 gchar *
173 modest_text_utils_quote (const gchar *text, 
174                          const gchar *content_type,
175                          const gchar *signature,
176                          const gchar *from,
177                          const time_t sent_date, 
178                          GList *attachments,
179                          int limit)
180 {
181         gchar *retval, *cited;
182
183         g_return_val_if_fail (text, NULL);
184         g_return_val_if_fail (content_type, NULL);
185
186         cited = cite (sent_date, from);
187         
188         if (content_type && strcmp (content_type, "text/html") == 0)
189                 /* TODO: extract the <body> of the HTML and pass it to
190                    the function */
191                 retval = modest_text_utils_quote_html (text, cited, signature, attachments, limit);
192         else
193                 retval = modest_text_utils_quote_plain_text (text, cited, signature, attachments, limit);
194         
195         g_free (cited);
196         
197         return retval;
198 }
199
200
201 gchar *
202 modest_text_utils_cite (const gchar *text,
203                         const gchar *content_type,
204                         const gchar *signature,
205                         const gchar *from,
206                         time_t sent_date)
207 {
208         gchar *retval;
209         gchar *tmp_sig;
210         
211         g_return_val_if_fail (text, NULL);
212         g_return_val_if_fail (content_type, NULL);
213         
214         if (!signature) {
215                 tmp_sig = g_strdup (text);
216         } else {
217                 tmp_sig = g_strconcat (text, "\n", MODEST_TEXT_UTILS_SIGNATURE_MARKER, "\n", signature, NULL);
218         }
219
220         if (strcmp (content_type, "text/html") == 0) {
221                 retval = modest_text_utils_convert_to_html_body (tmp_sig, -1, TRUE);
222                 g_free (tmp_sig);
223         } else {
224                 retval = tmp_sig;
225         }
226
227         return retval;
228 }
229
230 static gchar *
231 forward_cite (const gchar *from,
232               const gchar *sent,
233               const gchar *to,
234               const gchar *subject)
235 {
236         g_return_val_if_fail (sent, NULL);
237         
238         return g_strdup_printf ("%s\n%s %s\n%s %s\n%s %s\n%s %s\n", 
239                                 FORWARD_STRING, 
240                                 FROM_STRING, (from)?from:"",
241                                 SENT_STRING, sent,
242                                 TO_STRING, (to)?to:"",
243                                 SUBJECT_STRING, (subject)?subject:"");
244 }
245
246 gchar * 
247 modest_text_utils_inline (const gchar *text,
248                           const gchar *content_type,
249                           const gchar *signature,
250                           const gchar *from,
251                           time_t sent_date,
252                           const gchar *to,
253                           const gchar *subject)
254 {
255         gchar sent_str[101];
256         gchar *cited;
257         gchar *retval;
258         
259         g_return_val_if_fail (text, NULL);
260         g_return_val_if_fail (content_type, NULL);
261         
262         modest_text_utils_strftime (sent_str, 100, "%c", sent_date);
263
264         cited = forward_cite (from, sent_str, to, subject);
265         
266         if (content_type && strcmp (content_type, "text/html") == 0)
267                 retval = modest_text_utils_quote_html (text, cited, signature, NULL, 80);
268         else
269                 retval = modest_text_utils_quote_plain_text (text, cited, signature, NULL, 80);
270         
271         g_free (cited);
272         return retval;
273 }
274
275 /* just to prevent warnings:
276  * warning: `%x' yields only last 2 digits of year in some locales
277  */
278 gsize
279 modest_text_utils_strftime(char *s, gsize max, const char *fmt, time_t timet)
280 {
281         struct tm tm;
282
283         /* To prevent possible problems in strftime that could leave
284            garbage in the s variable */
285         if (s)
286                 s[0] = '\0';
287         else
288                 return 0;
289
290         /* does not work on old maemo glib: 
291          *   g_date_set_time_t (&date, timet);
292          */
293         localtime_r (&timet, &tm);
294         return strftime(s, max, fmt, &tm);
295 }
296
297 gchar *
298 modest_text_utils_derived_subject (const gchar *subject, const gchar *prefix)
299 {
300         gchar *tmp, *subject_dup, *retval;
301         gint prefix_len;
302
303         g_return_val_if_fail (prefix, NULL);
304
305         if (!subject || subject[0] == '\0')
306                 subject = _("mail_va_no_subject");
307
308         subject_dup = g_strdup (subject);
309         tmp = g_strchug (subject_dup);
310
311         /* We do not want things like "Re: Re: Re:" or "Fw: Fw:" so
312            delete the previous ones */
313         prefix_len = strlen (prefix);
314         do {
315                 if (g_str_has_prefix (tmp, prefix)) {
316                         tmp += prefix_len;
317                         tmp = g_strchug (tmp);
318                 } else {
319                         gchar *prefix_down, *tmp_down;
320
321                         /* We need this to properly check the cases of
322                            some clients adding FW: instead of Fw: for
323                            example */
324                         prefix_down = g_utf8_strdown (prefix, -1);
325                         tmp_down = g_utf8_strdown (tmp, -1);
326                         if (g_str_has_prefix (tmp_down, prefix_down)) {
327                                 tmp += prefix_len;
328                                 tmp = g_strchug (tmp);
329                                 g_free (prefix_down);
330                                 g_free (tmp_down);
331                         } else {
332                                 g_free (prefix_down);
333                                 g_free (tmp_down);
334                                 break;
335                         }
336                 }
337         } while (tmp);
338
339         retval = g_strdup_printf ("%s %s", prefix, tmp);
340         g_free (subject_dup);
341
342         return retval;
343 }
344
345 gchar*
346 modest_text_utils_remove_address (const gchar *address_list, const gchar *address)
347 {
348         gchar *dup, *token, *ptr = NULL, *result;
349         GString *filtered_emails;
350         gchar *email_address;
351
352         g_return_val_if_fail (address_list, NULL);
353         
354         if (!address)
355                 return g_strdup (address_list);
356
357         email_address = get_email_from_address (address);
358         
359         /* search for substring */
360         if (!strstr ((const char *) address_list, (const char *) email_address)) {
361                 g_free (email_address);
362                 return g_strdup (address_list);
363         }
364
365         dup = g_strdup (address_list);
366         filtered_emails = g_string_new (NULL);
367         
368         token = strtok_r (dup, ",", &ptr);
369
370         while (token != NULL) {
371                 /* Add to list if not found */
372                 if (!strstr ((const char *) token, (const char *) email_address)) {
373                         if (filtered_emails->len == 0)
374                                 g_string_append_printf (filtered_emails, "%s", g_strstrip (token));
375                         else
376                                 g_string_append_printf (filtered_emails, ",%s", g_strstrip (token));
377                 }
378                 token = strtok_r (NULL, ",", &ptr);
379         }
380         result = filtered_emails->str;
381
382         /* Clean */
383         g_free (email_address);
384         g_free (dup);
385         g_string_free (filtered_emails, FALSE);
386
387         return result;
388 }
389
390
391 gchar*
392 modest_text_utils_remove_duplicate_addresses (const gchar *address_list)
393 {
394         GSList *addresses, *cursor;
395         GHashTable *table;
396         gchar *new_list = NULL;
397         
398         g_return_val_if_fail (address_list, NULL);
399
400         table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
401         addresses = modest_text_utils_split_addresses_list (address_list);
402
403         cursor = addresses;
404         while (cursor) {
405                 const gchar* address = (const gchar*)cursor->data;
406
407                 /* We need only the email to just compare it and not
408                    the full address which would make "a <a@a.com>"
409                    different from "a@a.com" */
410                 const gchar *email = get_email_from_address (address);
411
412                 /* ignore the address if already seen */
413                 if (g_hash_table_lookup (table, email) == 0) {
414                         gchar *tmp;
415
416                         /* Include the full address and not only the
417                            email in the returned list */
418                         if (!new_list) {
419                                 tmp = g_strdup (address);
420                         } else {
421                                 tmp = g_strjoin (",", new_list, address, NULL);
422                                 g_free (new_list);
423                         }
424                         new_list = tmp;
425                         
426                         g_hash_table_insert (table, (gchar*)email, GINT_TO_POINTER(1));
427                 }
428                 cursor = g_slist_next (cursor);
429         }
430
431         g_hash_table_unref (table);
432         g_slist_foreach (addresses, (GFunc)g_free, NULL);
433         g_slist_free (addresses);
434
435         if (new_list == NULL)
436                 new_list = g_strdup ("");
437
438         return new_list;
439 }
440
441
442 static void
443 modest_text_utils_convert_buffer_to_html_start (GString *html, const gchar *data, gssize n)
444 {
445         guint           i;
446         gboolean        space_seen = FALSE;
447         guint           break_dist = 0; /* distance since last break point */
448
449         if (n == -1)
450                 n = strlen (data);
451
452         /* replace with special html chars where needed*/
453         for (i = 0; i != n; ++i)  {
454                 guchar kar = data[i];
455                 
456                 if (space_seen && kar != ' ') {
457                         g_string_append (html, "&nbsp;");
458                         space_seen = FALSE;
459                 }
460                 
461                 /* we artificially insert a breakpoint (newline)
462                  * after 256, to make sure our lines are not so long
463                  * they will DOS the regexping later
464                  * Also, check that kar is ASCII to make sure that we
465                  * don't break a UTF8 char in two
466                  */
467                 if (++break_dist >= 256 && kar < 127) {
468                         g_string_append_c (html, '\n');
469                         break_dist = 0;
470                 }
471                 
472                 switch (kar) {
473                 case 0:
474                 case MARK_AMP:
475                 case MARK_AMP_URI:      
476                         /* this is a temp place holder for '&'; we can only
477                                 * set the real '&' after hyperlink translation, otherwise
478                                 * we might screw that up */
479                         break; /* ignore embedded \0s and MARK_AMP */   
480                 case '<'  : g_string_append (html, MARK_AMP_STR "lt;");   break;
481                 case '>'  : g_string_append (html, MARK_AMP_STR "gt;");   break;
482                 case '&'  : g_string_append (html, MARK_AMP_URI_STR "amp;");  break; /* special case */
483                 case '"'  : g_string_append (html, MARK_AMP_STR "quot;");  break;
484
485                 /* don't convert &apos; --> wpeditor will try to re-convert it... */    
486                 //case '\'' : g_string_append (html, "&apos;"); break;
487                 case '\n' : g_string_append (html, "<br>\n");break_dist= 0; break;
488                 case '\t' : g_string_append (html, MARK_AMP_STR "nbsp;" MARK_AMP_STR "nbsp;" MARK_AMP_STR "nbsp; ");
489                         break_dist=0; break; /* note the space at the end*/
490                 case ' ':
491                         break_dist = 0;
492                         if (space_seen) { /* second space in a row */
493                                 g_string_append (html, "&nbsp; ");
494                                 space_seen = FALSE;
495                         } else
496                                 space_seen = TRUE;
497                         break;
498                 default:
499                         g_string_append_c (html, kar);
500                 }
501         }
502 }
503
504
505 static void
506 modest_text_utils_convert_buffer_to_html_finish (GString *html)
507 {
508         int i;
509         /* replace all our MARK_AMPs with real ones */
510         for (i = 0; i != html->len; ++i)
511                 if ((html->str)[i] == MARK_AMP || (html->str)[i] == MARK_AMP_URI)
512                         (html->str)[i] = '&';
513 }
514
515
516 gchar*
517 modest_text_utils_convert_to_html (const gchar *data)
518 {
519         GString         *html;      
520         gsize           len;
521
522         g_return_val_if_fail (data, NULL);
523         
524         if (!data)
525                 return NULL;
526
527         len = strlen (data);
528         html = g_string_sized_new (1.5 * len);  /* just a  guess... */
529
530         g_string_append_printf (html,
531                                 "<html><head>"
532                                 "<meta http-equiv=\"content-type\" content=\"text/html; charset=utf8\">"
533                                 "</head>"
534                                 "<body>");
535
536         modest_text_utils_convert_buffer_to_html_start (html, data, -1);
537         
538         g_string_append (html, "</body></html>");
539
540         if (len <= HYPERLINKIFY_MAX_LENGTH)
541                 hyperlinkify_plain_text (html, 0);
542
543         modest_text_utils_convert_buffer_to_html_finish (html);
544         
545         return g_string_free (html, FALSE);
546 }
547
548 gchar *
549 modest_text_utils_convert_to_html_body (const gchar *data, gssize n, gboolean hyperlinkify)
550 {
551         GString         *html;      
552
553         g_return_val_if_fail (data, NULL);
554
555         if (!data)
556                 return NULL;
557
558         if (n == -1) 
559                 n = strlen (data);
560         html = g_string_sized_new (1.5 * n);    /* just a  guess... */
561
562         modest_text_utils_convert_buffer_to_html_start (html, data, n);
563
564         if (hyperlinkify && (n < HYPERLINKIFY_MAX_LENGTH))
565                 hyperlinkify_plain_text (html, 0);
566
567         modest_text_utils_convert_buffer_to_html_finish (html);
568         
569         return g_string_free (html, FALSE);
570 }
571
572 void
573 modest_text_utils_get_addresses_indexes (const gchar *addresses, GSList **start_indexes, GSList **end_indexes)
574 {
575         gchar *current, *start, *last_blank;
576         gint start_offset = 0, current_offset = 0;
577
578         g_return_if_fail (start_indexes != NULL);
579         g_return_if_fail (end_indexes != NULL);
580
581         start = (gchar *) addresses;
582         current = start;
583         last_blank = start;
584
585         while (*current != '\0') {
586                 if ((start == current)&&((*current == ' ')||(*current == ',')||(*current == ';'))) {
587                         start = g_utf8_next_char (start);
588                         start_offset++;
589                         last_blank = current;
590                 } else if ((*current == ',')||(*current == ';')) {
591                         gint *start_index, *end_index;
592                         start_index = g_new0(gint, 1);
593                         end_index = g_new0(gint, 1);
594                         *start_index = start_offset;
595                         *end_index = current_offset;
596                         *start_indexes = g_slist_prepend (*start_indexes, start_index);
597                         *end_indexes = g_slist_prepend (*end_indexes, end_index);
598                         start = g_utf8_next_char (current);
599                         start_offset = current_offset + 1;
600                         last_blank = start;
601                 } else if (*current == '"') {
602                         current = g_utf8_next_char (current);
603                         current_offset ++;
604                         while ((*current != '"')&&(*current != '\0')) {
605                                 current = g_utf8_next_char (current);
606                                 current_offset ++;
607                         }
608                 }
609                                 
610                 current = g_utf8_next_char (current);
611                 current_offset ++;
612         }
613
614         if (start != current) {
615                         gint *start_index, *end_index;
616                         start_index = g_new0(gint, 1);
617                         end_index = g_new0(gint, 1);
618                         *start_index = start_offset;
619                         *end_index = current_offset;
620                         *start_indexes = g_slist_prepend (*start_indexes, start_index);
621                         *end_indexes = g_slist_prepend (*end_indexes, end_index);
622         }
623         
624         *start_indexes = g_slist_reverse (*start_indexes);
625         *end_indexes = g_slist_reverse (*end_indexes);
626
627         return;
628 }
629
630
631 GSList *
632 modest_text_utils_split_addresses_list (const gchar *addresses)
633 {
634         GSList *head;
635         const gchar *my_addrs = addresses;
636         const gchar *end;
637         gchar *addr;
638         gboolean after_at = FALSE;
639
640         g_return_val_if_fail (addresses, NULL);
641         
642         /* skip any space, ',', ';' at the start */
643         while (my_addrs && (my_addrs[0] == ' ' || my_addrs[0] == ',' || my_addrs[0] == ';'))
644                ++my_addrs;
645
646         /* are we at the end of addresses list? */
647         if (!my_addrs[0])
648                 return NULL;
649         
650         /* nope, we are at the start of some address
651          * now, let's find the end of the address */
652         end = my_addrs + 1;
653         while (end[0] && end[0] != ';' && !(after_at && end[0] == ',')) {
654                 if (end[0] == '\"') {
655                         while (end[0] && end[0] != '\"')
656                                 ++end;
657                 }
658                 if (end[0] == '@') {
659                         after_at = TRUE;
660                 }
661                 if ((end[0] && end[0] == '>')&&(end[1] && end[1] == ',')) {
662                         ++end;
663                         break;
664                 }
665                 ++end;
666         }
667
668         /* we got the address; copy it and remove trailing whitespace */
669         addr = g_strndup (my_addrs, end - my_addrs);
670         g_strchomp (addr);
671
672         head = g_slist_append (NULL, addr);
673         head->next = modest_text_utils_split_addresses_list (end); /* recurse */
674
675         return head;
676 }
677
678
679 void
680 modest_text_utils_address_range_at_position (const gchar *recipients_list,
681                                              guint position,
682                                              guint *start,
683                                              guint *end)
684 {
685         gchar *current = NULL;
686         gint range_start = 0;
687         gint range_end = 0;
688         gint index;
689         gboolean is_quoted = FALSE;
690
691         g_return_if_fail (recipients_list);
692         g_return_if_fail (position < g_utf8_strlen(recipients_list, -1));
693                 
694         index = 0;
695         for (current = (gchar *) recipients_list; *current != '\0';
696              current = g_utf8_find_next_char (current, NULL)) {
697                 gunichar c = g_utf8_get_char (current);
698
699                 if ((c == ',') && (!is_quoted)) {
700                         if (index < position) {
701                                 range_start = index + 1;
702                         } else {
703                                 break;
704                         }
705                 } else if (c == '\"') {
706                         is_quoted = !is_quoted;
707                 } else if ((c == ' ') &&(range_start == index)) {
708                         range_start ++;
709                 }
710                 index ++;
711                 range_end = index;
712         }
713
714         if (start)
715                 *start = range_start;
716         if (end)
717                 *end = range_end;
718 }
719
720 gchar *
721 modest_text_utils_address_with_standard_length (const gchar *recipients_list)
722 {
723         gchar ** splitted;
724         gchar ** current;
725         GString *buffer = g_string_new ("");
726
727         splitted = g_strsplit (recipients_list, "\n", 0);
728         current = splitted;
729         while (*current) {
730                 gchar *line;
731                 if (current != splitted)
732                         buffer = g_string_append_c (buffer, '\n');
733                 line = g_strndup (*splitted, 1000);
734                 buffer = g_string_append (buffer, line);
735                 g_free (line);
736                 current++;
737         }
738
739         g_strfreev (splitted);
740
741         return g_string_free (buffer, FALSE);
742 }
743
744
745 /* ******************************************************************* */
/* ************************* UTILITY FUNCTIONS *********************** */
747 /* ******************************************************************* */
748
749 static GString *
750 get_next_line (const gchar * b, const gsize blen, const gchar * iter)
751 {
752         GString *gs;
753         const gchar *i0;
754         
755         if (iter > b + blen)
756                 return g_string_new("");
757         
758         i0 = iter;
759         while (iter[0]) {
760                 if (iter[0] == '\n')
761                         break;
762                 iter++;
763         }
764         gs = g_string_new_len (i0, iter - i0);
765         return gs;
766 }
767 static int
768 get_indent_level (const char *l)
769 {
770         int indent = 0;
771
772         while (l[0]) {
773                 if (l[0] == '>') {
774                         indent++;
775                         if (l[1] == ' ') {
776                                 l++;
777                         }
778                 } else {
779                         break;
780                 }
781                 l++;
782
783         }
784
785         /*      if we hit the signature marker "-- ", we return -(indent + 1). This
786          *      stops reformatting.
787          */
788         if (strcmp (l, MODEST_TEXT_UTILS_SIGNATURE_MARKER) == 0) {
789                 return -1 - indent;
790         } else {
791                 return indent;
792         }
793 }
794
795 static void
796 unquote_line (GString * l, const gchar *quote_symbol)
797 {
798         gchar *p;
799         gint quote_len;
800
801         p = l->str;
802         quote_len = strlen (quote_symbol);
803         while (p[0]) {
804                 if (g_str_has_prefix (p, quote_symbol)) {
805                         if (p[quote_len] == ' ') {
806                                 p += quote_len;
807                         }
808                 } else {
809                         break;
810                 }
811                 p++;
812         }
813         g_string_erase (l, 0, p - l->str);
814 }
815
816 static void
817 append_quoted (GString * buf, const gchar *quote_symbol,
818                int indent, const GString * str,
819                const int cutpoint)
820 {
821         int i;
822         gchar *quote_concat;
823
824         indent = indent < 0 ? abs (indent) - 1 : indent;
825         quote_concat = g_strconcat (quote_symbol, " ", NULL);
826         for (i = 0; i <= indent; i++) {
827                 g_string_append (buf, quote_concat);
828         }
829         g_free (quote_concat);
830         if (cutpoint > 0) {
831                 g_string_append_len (buf, str->str, cutpoint);
832         } else {
833                 g_string_append (buf, str->str);
834         }
835         g_string_append (buf, "\n");
836 }
837
838 static int
839 get_breakpoint_utf8 (const gchar * s, gint indent, const gint limit)
840 {
841         gint index = 0;
842         const gchar *pos, *last;
843         gunichar *uni;
844
845         indent = indent < 0 ? abs (indent) - 1 : indent;
846
847         last = NULL;
848         pos = s;
849         uni = g_utf8_to_ucs4_fast (s, -1, NULL);
850         while (pos[0]) {
851                 if ((index + 2 * indent > limit) && last) {
852                         g_free (uni);
853                         return last - s;
854                 }
855                 if (g_unichar_isspace (uni[index])) {
856                         last = pos;
857                 }
858                 pos = g_utf8_next_char (pos);
859                 index++;
860         }
861         g_free (uni);
862         return strlen (s);
863 }
864
865 static int
866 get_breakpoint_ascii (const gchar * s, const gint indent, const gint limit)
867 {
868         gint i, last;
869
870         last = strlen (s);
871         if (last + 2 * indent < limit)
872                 return last;
873
874         for (i = strlen (s); i > 0; i--) {
875                 if (s[i] == ' ') {
876                         if (i + 2 * indent <= limit) {
877                                 return i;
878                         } else {
879                                 last = i;
880                         }
881                 }
882         }
883         return last;
884 }
885
886 static int
887 get_breakpoint (const gchar * s, const gint indent, const gint limit)
888 {
889
890         if (g_utf8_validate (s, -1, NULL)) {
891                 return get_breakpoint_utf8 (s, indent, limit);
892         } else {                /* assume ASCII */
893                 //g_warning("invalid UTF-8 in msg");
894                 return get_breakpoint_ascii (s, indent, limit);
895         }
896 }
897
898 static gchar *
899 cite (const time_t sent_date, const gchar *from)
900 {
901         return g_strdup (_("mcen_ia_editor_original_message"));
902 }
903
904 static gchar *
905 quoted_attachments (GList *attachments)
906 {
907         GList *node = NULL;
908         GString *result = g_string_new ("");
909         for (node = attachments; node != NULL; node = g_list_next (node)) {
910                 gchar *filename = (gchar *) node->data;
911                 g_string_append_printf ( result, "%s %s\n", _("mcen_ia_editor_attach_filename"), filename);
912         }
913
914         return g_string_free (result, FALSE);
915
916 }
917
918 static GString *
919 modest_text_utils_quote_body (GString *output, const gchar *text,
920                               const gchar *quote_symbol,
921                               int limit)
922 {
923
924         const gchar *iter;
925         gsize len;
926         gint indent, breakpoint, rem_indent = 0;
927         GString *l, *remaining;
928
929         iter = text;
930         len = strlen(text);
931         remaining = g_string_new ("");
932         do {
933                 l = get_next_line (text, len, iter);
934                 iter = iter + l->len + 1;
935                 indent = get_indent_level (l->str);
936                 unquote_line (l, quote_symbol);
937
938                 if (remaining->len) {
939                         if (l->len && indent == rem_indent) {
940                                 g_string_prepend (l, " ");
941                                 g_string_prepend (l, remaining->str);
942                         } else {
943                                 do {
944                                         breakpoint =
945                                                 get_breakpoint (remaining->str,
946                                                                 rem_indent,
947                                                                 limit);
948                                         append_quoted (output, quote_symbol, rem_indent,
949                                                        remaining, breakpoint);
950                                         g_string_erase (remaining, 0,
951                                                         breakpoint);
952                                         if (remaining->str[0] == ' ') {
953                                                 g_string_erase (remaining, 0,
954                                                                 1);
955                                         }
956                                 } while (remaining->len);
957                         }
958                 }
959                 g_string_free (remaining, TRUE);
960                 breakpoint = get_breakpoint (l->str, indent, limit);
961                 remaining = g_string_new (l->str + breakpoint);
962                 if (remaining->str[0] == ' ') {
963                         g_string_erase (remaining, 0, 1);
964                 }
965                 rem_indent = indent;
966                 append_quoted (output, quote_symbol, indent, l, breakpoint);
967                 g_string_free (l, TRUE);
968         } while ((iter < text + len) || (remaining->str[0]));
969
970         return output;
971 }
972
973 static gchar *
974 modest_text_utils_quote_plain_text (const gchar *text, 
975                                     const gchar *cite, 
976                                     const gchar *signature,
977                                     GList *attachments,
978                                     int limit)
979 {
980         GString *q;
981         gchar *attachments_string = NULL;
982
983         q = g_string_new ("");
984
985         if (signature != NULL) {
986                 g_string_append_printf (q, "\n%s\n", MODEST_TEXT_UTILS_SIGNATURE_MARKER);
987                 q = g_string_append (q, signature);
988         }
989
990         q = g_string_append (q, "\n");
991         q = g_string_append (q, cite);
992         q = g_string_append_c (q, '\n');
993
994         q = modest_text_utils_quote_body (q, text, ">", limit);
995
996         attachments_string = quoted_attachments (attachments);
997         q = g_string_append (q, attachments_string);
998         g_free (attachments_string);
999
1000         return g_string_free (q, FALSE);
1001 }
1002
1003 static void
1004 quote_html_add_to_gstring (GString *string,
1005                            const gchar *text)
1006 {
1007         if (text && strcmp (text, "")) {
1008                 gchar *html_text = modest_text_utils_convert_to_html_body (text, -1, TRUE);
1009                 g_string_append_printf (string, "%s<br/>", html_text);
1010                 g_free (html_text);
1011         }
1012 }
1013
1014 static gchar*
1015 modest_text_utils_quote_html (const gchar *text, 
1016                               const gchar *cite, 
1017                               const gchar *signature,
1018                               GList *attachments,
1019                               int limit)
1020 {
1021         GString *result_string;
1022
1023         result_string = 
1024                 g_string_new ( \
1025                               "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n" \
1026                               "<html>\n"                                \
1027                               "<body>\n<br/>\n");
1028
1029         if (text || cite || signature) {
1030                 GString *quoted_text;
1031                 g_string_append (result_string, "<pre>\n");
1032                 if (signature) {
1033                         quote_html_add_to_gstring (result_string, MODEST_TEXT_UTILS_SIGNATURE_MARKER);
1034                         quote_html_add_to_gstring (result_string, signature);
1035                 }
1036                 quote_html_add_to_gstring (result_string, cite);
1037                 quoted_text = g_string_new ("");
1038                 quoted_text = modest_text_utils_quote_body (quoted_text, (text) ? text : "", ">", limit);
1039                 quote_html_add_to_gstring (result_string, quoted_text->str);
1040                 g_string_free (quoted_text, TRUE);
1041                 if (attachments) {
1042                         gchar *attachments_string = quoted_attachments (attachments);
1043                         quote_html_add_to_gstring (result_string, attachments_string);
1044                         g_free (attachments_string);
1045                 }
1046                 g_string_append (result_string, "</pre>");
1047         }
1048         g_string_append (result_string, "</body>");
1049         g_string_append (result_string, "</html>");
1050
1051         return g_string_free (result_string, FALSE);
1052 }
1053
/*
 * Comparison callback for sorting URL matches by descending offset.
 *
 * Returns match2's offset minus match1's offset, so the result is
 * negative when match1 starts later in the text than match2.
 * get_url_matches () passes this to g_slist_sort () to obtain the
 * matches in reverse order of occurrence.
 */
1054 static gint 
1055 cmp_offsets_reverse (const url_match_t *match1, const url_match_t *match2)
1056 {
1057         return match2->offset - match1->offset;
1058 }
1059
/* Nesting counter of modest_text_utils_hyperlinkify_begin () calls: it is
 * incremented by _begin and decremented by _end. The patterns are
 * compiled when _begin finds it at 0 and freed when _end brings it down
 * to 0 or below. */
1060 static gint url_matches_block = 0;
/* The URL patterns to look for; compile_patterns () fills in the `preg'
 * member of each entry from its `regex' member. */
1061 static url_match_pattern_t patterns[] = MAIL_VIEWER_URL_MATCH_PATTERNS;
/* Mutex held while the counter above is changed and the patterns are
 * compiled or freed. Created on the first call to
 * modest_text_utils_hyperlinkify_begin (). */
1062 static GMutex *url_patterns_mutex = NULL;
1063
1064
1065 static gboolean
1066 compile_patterns ()
1067 {
1068         guint i;
1069         const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
1070         for (i = 0; i != pattern_num; ++i) {
1071                 patterns[i].preg = g_slice_new0 (regex_t);
1072                 
1073                 /* this should not happen */
1074                 if (regcomp (patterns[i].preg, patterns[i].regex,
1075                              REG_ICASE|REG_EXTENDED|REG_NEWLINE) != 0) {
1076                         g_warning ("%s: error in regexp:\n%s\n", __FUNCTION__, patterns[i].regex);
1077                         return FALSE;
1078                 }
1079         }
1080         return TRUE;
1081 }
1082
1083 static void 
1084 free_patterns ()
1085 {
1086         guint i;
1087         const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
1088         for (i = 0; i != pattern_num; ++i) {
1089                 regfree (patterns[i].preg);
1090                 g_slice_free  (regex_t, patterns[i].preg);
1091         } /* don't free patterns itself -- it's static */
1092 }
1093
1094 void
1095 modest_text_utils_hyperlinkify_begin (void)
1096 {
1097
1098         if (url_patterns_mutex == NULL) {
1099                 url_patterns_mutex = g_mutex_new ();
1100         }
1101         g_mutex_lock (url_patterns_mutex);
1102         if (url_matches_block == 0)
1103                 compile_patterns ();
1104         url_matches_block ++;
1105         g_mutex_unlock (url_patterns_mutex);
1106 }
1107
1108 void
1109 modest_text_utils_hyperlinkify_end (void)
1110 {
1111         g_mutex_lock (url_patterns_mutex);
1112         url_matches_block--;
1113         if (url_matches_block <= 0)
1114                 free_patterns ();
1115         g_mutex_unlock (url_patterns_mutex);
1116 }
1117
1118
1119 static GSList*
1120 get_url_matches (GString *txt, gint offset)
1121 {
1122         regmatch_t rm;
1123         guint rv, i, tmp_offset = 0;
1124         GSList *match_list = NULL;
1125
1126         const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
1127
1128         /* initalize the regexps */
1129         modest_text_utils_hyperlinkify_begin ();
1130
1131         /* find all the matches */
1132         for (i = 0; i != pattern_num; ++i) {
1133                 tmp_offset     = offset;        
1134                 while (1) {
1135                         url_match_t *match;
1136                         gboolean is_submatch;
1137                         GSList *cursor;
1138                         
1139                         if ((rv = regexec (patterns[i].preg, txt->str + tmp_offset, 1, &rm, 0)) != 0) {
1140                                 g_return_val_if_fail (rv == REG_NOMATCH, NULL); /* this should not happen */
1141                                 break; /* try next regexp */ 
1142                         }
1143                         if (rm.rm_so == -1)
1144                                 break;
1145                         
1146                         is_submatch = FALSE;
1147                         /* check  old matches to see if this has already been matched */
1148                         cursor = match_list;
1149                         while (cursor && !is_submatch) {
1150                                 const url_match_t *old_match =
1151                                         (const url_match_t *) cursor->data;
1152                                 guint new_offset = tmp_offset + rm.rm_so;
1153                                 is_submatch = (new_offset >  old_match->offset &&
1154                                                new_offset <  old_match->offset + old_match->len);
1155                                 cursor = g_slist_next (cursor);
1156                         }
1157
1158                         if (!is_submatch) {
1159                                 /* make a list of our matches (<offset, len, prefix> tupels)*/
1160                                 match = g_slice_new (url_match_t);
1161                                 match->offset = tmp_offset + rm.rm_so;
1162                                 match->len    = rm.rm_eo - rm.rm_so;
1163                                 match->prefix = patterns[i].prefix;
1164                                 match_list = g_slist_prepend (match_list, match);
1165                         }               
1166                         tmp_offset += rm.rm_eo;
1167                 }
1168         }
1169
1170         modest_text_utils_hyperlinkify_end ();
1171         
1172         /* now sort the list, so the matches are in reverse order of occurence.
1173          * that way, we can do the replacements starting from the end, so we don't need
1174          * to recalculate the offsets
1175          */
1176         match_list = g_slist_sort (match_list,
1177                                    (GCompareFunc)cmp_offsets_reverse); 
1178         return match_list;      
1179 }
1180
1181
1182
1183 /* replace all occurences of needle in haystack with repl*/
1184 static gchar*
1185 replace_string (const gchar *haystack, const gchar *needle, gchar repl)
1186 {
1187         gchar *str, *cursor;
1188
1189         if (!haystack || !needle || strlen(needle) == 0)
1190                 return haystack ? g_strdup(haystack) : NULL;
1191         
1192         str = g_strdup (haystack);
1193
1194         for (cursor = str; cursor && *cursor; ++cursor) {
1195                 if (g_str_has_prefix (cursor, needle)) {
1196                         cursor[0] = repl;
1197                         memmove (cursor + 1,
1198                                  cursor + strlen (needle),
1199                                  strlen (cursor + strlen (needle)) + 1);
1200                 }
1201         }
1202         
1203         return str;
1204 }
1205
1206 static void
1207 hyperlinkify_plain_text (GString *txt, gint offset)
1208 {
1209         GSList *cursor;
1210         GSList *match_list = get_url_matches (txt, offset);
1211
1212         /* we will work backwards, so the offsets stay valid */
1213         for (cursor = match_list; cursor; cursor = cursor->next) {
1214
1215                 url_match_t *match = (url_match_t*) cursor->data;
1216                 gchar *url  = g_strndup (txt->str + match->offset, match->len);
1217                 gchar *repl = NULL; /* replacement  */
1218
1219                 /* the string still contains $(MARK_AMP_URI_STR)"amp;" for each
1220                  * '&' in the original, because of the text->html conversion.
1221                  * in the href-URL (and only there), we must convert that back to
1222                  * '&'
1223                  */
1224                 gchar *href_url = replace_string (url, MARK_AMP_URI_STR "amp;", '&');
1225                 
1226                 /* the prefix is NULL: use the one that is already there */
1227                 repl = g_strdup_printf ("<a href=\"%s%s\">%s</a>",
1228                                         match->prefix ? match->prefix : EMPTY_STRING, 
1229                                         href_url, url);
1230
1231                 /* replace the old thing with our hyperlink
1232                  * replacement thing */
1233                 g_string_erase  (txt, match->offset, match->len);
1234                 g_string_insert (txt, match->offset, repl);
1235                 
1236                 g_free (url);
1237                 g_free (repl);
1238                 g_free (href_url);
1239
1240                 g_slice_free (url_match_t, match);      
1241         }
1242         
1243         g_slist_free (match_list);
1244 }
1245
1246 void
1247 modest_text_utils_hyperlinkify (GString *string_buffer)
1248 {
1249         gchar *after_body;
1250         gint offset = 0;
1251
1252         after_body = strstr (string_buffer->str, "<body>");
1253         if (after_body != NULL)
1254                 offset = after_body - string_buffer->str;
1255         hyperlinkify_plain_text (string_buffer, offset);
1256 }
1257
1258
1259 /* for optimization reasons, we change the string in-place */
1260 void
1261 modest_text_utils_get_display_address (gchar *address)
1262 {
1263         int i;
1264
1265         g_return_if_fail (address);
1266         
1267         if (!address)
1268                 return;
1269         
1270         /* should not be needed, and otherwise, we probably won't screw up the address
1271          * more than it already is :) 
1272          * g_return_val_if_fail (g_utf8_validate (address, -1, NULL), NULL);
1273          * */
1274         
1275         /* remove leading whitespace */
1276         if (address[0] == ' ')
1277                 g_strchug (address);
1278                 
1279         for (i = 0; address[i]; ++i) {
1280                 if (address[i] == '<') {
1281                         if (G_UNLIKELY(i == 0)) {
1282                                 break; /* there's nothing else, leave it */
1283                         }else {
1284                                 address[i] = '\0'; /* terminate the string here */
1285                                 break;
1286                         }
1287                 }
1288         }
1289
1290         g_strchomp (address);
1291 }
1292
1293
1294 gchar *
1295 modest_text_utils_get_display_addresses (const gchar *recipients)
1296 {
1297         gchar *addresses;
1298         GSList *recipient_list;
1299
1300         addresses = NULL;
1301         recipient_list = modest_text_utils_split_addresses_list (recipients);
1302         if (recipient_list) {
1303                 GString *add_string = g_string_sized_new (strlen (recipients));
1304                 GSList *iter = recipient_list;
1305                 gboolean first = TRUE;
1306
1307                 while (iter) {
1308                         /* Strings are changed in place */
1309                         modest_text_utils_get_display_address ((gchar *) iter->data);
1310                         if (G_UNLIKELY (first)) {
1311                                 g_string_append_printf (add_string, "%s", (gchar *) iter->data);
1312                                 first = FALSE;
1313                         } else {
1314                                 g_string_append_printf (add_string, ", %s", (gchar *) iter->data);
1315                         }
1316                         iter = g_slist_next (iter);
1317                 }
1318                 g_slist_foreach (recipient_list, (GFunc) g_free, NULL);
1319                 g_slist_free (recipient_list);
1320                 addresses = g_string_free (add_string, FALSE);
1321         }
1322
1323         return addresses;
1324 }
1325
1326
1327 gchar *
1328 modest_text_utils_get_email_address (const gchar *full_address)
1329 {
1330         const gchar *left, *right;
1331
1332         g_return_val_if_fail (full_address, NULL);
1333         
1334         if (!full_address)
1335                 return NULL;
1336         
1337         g_return_val_if_fail (g_utf8_validate (full_address, -1, NULL), NULL);
1338         
1339         left = g_strrstr_len (full_address, strlen(full_address), "<");
1340         if (left == NULL)
1341                 return g_strdup (full_address);
1342
1343         right = g_strstr_len (left, strlen(left), ">");
1344         if (right == NULL)
1345                 return g_strdup (full_address);
1346
1347         return g_strndup (left + 1, right - left - 1);
1348 }
1349
1350 gint 
1351 modest_text_utils_get_subject_prefix_len (const gchar *sub)
1352 {
1353         gint prefix_len = 0;    
1354
1355         g_return_val_if_fail (sub, 0);
1356
1357         if (!sub)
1358                 return 0;
1359         
1360         /* optimization: "Re", "RE", "re","Fwd", "FWD", "fwd","FW","Fw", "fw" */
1361         if (sub[0] != 'R' && sub[0] != 'F' && sub[0] != 'r' && sub[0] != 'f')
1362                 return 0;
1363         else if (sub[0] && sub[1] != 'e' && sub[1] != 'E' && sub[1] != 'w' && sub[1] != 'W')
1364                 return 0;
1365
1366         prefix_len = 2;
1367         if (sub[2] == 'd')
1368                 ++prefix_len;
1369
1370         /* skip over a [...] block */
1371         if (sub[prefix_len] == '[') {
1372                 int c = prefix_len + 1;
1373                 while (sub[c] && sub[c] != ']')
1374                         ++c;
1375                 if (!sub[c])
1376                         return 0; /* no end to the ']' found */
1377                 else
1378                         prefix_len = c + 1;
1379         }
1380
1381         /* did we find the ':' ? */
1382         if (sub[prefix_len] == ':') {
1383                 ++prefix_len;
1384                 if (sub[prefix_len] == ' ')
1385                         ++prefix_len;
1386                 prefix_len += modest_text_utils_get_subject_prefix_len (sub + prefix_len);
1387 /*              g_warning ("['%s','%s']", sub, (char*) sub + prefix_len); */
1388                 return prefix_len;
1389         } else
1390                 return 0;
1391 }
1392
1393
1394 gint
1395 modest_text_utils_utf8_strcmp (const gchar* s1, const gchar *s2, gboolean insensitive)
1396 {
1397
1398 /* work even when s1 and/or s2 == NULL */
1399         if (G_UNLIKELY(s1 == s2))
1400                 return 0;
1401         if (G_UNLIKELY(!s1))
1402                 return -1;
1403         if (G_UNLIKELY(!s2))
1404                 return 1;
1405         
1406         /* if it's not case sensitive */
1407         if (!insensitive) {
1408
1409                 /* optimization: shortcut if first char is ascii */ 
1410                 if (((s1[0] & 0x80)== 0) && ((s2[0] & 0x80) == 0) &&
1411                     (s1[0] != s2[0])) 
1412                         return s1[0] - s2[0];
1413                 
1414                 return g_utf8_collate (s1, s2);
1415
1416         } else {
1417                 gint result;
1418                 gchar *n1, *n2;
1419
1420                 /* optimization: shortcut if first char is ascii */ 
1421                 if (((s1[0] & 0x80) == 0) && ((s2[0] & 0x80) == 0) &&
1422                     (tolower(s1[0]) != tolower (s2[0]))) 
1423                         return tolower(s1[0]) - tolower(s2[0]);
1424                 
1425                 n1 = g_utf8_strdown (s1, -1);
1426                 n2 = g_utf8_strdown (s2, -1);
1427                 
1428                 result = g_utf8_collate (n1, n2);
1429                 
1430                 g_free (n1);
1431                 g_free (n2);
1432         
1433                 return result;
1434         }
1435 }
1436
1437
1438 const gchar*
1439 modest_text_utils_get_display_date (time_t date)
1440 {
1441 #define DATE_BUF_SIZE 64 
1442         static gchar date_buf[DATE_BUF_SIZE];
1443         
1444         /* calculate the # of days since epoch for 
1445          * for today and for the date provided 
1446          * based on idea from pvanhoof */
1447         int day      = time(NULL) / (24 * 60 * 60);
1448         int date_day = date       / (24 * 60 * 60);
1449
1450         /* if it's today, show the time, if it's not today, show the date instead */
1451
1452         /* TODO: take into account the system config for 24/12h */
1453 #ifdef MODEST_TOOLKIT_HILDON2
1454         if (day == date_day) /* is the date today? */
1455                 modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, _HL("wdgt_va_24h_time"), date);
1456         else 
1457                 modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, _HL("wdgt_va_date"), date); 
1458 #else
1459         if (day == date_day) /* is the date today? */
1460                 modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, "%X", date);
1461         else 
1462                 modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, "%x", date); 
1463 #endif
1464
1465         return date_buf; /* this is a static buffer, don't free! */
1466 }
1467
1468
1469
1470 gboolean
1471 modest_text_utils_validate_folder_name (const gchar *folder_name)
1472 {
1473         /* based on http://msdn2.microsoft.com/en-us/library/aa365247.aspx,
1474          * with some extras */
1475         
1476         guint len;
1477         gint i;
1478         const gchar **cursor = NULL;
1479         const gchar *forbidden_names[] = { /* windows does not like these */
1480                 "CON", "PRN", "AUX", "NUL", ".", "..", "cur", "tmp", "new", 
1481                 NULL /* cur, tmp, new are reserved for Maildir */
1482         };
1483         
1484         /* cannot be NULL */
1485         if (!folder_name) 
1486                 return FALSE;
1487
1488         /* cannot be empty */
1489         len = strlen(folder_name);
1490         if (len == 0)
1491                 return FALSE;
1492         
1493         /* cannot start with a dot, vfat does not seem to like that */
1494         if (folder_name[0] == '.')
1495                 return FALSE;
1496
1497         /* cannot start or end with a space */
1498         if (g_ascii_isspace(folder_name[0]) || g_ascii_isspace(folder_name[len - 1]))
1499                 return FALSE; 
1500
1501         /* cannot contain a forbidden char */   
1502         for (i = 0; i < len; i++)
1503                 if (modest_text_utils_is_forbidden_char (folder_name[i], FOLDER_NAME_FORBIDDEN_CHARS))
1504                         return FALSE;
1505
1506         /* Cannot contain Windows port numbers. I'd like to use GRegex
1507            but it's still not available in Maemo. sergio */
1508         if (!g_ascii_strncasecmp (folder_name, "LPT", 3) ||
1509             !g_ascii_strncasecmp (folder_name, "COM", 3)) {
1510                 glong val;
1511                 gchar *endptr;
1512
1513                 /* We skip the first 3 characters for the
1514                    comparison */
1515                 val = strtol(folder_name+3, &endptr, 10);
1516
1517                 /* If the conversion to long succeeded then the string
1518                    is not valid for us */
1519                 if (*endptr == '\0')
1520                         return FALSE;
1521                 else
1522                         return TRUE;
1523         }
1524         
1525         /* cannot contain a forbidden word */
1526         if (len <= 4) {
1527                 for (cursor = forbidden_names; cursor && *cursor; ++cursor) {
1528                         if (g_ascii_strcasecmp (folder_name, *cursor) == 0)
1529                                 return FALSE;
1530                 }
1531         }
1532
1533         return TRUE; /* it's valid! */
1534 }
1535
1536
1537
1538 gboolean
1539 modest_text_utils_validate_domain_name (const gchar *domain)
1540 {
1541         gboolean valid = FALSE;
1542         regex_t rx;
1543         const gchar* domain_regex = "^([a-z0-9-]*[a-z0-9]\\.)+[a-z0-9-]*[a-z0-9]$";
1544
1545         g_return_val_if_fail (domain, FALSE);
1546         
1547         if (!domain)
1548                 return FALSE;
1549         
1550         memset (&rx, 0, sizeof(regex_t)); /* coverity wants this... */
1551                 
1552         /* domain name: all alphanum or '-' or '.',
1553          * but beginning/ending in alphanum */  
1554         if (regcomp (&rx, domain_regex, REG_ICASE|REG_EXTENDED|REG_NOSUB)) {
1555                 g_warning ("BUG: error in regexp");
1556                 return FALSE;
1557         }
1558         
1559         valid = (regexec (&rx, domain, 1, NULL, 0) == 0);
1560         regfree (&rx);
1561                 
1562         return valid;
1563 }
1564
1565
1566
/*
 * Check whether @email_address has the form name@domain.
 *
 * @invalid_char_position may be NULL. When given, it is first set to
 * NULL and later pointed at the offending character if the address is
 * rejected because one of the `rfc822_specials' characters appears
 * unquoted in the name or anywhere in the domain. All other rejections
 * leave it at NULL.
 *
 * Returns TRUE when the address passes all checks, FALSE otherwise.
 */
1567 gboolean
1568 modest_text_utils_validate_email_address (const gchar *email_address,
1569                                           const gchar **invalid_char_position)
1570 {
        /* `count' is the number of dots seen in the domain part */
1571         int count = 0;
1572         const gchar *c = NULL, *domain = NULL;
1573         static gchar *rfc822_specials = "()<>@,;:\\\"[]&";
1574         
1575         if (invalid_char_position)
1576                 *invalid_char_position = NULL;
1577         
1578         g_return_val_if_fail (email_address, FALSE);
1579         
1580         /* check that the email address contains exactly one @:
         * the first and the last occurrence must be the same one */
1581         if (!strstr(email_address, "@") || 
1582                         (strstr(email_address, "@") != g_strrstr(email_address, "@"))) 
1583                 return FALSE;
1584         
1585         /* first we validate the name portion (name@domain) */
1586         for (c = email_address;  *c;  c++) {
                /* a quoted section may only begin at the very start, after
                 * a '.' or directly after another quote character */
1587                 if (*c == '\"' && 
1588                     (c == email_address || 
1589                      *(c - 1) == '.' || 
1590                      *(c - 1) == '\"')) {
                        /* scan up to the closing quote; inside the quotes a
                         * backslash followed by a space is accepted, while
                         * any other character at or below ' ' or at or
                         * above 127 is rejected */
1591                         while (*++c) {
1592                                 if (*c == '\"') 
1593                                         break;
1594                                 if (*c == '\\' && (*++c == ' ')) 
1595                                         continue;
1596                                 if (*c <= ' ' || *c >= 127) 
1597                                         return FALSE;
1598                         }
                        /* the string ended before the closing quote */
1599                         if (!*c++) 
1600                                 return FALSE;
                        /* a quoted section must be followed by '@' (end of
                         * the name) or by '.' */
1601                         if (*c == '@') 
1602                                 break;
1603                         if (*c != '.') 
1604                                 return FALSE;
1605                         continue;
1606                 }
1607                 if (*c == '@') 
1608                         break;
1609                 if (*c <= ' ' || *c >= 127) 
1610                         return FALSE;
1611                 if (strchr(rfc822_specials, *c)) {
1612                         if (invalid_char_position)
1613                                 *invalid_char_position = c;
1614                         return FALSE;
1615                 }
1616         }
        /* the name must not be empty and must not end with a '.' */
1617         if (c == email_address || *(c - 1) == '.') 
1618                 return FALSE;
1619
1620         /* next we validate the domain portion (name@domain);
         * it starts right after the '@' and must not be empty */
1621         if (!*(domain = ++c)) 
1622                 return FALSE;
1623         do {
                /* a '.' may not be the first or last character of the
                 * domain and may not follow another '.' */
1624                 if (*c == '.') {
1625                         if (c == domain || *(c - 1) == '.' || *(c + 1) == '\0') 
1626                                 return FALSE;
1627                         count++;
1628                 }
1629                 if (*c <= ' ' || *c >= 127) 
1630                         return FALSE;
1631                 if (strchr(rfc822_specials, *c)) {
1632                         if (invalid_char_position)
1633                                 *invalid_char_position = c;
1634                         return FALSE;
1635                 }
1636         } while (*++c);
1637
        /* the domain needs at least one '.' */
1638         return (count >= 1) ? TRUE : FALSE;
1639 }
1640
1641 gboolean 
1642 modest_text_utils_validate_recipient (const gchar *recipient, const gchar **invalid_char_position)
1643 {
1644         gchar *stripped, *current;
1645         gchar *right_part;
1646         gboolean has_error = FALSE;
1647
1648         if (invalid_char_position)
1649                 *invalid_char_position = NULL;
1650         
1651         g_return_val_if_fail (recipient, FALSE);
1652         
1653         if (modest_text_utils_validate_email_address (recipient, invalid_char_position))
1654                 return TRUE;
1655
1656         stripped = g_strdup (recipient);
1657         stripped = g_strstrip (stripped);
1658         current = stripped;
1659
1660         if (*current == '\0') {
1661                 g_free (stripped);
1662                 return FALSE;
1663         }
1664
1665         /* quoted string */
1666         if (*current == '\"') {
1667                 current = g_utf8_next_char (current);
1668                 has_error = TRUE;
1669                 for (; *current != '\0'; current = g_utf8_next_char (current)) {
1670                         if (*current == '\\') {
1671                                 /* TODO: This causes a warning, which breaks the build, 
1672                                  * because a gchar cannot be < 0.
1673                                  * murrayc. 
1674                                 if (current[1] <0) {
1675                                         has_error = TRUE;
1676                                         break;
1677                                 }
1678                                 */
1679                         } else if (*current == '\"') {
1680                                 has_error = FALSE;
1681                                 current = g_utf8_next_char (current);
1682                                 break;
1683                         }
1684                 }
1685         } else {
1686                 has_error = TRUE;
1687                 for (current = stripped ; *current != '\0'; current = g_utf8_next_char (current)) {
1688                         if (*current == '<') {
1689                                 has_error = FALSE;
1690                                 break;
1691                         }
1692                 }
1693         }
1694                 
1695         if (has_error) {
1696                 g_free (stripped);
1697                 return FALSE;
1698         }
1699
1700         right_part = g_strdup (current);
1701         g_free (stripped);
1702         right_part = g_strstrip (right_part);
1703
1704         if (g_str_has_prefix (right_part, "<") &&
1705             g_str_has_suffix (right_part, ">")) {
1706                 gchar *address;
1707                 gboolean valid;
1708
1709                 address = g_strndup (right_part+1, strlen (right_part) - 2);
1710                 g_free (right_part);
1711                 valid = modest_text_utils_validate_email_address (address, invalid_char_position);
1712                 g_free (address);
1713                 return valid;
1714         } else {
1715                 g_free (right_part);
1716                 return FALSE;
1717         }
1718 }
1719
1720
1721 gchar *
1722 modest_text_utils_get_display_size (guint64 size)
1723 {
1724         const guint KB=1024;
1725         const guint MB=1024 * KB;
1726         const guint GB=1024 * MB;
1727
1728         if (size == 0)
1729                 return g_strdup_printf (_FM("sfil_li_size_kb"), (int) 0);
1730         if (0 <= size && size < KB)
1731                 return g_strdup_printf (_FM("sfil_li_size_1kb_99kb"), (int) 1);
1732         else if (KB <= size && size < 100 * KB)
1733                 return g_strdup_printf (_FM("sfil_li_size_1kb_99kb"), (int) size / KB);
1734         else if (100*KB <= size && size < MB)
1735                 return g_strdup_printf (_FM("sfil_li_size_100kb_1mb"), (int) size / KB);
1736         else if (MB <= size && size < 10*MB)
1737                 return g_strdup_printf (_FM("sfil_li_size_1mb_10mb"), (float) size / MB);
1738         else if (10*MB <= size && size < GB)
1739                 return g_strdup_printf (_FM("sfil_li_size_10mb_1gb"), (float) size / MB);
1740         else
1741                 return g_strdup_printf (_FM("sfil_li_size_1gb_or_greater"), (float) size / GB);
1742 }
1743
1744 static gchar *
1745 get_email_from_address (const gchar * address)
1746 {
1747         gchar *left_limit, *right_limit;
1748
1749         left_limit = strstr (address, "<");
1750         right_limit = g_strrstr (address, ">");
1751
1752         if ((left_limit == NULL)||(right_limit == NULL)|| (left_limit > right_limit))
1753                 return g_strdup (address);
1754         else
1755                 return g_strndup (left_limit + 1, (right_limit - left_limit) - 1);
1756 }
1757
1758 gchar *
1759 modest_text_utils_get_color_string (GdkColor *color)
1760 {
1761         g_return_val_if_fail (color, NULL);
1762
1763         return g_strdup_printf ("#%x%x%x%x%x%x%x%x%x%x%x%x",
1764                                 (color->red >> 12)   & 0xf, (color->red >> 8)   & 0xf,
1765                                 (color->red >>  4)   & 0xf, (color->red)        & 0xf,
1766                                 (color->green >> 12) & 0xf, (color->green >> 8) & 0xf,
1767                                 (color->green >>  4) & 0xf, (color->green)      & 0xf,
1768                                 (color->blue >> 12)  & 0xf, (color->blue >> 8)  & 0xf,
1769                                 (color->blue >>  4)  & 0xf, (color->blue)       & 0xf);
1770 }
1771
1772 gchar *
1773 modest_text_utils_text_buffer_get_text (GtkTextBuffer *buffer)
1774 {
1775         GtkTextIter start, end;
1776         gchar *slice, *current;
1777         GString *result = g_string_new ("");
1778
1779         g_return_val_if_fail (buffer && GTK_IS_TEXT_BUFFER (buffer), NULL);
1780         
1781         gtk_text_buffer_get_start_iter (buffer, &start);
1782         gtk_text_buffer_get_end_iter (buffer, &end);
1783
1784         slice = gtk_text_buffer_get_slice (buffer, &start, &end, FALSE);
1785         current = slice;
1786
1787         while (current && current != '\0') {
1788                 if (g_utf8_get_char (current) == 0xFFFC) {
1789                         result = g_string_append_c (result, ' ');
1790                         current = g_utf8_next_char (current);
1791                 } else {
1792                         gchar *next = g_utf8_strchr (current, -1, 0xFFFC);
1793                         if (next == NULL) {
1794                                 result = g_string_append (result, current);
1795                         } else {
1796                                 result = g_string_append_len (result, current, next - current);
1797                         }
1798                         current = next;
1799                 }
1800         }
1801         g_free (slice);
1802
1803         return g_string_free (result, FALSE);
1804         
1805 }
1806
1807 gboolean
1808 modest_text_utils_is_forbidden_char (const gchar character,
1809                                      ModestTextUtilsForbiddenCharType type)
1810 {
1811         gint i, len;
1812         const gchar *forbidden_chars = NULL;
1813         
1814         /* We need to get the length in the switch because the
1815            compiler needs to know the size at compile time */
1816         switch (type) {
1817         case ACCOUNT_TITLE_FORBIDDEN_CHARS:
1818                 forbidden_chars = account_title_forbidden_chars;
1819                 len = G_N_ELEMENTS (account_title_forbidden_chars);
1820                 break;
1821         case FOLDER_NAME_FORBIDDEN_CHARS:
1822                 forbidden_chars = folder_name_forbidden_chars;
1823                 len = G_N_ELEMENTS (folder_name_forbidden_chars);
1824                 break;
1825         case USER_NAME_FORBIDDEN_NAMES:
1826                 forbidden_chars = user_name_forbidden_chars;
1827                 len = G_N_ELEMENTS (user_name_forbidden_chars);
1828                 break;
1829         default:
1830                 g_return_val_if_reached (TRUE);
1831         }
1832
1833         for (i = 0; i < len ; i++)
1834                 if (forbidden_chars[i] == character)
1835                         return TRUE;
1836
1837         return FALSE; /* it's valid! */
1838 }
1839
1840 gchar *      
1841 modest_text_utils_label_get_selection (GtkLabel *label)
1842 {
1843         gint start, end;
1844         gchar *selection;
1845
1846         if (gtk_label_get_selection_bounds (GTK_LABEL (label), &start, &end)) {
1847                 const gchar *start_offset;
1848                 const gchar *end_offset;
1849                 start_offset = gtk_label_get_text (GTK_LABEL (label));
1850                 start_offset = g_utf8_offset_to_pointer (start_offset, start);
1851                 end_offset = gtk_label_get_text (GTK_LABEL (label));
1852                 end_offset = g_utf8_offset_to_pointer (end_offset, end);
1853                 selection = g_strndup (start_offset, end_offset - start_offset);
1854                 return selection;
1855         } else {
1856                 return g_strdup ("");
1857         }
1858 }
1859
1860 static gboolean
1861 _forward_search_image_char (gunichar ch,
1862                             gpointer userdata)
1863 {
1864         return (ch == 0xFFFC);
1865 }
1866
1867 gboolean
1868 modest_text_utils_buffer_selection_is_valid (GtkTextBuffer *buffer)
1869 {
1870         gboolean result;
1871         GtkTextIter start, end;
1872
1873         g_return_val_if_fail (GTK_IS_TEXT_BUFFER (buffer), FALSE);
1874
1875         result = gtk_text_buffer_get_has_selection (GTK_TEXT_BUFFER (buffer));
1876
1877         /* check there are no images in selection */
1878         if (result) {
1879                 gtk_text_buffer_get_selection_bounds (buffer, &start, &end);
1880                 if (gtk_text_iter_get_char (&start)== 0xFFFC)
1881                         result = FALSE;
1882                 else {
1883                         gtk_text_iter_backward_char (&end);
1884                         if (gtk_text_iter_forward_find_char (&start, _forward_search_image_char,
1885                                                              NULL, &end))
1886                                 result = FALSE;
1887                 }
1888                                     
1889         }
1890
1891         return result;
1892 }
1893
1894 static void
1895 remove_quotes (gchar **quotes)
1896 {
1897         if (g_str_has_prefix (*quotes, "\"") && g_str_has_suffix (*quotes, "\"")) {
1898                 gchar *result;
1899                 result = g_strndup ((*quotes)+1, strlen (*quotes) - 2);
1900                 g_free (*quotes);
1901                 *quotes = result;
1902         }
1903 }
1904
1905 gchar *
1906 modest_text_utils_escape_mnemonics (const gchar *text)
1907 {
1908         const gchar *p;
1909         GString *result = NULL;
1910
1911         if (text == NULL)
1912                 return NULL;
1913
1914         result = g_string_new ("");
1915         for (p = text; *p != '\0'; p++) {
1916                 if (*p == '_')
1917                         result = g_string_append (result, "__");
1918                 else
1919                         result = g_string_append_c (result, *p);
1920         }
1921         
1922         return g_string_free (result, FALSE);
1923 }
1924
1925 gchar *
1926 modest_text_utils_simplify_recipients (const gchar *recipients)
1927 {
1928         GSList *addresses, *node;
1929         GString *result;
1930         gboolean is_first = TRUE;
1931
1932         if (recipients == NULL)
1933                 return g_strdup ("");
1934
1935         addresses = modest_text_utils_split_addresses_list (recipients);
1936         result = g_string_new ("");
1937
1938         for (node = addresses; node != NULL; node = g_slist_next (node)) {
1939                 const gchar *address = (const gchar *) node->data;
1940                 gchar *left_limit, *right_limit;
1941
1942                 left_limit = strstr (address, "<");
1943                 right_limit = g_strrstr (address, ">");
1944
1945                 if (is_first)
1946                         is_first = FALSE;
1947                 else
1948                         result = g_string_append (result, ", ");
1949
1950                 if ((left_limit == NULL)||(right_limit == NULL)|| (left_limit > right_limit)) {
1951                         result = g_string_append (result, address);
1952                 } else {
1953                         gchar *name_side;
1954                         gchar *email_side;
1955                         name_side = g_strndup (address, left_limit - address);
1956                         name_side = g_strstrip (name_side);
1957                         remove_quotes (&name_side);
1958                         email_side = get_email_from_address (address);
1959                         if (name_side && email_side && !strcmp (name_side, email_side)) {
1960                                 result = g_string_append (result, email_side);
1961                         } else {
1962                                 result = g_string_append (result, address);
1963                         }
1964                         g_free (name_side);
1965                         g_free (email_side);
1966                 }
1967
1968         }
1969         g_slist_foreach (addresses, (GFunc)g_free, NULL);
1970         g_slist_free (addresses);
1971
1972         return g_string_free (result, FALSE);
1973
1974 }
1975
1976 GSList *
1977 modest_text_utils_remove_duplicate_addresses_list (GSList *address_list)
1978 {
1979         GSList *new_list, *iter;
1980         GHashTable *table;
1981
1982         g_return_val_if_fail (address_list, NULL);
1983
1984         table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
1985
1986         new_list = address_list;
1987         iter = address_list;
1988         while (iter) {
1989                 const gchar* address = (const gchar*)iter->data;
1990
1991                 /* We need only the email to just compare it and not
1992                    the full address which would make "a <a@a.com>"
1993                    different from "a@a.com" */
1994                 const gchar *email = get_email_from_address (address);
1995
1996                 /* ignore the address if already seen */
1997                 if (g_hash_table_lookup (table, email) == 0) {
1998                         g_hash_table_insert (table, (gchar*)email, GINT_TO_POINTER(1));
1999                         iter = g_slist_next (iter);
2000                 } else {
2001                         GSList *tmp = g_slist_next (iter);
2002                         new_list = g_slist_delete_link (new_list, iter);
2003                         iter = tmp;
2004                 }
2005         }
2006
2007         g_hash_table_unref (table);
2008
2009         return new_list;
2010 }
2011
2012 gchar *
2013 modest_text_utils_get_secure_header (const gchar *value,
2014                                      const gchar *header)
2015 {
2016         const gint max_len = 128;
2017         gchar *new_value = NULL;
2018         gchar *needle = g_strrstr (value, header);
2019
2020         if (needle && value != needle)
2021                 new_value = g_strdup (needle + strlen (header));
2022
2023         if (!new_value)
2024                 new_value = g_strdup (value);
2025
2026         /* Do a max length check to prevent DoS attacks caused by huge
2027            malformed headers */
2028         if (g_utf8_validate (new_value, -1, NULL)) {
2029                 if (g_utf8_strlen (new_value, -1) > max_len) {
2030                         gchar *tmp = g_malloc0 (max_len * 4);
2031                         g_utf8_strncpy (tmp, (const gchar *) new_value, max_len);
2032                         g_free (new_value);
2033                         new_value = tmp;
2034                 }
2035         } else {
2036                 if (strlen (new_value) > max_len) {
2037                         gchar *tmp = g_malloc0 (max_len);
2038                         strncpy (new_value, tmp, max_len);
2039                         g_free (new_value);
2040                         new_value = tmp;
2041                 }
2042         }
2043
2044         return new_value;
2045 }