8161ae525541d813301f9a4f68198387bb4cbf85
[modest] / src / modest-text-utils.c
1 /* Copyright (c) 2006, Nokia Corporation
2  * All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are
6  * met:
7  *
8  * * Redistributions of source code must retain the above copyright
9  *   notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  *   notice, this list of conditions and the following disclaimer in the
12  *   documentation and/or other materials provided with the distribution.
13  * * Neither the name of the Nokia Corporation nor the names of its
14  *   contributors may be used to endorse or promote products derived from
15  *   this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
18  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
20  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
21  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30
31
32 #ifndef _GNU_SOURCE
33 #define _GNU_SOURCE
34 #endif /*_GNU_SOURCE*/
35 #include <string.h> /* for strcasestr */
36
37
38 #include <glib.h>
39 #include <stdlib.h>
40 #include <glib/gi18n.h>
41 #include <regex.h>
42 #include <modest-tny-platform-factory.h>
43 #include <modest-text-utils.h>
44 #include <modest-runtime.h>
45 #include <ctype.h>
46
47 #ifdef HAVE_CONFIG_H
48 #include <config.h>
49 #endif /*HAVE_CONFIG_H */
50
/* defines */

/*
 * Message ids looked up through the _() translation macro.  They label
 * the lines of the header block that forward_cite() builds
 * (banner, from, sent, to, subject).
 */
#define FORWARD_STRING _("mcen_ia_editor_original_message")
#define FROM_STRING _("mail_va_from")
#define SENT_STRING _("mcen_fi_message_properties_sent")
#define TO_STRING _("mail_va_to")
#define SUBJECT_STRING _("mail_va_subject")
/* plain empty string constant */
#define EMPTY_STRING ""
58
/*
 * Only hyperlinkify texts up to 50 KiB, because the URL matching is
 * quite slow.  Without this limit, e.g. a mail with a uuencoded part
 * (which is not recognized as an attachment) will hang modest.
 */
#define HYPERLINKIFY_MAX_LENGTH (1024*50)
66
67 /*
68  * we need these regexps to find URLs in plain text e-mails
69  */
70 typedef struct _url_match_pattern_t url_match_pattern_t;
71 struct _url_match_pattern_t {
72         gchar   *regex;
73         regex_t *preg;
74         gchar   *prefix;
75 };
76
77 typedef struct _url_match_t url_match_t;
78 struct _url_match_t {
79         guint offset;
80         guint len;
81         const gchar* prefix;
82 };
83
84
/*
 * When converting text->html we do not emit a literal '&' for entities;
 * we emit the placeholder byte \007 instead, because hyperlink detection
 * runs after the text->html step and could be confused by the ampersand.
 * i.e. 1<3 ==> 1\007lt;3
 * The placeholders are turned into real '&' characters once hyperlink
 * detection is done.
 */
#define MARK_AMP '\007'
#define MARK_AMP_STR "\007"

/* An '&' coming from the original text is marked separately (\006),
 * because it can be part of a URL.
 * i.e. a&b => a\006amp;b, but a>b => a\007gt;b
 *
 * We need to handle '&' separately, because it can be part of URIs
 * (as in href="http://foo.bar?a=1&b=1"), so inside those URIs
 * we need to re-replace \006amp; with '&' again, while outside URIs
 * it will be '&amp;'.
 *
 * Yes, it's messy, but it is a consequence of doing text->html first
 * and hyperlinkifying afterwards.
 */
#define MARK_AMP_URI '\006'
#define MARK_AMP_URI_STR "\006"
106
107
/*
 * Initializer for a table of url_match_pattern_t entries, each written
 * as { regex, preg, prefix }.  preg is always NULL here; prefix is the
 * scheme to associate with patterns that match scheme-less text
 * ("www." -> "http://", "ftp." -> "ftp://", bare e-mail address ->
 * "mailto:") and NULL when the matched text already carries its scheme.
 *
 * Note: the patterns match MARK_AMP_URI_STR as well, because after
 * txt->html a '&' will look like $(MARK_AMP_URI_STR)"amp;".
 *
 * Entries, in order:
 *   1. scheme://... URLs, optionally preceded by "feed:"
 *   2. www.* host names
 *   3. ftp.* host names
 *   4. instant messaging / VoIP style URIs (jabberto:, sip:, skype:, ...)
 *   5. explicit mailto: addresses
 *   6. bare e-mail addresses
 */
#define MAIL_VIEWER_URL_MATCH_PATTERNS  {                               \
        { "(feed:|)(file|rtsp|http|ftp|https|mms|mmsh|webcal|feed|rdp|lastfm|sip)://[-a-z0-9_$.+!*(),;:@%=\?/~#&" MARK_AMP_URI_STR \
                        "]+[-a-z0-9_$%&" MARK_AMP_URI_STR "=?/~#]",     \
          NULL, NULL },\
        { "www\\.[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]",\
                        NULL, "http://" },                              \
        { "ftp\\.[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]",\
          NULL, "ftp://" },\
        { "(jabberto|voipto|sipto|sip|chatto|skype|xmpp):[-_a-z@0-9.+]+", \
           NULL, NULL},                                             \
        { "mailto:[-_a-z0-9.\\+]+@[-_a-z0-9.]+",                    \
          NULL, NULL},\
        { "[-_a-z0-9.\\+]+@[-_a-z0-9.]+",\
          NULL, "mailto:"}\
        }
124
125 const gchar account_title_forbidden_chars[] = {
126         '\\', '/', ':', '*', '?', '\'', '<', '>', '|', '^'
127 };
128 const gchar folder_name_forbidden_chars[] = {
129         '<', '>', ':', '\'', '/', '\\', '|', '?', '*', '^', '%', '$', '#', '&'
130 };
131 const gchar user_name_forbidden_chars[] = {
132         '<', '>'
133 };
134 const guint ACCOUNT_TITLE_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (account_title_forbidden_chars);
135 const guint FOLDER_NAME_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (folder_name_forbidden_chars);
136 const guint USER_NAME_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (user_name_forbidden_chars);
137
138 /* private */
139 static gchar*   cite                    (const time_t sent_date, const gchar *from);
140 static void     hyperlinkify_plain_text (GString *txt, gint offset);
141 static gint     cmp_offsets_reverse     (const url_match_t *match1, const url_match_t *match2);
142 static GSList*  get_url_matches         (GString *txt, gint offset);
143
144 static GString* get_next_line           (const char *b, const gsize blen, const gchar * iter);
145 static int      get_indent_level        (const char *l);
146 static void     unquote_line            (GString * l, const gchar *quote_symbol);
147 static void     append_quoted           (GString * buf, const gchar *quote_symbol,
148                                          const int indent, const GString * str, 
149                                          const int cutpoint);
150 static int      get_breakpoint_utf8     (const gchar * s, const gint indent, const gint limit);
151 static int      get_breakpoint_ascii    (const gchar * s, const gint indent, const gint limit);
152 static int      get_breakpoint          (const gchar * s, const gint indent, const gint limit);
153
154 static gchar*   modest_text_utils_quote_plain_text (const gchar *text, 
155                                                     const gchar *cite, 
156                                                     const gchar *signature,
157                                                     GList *attachments, 
158                                                     int limit);
159
160 static gchar*   modest_text_utils_quote_html       (const gchar *text, 
161                                                     const gchar *cite,
162                                                     const gchar *signature,
163                                                     GList *attachments,
164                                                     int limit);
165 static gchar*   get_email_from_address (const gchar *address);
166
167
168 /* ******************************************************************* */
169 /* ************************* PUBLIC FUNCTIONS ************************ */
170 /* ******************************************************************* */
171
172 gchar *
173 modest_text_utils_quote (const gchar *text, 
174                          const gchar *content_type,
175                          const gchar *signature,
176                          const gchar *from,
177                          const time_t sent_date, 
178                          GList *attachments,
179                          int limit)
180 {
181         gchar *retval, *cited;
182
183         g_return_val_if_fail (text, NULL);
184         g_return_val_if_fail (content_type, NULL);
185
186         cited = cite (sent_date, from);
187         
188         if (content_type && strcmp (content_type, "text/html") == 0)
189                 /* TODO: extract the <body> of the HTML and pass it to
190                    the function */
191                 retval = modest_text_utils_quote_html (text, cited, signature, attachments, limit);
192         else
193                 retval = modest_text_utils_quote_plain_text (text, cited, signature, attachments, limit);
194         
195         g_free (cited);
196         
197         return retval;
198 }
199
200
201 gchar *
202 modest_text_utils_cite (const gchar *text,
203                         const gchar *content_type,
204                         const gchar *signature,
205                         const gchar *from,
206                         time_t sent_date)
207 {
208         gchar *retval;
209         gchar *tmp_sig;
210         
211         g_return_val_if_fail (text, NULL);
212         g_return_val_if_fail (content_type, NULL);
213         
214         if (!signature) {
215                 tmp_sig = g_strdup (text);
216         } else {
217                 tmp_sig = g_strconcat (text, "\n", MODEST_TEXT_UTILS_SIGNATURE_MARKER, "\n", signature, NULL);
218         }
219
220         if (strcmp (content_type, "text/html") == 0) {
221                 retval = modest_text_utils_convert_to_html_body (tmp_sig, -1, TRUE);
222                 g_free (tmp_sig);
223         } else {
224                 retval = tmp_sig;
225         }
226
227         return retval;
228 }
229
230 static gchar *
231 forward_cite (const gchar *from,
232               const gchar *sent,
233               const gchar *to,
234               const gchar *subject)
235 {
236         g_return_val_if_fail (sent, NULL);
237         
238         return g_strdup_printf ("%s\n%s %s\n%s %s\n%s %s\n%s %s\n", 
239                                 FORWARD_STRING, 
240                                 FROM_STRING, (from)?from:"",
241                                 SENT_STRING, sent,
242                                 TO_STRING, (to)?to:"",
243                                 SUBJECT_STRING, (subject)?subject:"");
244 }
245
246 gchar * 
247 modest_text_utils_inline (const gchar *text,
248                           const gchar *content_type,
249                           const gchar *signature,
250                           const gchar *from,
251                           time_t sent_date,
252                           const gchar *to,
253                           const gchar *subject)
254 {
255         gchar sent_str[101];
256         gchar *cited;
257         gchar *retval;
258         
259         g_return_val_if_fail (text, NULL);
260         g_return_val_if_fail (content_type, NULL);
261         
262         modest_text_utils_strftime (sent_str, 100, "%c", sent_date);
263
264         cited = forward_cite (from, sent_str, to, subject);
265         
266         if (content_type && strcmp (content_type, "text/html") == 0)
267                 retval = modest_text_utils_quote_html (text, cited, signature, NULL, 80);
268         else
269                 retval = modest_text_utils_quote_plain_text (text, cited, signature, NULL, 80);
270         
271         g_free (cited);
272         return retval;
273 }
274
275 /* just to prevent warnings:
276  * warning: `%x' yields only last 2 digits of year in some locales
277  */
278 gsize
279 modest_text_utils_strftime(char *s, gsize max, const char *fmt, time_t timet)
280 {
281         struct tm tm;
282
283         /* To prevent possible problems in strftime that could leave
284            garbage in the s variable */
285         if (s)
286                 s[0] = '\0';
287         else
288                 return 0;
289
290         /* does not work on old maemo glib: 
291          *   g_date_set_time_t (&date, timet);
292          */
293         localtime_r (&timet, &tm);
294         return strftime(s, max, fmt, &tm);
295 }
296
297 gchar *
298 modest_text_utils_derived_subject (const gchar *subject, const gchar *prefix)
299 {
300         gchar *tmp, *subject_dup, *retval;
301         gint prefix_len;
302
303         g_return_val_if_fail (prefix, NULL);
304
305         if (!subject || subject[0] == '\0')
306                 subject = _("mail_va_no_subject");
307
308         subject_dup = g_strdup (subject);
309         tmp = g_strchug (subject_dup);
310
311         /* We do not want things like "Re: Re: Re:" or "Fw: Fw:" so
312            delete the previous ones */
313         prefix_len = strlen (prefix);
314         do {
315                 if (g_str_has_prefix (tmp, prefix)) {
316                         tmp += prefix_len;
317                         tmp = g_strchug (tmp);
318                 } else {
319                         gchar *prefix_down, *tmp_down;
320
321                         /* We need this to properly check the cases of
322                            some clients adding FW: instead of Fw: for
323                            example */
324                         prefix_down = g_utf8_strdown (prefix, -1);
325                         tmp_down = g_utf8_strdown (tmp, -1);
326                         if (g_str_has_prefix (tmp_down, prefix_down)) {
327                                 tmp += prefix_len;
328                                 tmp = g_strchug (tmp);
329                                 g_free (prefix_down);
330                                 g_free (tmp_down);
331                         } else {
332                                 g_free (prefix_down);
333                                 g_free (tmp_down);
334                                 break;
335                         }
336                 }
337         } while (tmp);
338
339         retval = g_strdup_printf ("%s %s", prefix, tmp);
340         g_free (subject_dup);
341
342         return retval;
343 }
344
345 gchar*
346 modest_text_utils_remove_address (const gchar *address_list, const gchar *address)
347 {
348         gchar *dup, *token, *ptr = NULL, *result;
349         GString *filtered_emails;
350         gchar *email_address;
351
352         g_return_val_if_fail (address_list, NULL);
353         
354         if (!address)
355                 return g_strdup (address_list);
356
357         email_address = get_email_from_address (address);
358         
359         /* search for substring */
360         if (!strstr ((const char *) address_list, (const char *) email_address)) {
361                 g_free (email_address);
362                 return g_strdup (address_list);
363         }
364
365         dup = g_strdup (address_list);
366         filtered_emails = g_string_new (NULL);
367         
368         token = strtok_r (dup, ",", &ptr);
369
370         while (token != NULL) {
371                 /* Add to list if not found */
372                 if (!strstr ((const char *) token, (const char *) email_address)) {
373                         if (filtered_emails->len == 0)
374                                 g_string_append_printf (filtered_emails, "%s", g_strstrip (token));
375                         else
376                                 g_string_append_printf (filtered_emails, ",%s", g_strstrip (token));
377                 }
378                 token = strtok_r (NULL, ",", &ptr);
379         }
380         result = filtered_emails->str;
381
382         /* Clean */
383         g_free (email_address);
384         g_free (dup);
385         g_string_free (filtered_emails, FALSE);
386
387         return result;
388 }
389
390
391 gchar*
392 modest_text_utils_remove_duplicate_addresses (const gchar *address_list)
393 {
394         GSList *addresses, *cursor;
395         GHashTable *table;
396         gchar *new_list = NULL;
397         
398         g_return_val_if_fail (address_list, NULL);
399
400         table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
401         addresses = modest_text_utils_split_addresses_list (address_list);
402
403         cursor = addresses;
404         while (cursor) {
405                 const gchar* address = (const gchar*)cursor->data;
406
407                 /* We need only the email to just compare it and not
408                    the full address which would make "a <a@a.com>"
409                    different from "a@a.com" */
410                 const gchar *email = get_email_from_address (address);
411
412                 /* ignore the address if already seen */
413                 if (g_hash_table_lookup (table, email) == 0) {
414                         gchar *tmp;
415
416                         /* Include the full address and not only the
417                            email in the returned list */
418                         if (!new_list) {
419                                 tmp = g_strdup (address);
420                         } else {
421                                 tmp = g_strjoin (",", new_list, address, NULL);
422                                 g_free (new_list);
423                         }
424                         new_list = tmp;
425                         
426                         g_hash_table_insert (table, (gchar*)email, GINT_TO_POINTER(1));
427                 }
428                 cursor = g_slist_next (cursor);
429         }
430
431         g_hash_table_unref (table);
432         g_slist_foreach (addresses, (GFunc)g_free, NULL);
433         g_slist_free (addresses);
434
435         if (new_list == NULL)
436                 new_list = g_strdup ("");
437
438         return new_list;
439 }
440
441
442 static void
443 modest_text_utils_convert_buffer_to_html_start (GString *html, const gchar *data, gssize n)
444 {
445         guint           i;
446         gboolean        space_seen = FALSE;
447         guint           break_dist = 0; /* distance since last break point */
448
449         if (n == -1)
450                 n = strlen (data);
451
452         /* replace with special html chars where needed*/
453         for (i = 0; i != n; ++i)  {
454                 guchar kar = data[i];
455                 
456                 if (space_seen && kar != ' ') {
457                         g_string_append (html, " ");
458                         space_seen = FALSE;
459                 }
460                 
461                 /* we artificially insert a breakpoint (newline)
462                  * after 256, to make sure our lines are not so long
463                  * they will DOS the regexping later
464                  * Also, check that kar is ASCII to make sure that we
465                  * don't break a UTF8 char in two
466                  */
467                 if (++break_dist >= 256 && kar < 127) {
468                         g_string_append_c (html, '\n');
469                         break_dist = 0;
470                 }
471                 
472                 switch (kar) {
473                 case 0:
474                 case MARK_AMP:
475                 case MARK_AMP_URI:      
476                         /* this is a temp place holder for '&'; we can only
477                                 * set the real '&' after hyperlink translation, otherwise
478                                 * we might screw that up */
479                         break; /* ignore embedded \0s and MARK_AMP */   
480                 case '<'  : g_string_append (html, MARK_AMP_STR "lt;");   break;
481                 case '>'  : g_string_append (html, MARK_AMP_STR "gt;");   break;
482                 case '&'  : g_string_append (html, MARK_AMP_URI_STR "amp;");  break; /* special case */
483                 case '"'  : g_string_append (html, MARK_AMP_STR "quot;");  break;
484
485                 /* don't convert &apos; --> wpeditor will try to re-convert it... */    
486                 //case '\'' : g_string_append (html, "&apos;"); break;
487                 case '\n' : g_string_append (html, "<br>\n");break_dist= 0; break;
488                 case '\t' : g_string_append (html, MARK_AMP_STR "nbsp;" MARK_AMP_STR "nbsp;" MARK_AMP_STR "nbsp; ");
489                         break_dist=0; break; /* note the space at the end*/
490                 case ' ':
491                         break_dist = 0;
492                         if (space_seen) { /* second space in a row */
493                                 g_string_append (html, "&nbsp; ");
494                         } else
495                                 space_seen = TRUE;
496                         break;
497                 default:
498                         g_string_append_c (html, kar);
499                 }
500         }
501 }
502
503
504 static void
505 modest_text_utils_convert_buffer_to_html_finish (GString *html)
506 {
507         int i;
508         /* replace all our MARK_AMPs with real ones */
509         for (i = 0; i != html->len; ++i)
510                 if ((html->str)[i] == MARK_AMP || (html->str)[i] == MARK_AMP_URI)
511                         (html->str)[i] = '&';
512 }
513
514
515 gchar*
516 modest_text_utils_convert_to_html (const gchar *data)
517 {
518         GString         *html;      
519         gsize           len;
520
521         g_return_val_if_fail (data, NULL);
522         
523         if (!data)
524                 return NULL;
525
526         len = strlen (data);
527         html = g_string_sized_new (1.5 * len);  /* just a  guess... */
528
529         g_string_append_printf (html,
530                                 "<html><head>"
531                                 "<meta http-equiv=\"content-type\" content=\"text/html; charset=utf8\">"
532                                 "</head>"
533                                 "<body>");
534
535         modest_text_utils_convert_buffer_to_html_start (html, data, -1);
536         
537         g_string_append (html, "</body></html>");
538
539         if (len <= HYPERLINKIFY_MAX_LENGTH)
540                 hyperlinkify_plain_text (html, 0);
541
542         modest_text_utils_convert_buffer_to_html_finish (html);
543         
544         return g_string_free (html, FALSE);
545 }
546
547 gchar *
548 modest_text_utils_convert_to_html_body (const gchar *data, gssize n, gboolean hyperlinkify)
549 {
550         GString         *html;      
551
552         g_return_val_if_fail (data, NULL);
553
554         if (!data)
555                 return NULL;
556
557         if (n == -1) 
558                 n = strlen (data);
559         html = g_string_sized_new (1.5 * n);    /* just a  guess... */
560
561         modest_text_utils_convert_buffer_to_html_start (html, data, n);
562
563         if (hyperlinkify && (n < HYPERLINKIFY_MAX_LENGTH))
564                 hyperlinkify_plain_text (html, 0);
565
566         modest_text_utils_convert_buffer_to_html_finish (html);
567         
568         return g_string_free (html, FALSE);
569 }
570
571 void
572 modest_text_utils_get_addresses_indexes (const gchar *addresses, GSList **start_indexes, GSList **end_indexes)
573 {
574         gchar *current, *start, *last_blank;
575         gint start_offset = 0, current_offset = 0;
576
577         g_return_if_fail (start_indexes != NULL);
578         g_return_if_fail (end_indexes != NULL);
579
580         start = (gchar *) addresses;
581         current = start;
582         last_blank = start;
583
584         while (*current != '\0') {
585                 if ((start == current)&&((*current == ' ')||(*current == ',')||(*current == ';'))) {
586                         start = g_utf8_next_char (start);
587                         start_offset++;
588                         last_blank = current;
589                 } else if ((*current == ',')||(*current == ';')) {
590                         gint *start_index, *end_index;
591                         start_index = g_new0(gint, 1);
592                         end_index = g_new0(gint, 1);
593                         *start_index = start_offset;
594                         *end_index = current_offset;
595                         *start_indexes = g_slist_prepend (*start_indexes, start_index);
596                         *end_indexes = g_slist_prepend (*end_indexes, end_index);
597                         start = g_utf8_next_char (current);
598                         start_offset = current_offset + 1;
599                         last_blank = start;
600                 } else if (*current == '"') {
601                         current = g_utf8_next_char (current);
602                         current_offset ++;
603                         while ((*current != '"')&&(*current != '\0')) {
604                                 current = g_utf8_next_char (current);
605                                 current_offset ++;
606                         }
607                 }
608                                 
609                 current = g_utf8_next_char (current);
610                 current_offset ++;
611         }
612
613         if (start != current) {
614                         gint *start_index, *end_index;
615                         start_index = g_new0(gint, 1);
616                         end_index = g_new0(gint, 1);
617                         *start_index = start_offset;
618                         *end_index = current_offset;
619                         *start_indexes = g_slist_prepend (*start_indexes, start_index);
620                         *end_indexes = g_slist_prepend (*end_indexes, end_index);
621         }
622         
623         *start_indexes = g_slist_reverse (*start_indexes);
624         *end_indexes = g_slist_reverse (*end_indexes);
625
626         return;
627 }
628
629
630 GSList *
631 modest_text_utils_split_addresses_list (const gchar *addresses)
632 {
633         GSList *head;
634         const gchar *my_addrs = addresses;
635         const gchar *end;
636         gchar *addr;
637         gboolean after_at = FALSE;
638
639         g_return_val_if_fail (addresses, NULL);
640         
641         /* skip any space, ',', ';' at the start */
642         while (my_addrs && (my_addrs[0] == ' ' || my_addrs[0] == ',' || my_addrs[0] == ';'))
643                ++my_addrs;
644
645         /* are we at the end of addresses list? */
646         if (!my_addrs[0])
647                 return NULL;
648         
649         /* nope, we are at the start of some address
650          * now, let's find the end of the address */
651         end = my_addrs + 1;
652         while (end[0] && end[0] != ';' && !(after_at && end[0] == ',')) {
653                 if (end[0] == '\"') {
654                         while (end[0] && end[0] != '\"')
655                                 ++end;
656                 }
657                 if (end[0] == '@') {
658                         after_at = TRUE;
659                 }
660                 if ((end[0] && end[0] == '>')&&(end[1] && end[1] == ',')) {
661                         ++end;
662                         break;
663                 }
664                 ++end;
665         }
666
667         /* we got the address; copy it and remove trailing whitespace */
668         addr = g_strndup (my_addrs, end - my_addrs);
669         g_strchomp (addr);
670
671         head = g_slist_append (NULL, addr);
672         head->next = modest_text_utils_split_addresses_list (end); /* recurse */
673
674         return head;
675 }
676
677
678 void
679 modest_text_utils_address_range_at_position (const gchar *recipients_list,
680                                              guint position,
681                                              guint *start,
682                                              guint *end)
683 {
684         gchar *current = NULL;
685         gint range_start = 0;
686         gint range_end = 0;
687         gint index;
688         gboolean is_quoted = FALSE;
689
690         g_return_if_fail (recipients_list);
691         g_return_if_fail (position < g_utf8_strlen(recipients_list, -1));
692                 
693         index = 0;
694         for (current = (gchar *) recipients_list; *current != '\0';
695              current = g_utf8_find_next_char (current, NULL)) {
696                 gunichar c = g_utf8_get_char (current);
697
698                 if ((c == ',') && (!is_quoted)) {
699                         if (index < position) {
700                                 range_start = index + 1;
701                         } else {
702                                 break;
703                         }
704                 } else if (c == '\"') {
705                         is_quoted = !is_quoted;
706                 } else if ((c == ' ') &&(range_start == index)) {
707                         range_start ++;
708                 }
709                 index ++;
710                 range_end = index;
711         }
712
713         if (start)
714                 *start = range_start;
715         if (end)
716                 *end = range_end;
717 }
718
719 gchar *
720 modest_text_utils_address_with_standard_length (const gchar *recipients_list)
721 {
722         gchar ** splitted;
723         gchar ** current;
724         GString *buffer = g_string_new ("");
725
726         splitted = g_strsplit (recipients_list, "\n", 0);
727         current = splitted;
728         while (*current) {
729                 gchar *line;
730                 if (current != splitted)
731                         buffer = g_string_append_c (buffer, '\n');
732                 line = g_strndup (*splitted, 1000);
733                 buffer = g_string_append (buffer, line);
734                 g_free (line);
735                 current++;
736         }
737
738         g_strfreev (splitted);
739
740         return g_string_free (buffer, FALSE);
741 }
742
743
744 /* ******************************************************************* */
/* ************************* UTILITY FUNCTIONS *********************** */
746 /* ******************************************************************* */
747
748 static GString *
749 get_next_line (const gchar * b, const gsize blen, const gchar * iter)
750 {
751         GString *gs;
752         const gchar *i0;
753         
754         if (iter > b + blen)
755                 return g_string_new("");
756         
757         i0 = iter;
758         while (iter[0]) {
759                 if (iter[0] == '\n')
760                         break;
761                 iter++;
762         }
763         gs = g_string_new_len (i0, iter - i0);
764         return gs;
765 }
766 static int
767 get_indent_level (const char *l)
768 {
769         int indent = 0;
770
771         while (l[0]) {
772                 if (l[0] == '>') {
773                         indent++;
774                         if (l[1] == ' ') {
775                                 l++;
776                         }
777                 } else {
778                         break;
779                 }
780                 l++;
781
782         }
783
784         /*      if we hit the signature marker "-- ", we return -(indent + 1). This
785          *      stops reformatting.
786          */
787         if (strcmp (l, MODEST_TEXT_UTILS_SIGNATURE_MARKER) == 0) {
788                 return -1 - indent;
789         } else {
790                 return indent;
791         }
792 }
793
794 static void
795 unquote_line (GString * l, const gchar *quote_symbol)
796 {
797         gchar *p;
798         gint quote_len;
799
800         p = l->str;
801         quote_len = strlen (quote_symbol);
802         while (p[0]) {
803                 if (g_str_has_prefix (p, quote_symbol)) {
804                         if (p[quote_len] == ' ') {
805                                 p += quote_len;
806                         }
807                 } else {
808                         break;
809                 }
810                 p++;
811         }
812         g_string_erase (l, 0, p - l->str);
813 }
814
815 static void
816 append_quoted (GString * buf, const gchar *quote_symbol,
817                int indent, const GString * str,
818                const int cutpoint)
819 {
820         int i;
821         gchar *quote_concat;
822
823         indent = indent < 0 ? abs (indent) - 1 : indent;
824         quote_concat = g_strconcat (quote_symbol, " ", NULL);
825         for (i = 0; i <= indent; i++) {
826                 g_string_append (buf, quote_concat);
827         }
828         g_free (quote_concat);
829         if (cutpoint > 0) {
830                 g_string_append_len (buf, str->str, cutpoint);
831         } else {
832                 g_string_append (buf, str->str);
833         }
834         g_string_append (buf, "\n");
835 }
836
837 static int
838 get_breakpoint_utf8 (const gchar * s, gint indent, const gint limit)
839 {
840         gint index = 0;
841         const gchar *pos, *last;
842         gunichar *uni;
843
844         indent = indent < 0 ? abs (indent) - 1 : indent;
845
846         last = NULL;
847         pos = s;
848         uni = g_utf8_to_ucs4_fast (s, -1, NULL);
849         while (pos[0]) {
850                 if ((index + 2 * indent > limit) && last) {
851                         g_free (uni);
852                         return last - s;
853                 }
854                 if (g_unichar_isspace (uni[index])) {
855                         last = pos;
856                 }
857                 pos = g_utf8_next_char (pos);
858                 index++;
859         }
860         g_free (uni);
861         return strlen (s);
862 }
863
864 static int
865 get_breakpoint_ascii (const gchar * s, const gint indent, const gint limit)
866 {
867         gint i, last;
868
869         last = strlen (s);
870         if (last + 2 * indent < limit)
871                 return last;
872
873         for (i = strlen (s); i > 0; i--) {
874                 if (s[i] == ' ') {
875                         if (i + 2 * indent <= limit) {
876                                 return i;
877                         } else {
878                                 last = i;
879                         }
880                 }
881         }
882         return last;
883 }
884
885 static int
886 get_breakpoint (const gchar * s, const gint indent, const gint limit)
887 {
888
889         if (g_utf8_validate (s, -1, NULL)) {
890                 return get_breakpoint_utf8 (s, indent, limit);
891         } else {                /* assume ASCII */
892                 //g_warning("invalid UTF-8 in msg");
893                 return get_breakpoint_ascii (s, indent, limit);
894         }
895 }
896
897 static gchar *
898 cite (const time_t sent_date, const gchar *from)
899 {
900         return g_strdup (_("mcen_ia_editor_original_message"));
901 }
902
903 static gchar *
904 quoted_attachments (GList *attachments)
905 {
906         GList *node = NULL;
907         GString *result = g_string_new ("");
908         for (node = attachments; node != NULL; node = g_list_next (node)) {
909                 gchar *filename = (gchar *) node->data;
910                 g_string_append_printf ( result, "%s %s\n", _("mcen_ia_editor_attach_filename"), filename);
911         }
912
913         return g_string_free (result, FALSE);
914
915 }
916
917 static GString *
918 modest_text_utils_quote_body (GString *output, const gchar *text,
919                               const gchar *quote_symbol,
920                               int limit)
921 {
922
923         const gchar *iter;
924         gsize len;
925         gint indent, breakpoint, rem_indent = 0;
926         GString *l, *remaining;
927
928         iter = text;
929         len = strlen(text);
930         remaining = g_string_new ("");
931         do {
932                 l = get_next_line (text, len, iter);
933                 iter = iter + l->len + 1;
934                 indent = get_indent_level (l->str);
935                 unquote_line (l, quote_symbol);
936
937                 if (remaining->len) {
938                         if (l->len && indent == rem_indent) {
939                                 g_string_prepend (l, " ");
940                                 g_string_prepend (l, remaining->str);
941                         } else {
942                                 do {
943                                         breakpoint =
944                                                 get_breakpoint (remaining->str,
945                                                                 rem_indent,
946                                                                 limit);
947                                         append_quoted (output, quote_symbol, rem_indent,
948                                                        remaining, breakpoint);
949                                         g_string_erase (remaining, 0,
950                                                         breakpoint);
951                                         if (remaining->str[0] == ' ') {
952                                                 g_string_erase (remaining, 0,
953                                                                 1);
954                                         }
955                                 } while (remaining->len);
956                         }
957                 }
958                 g_string_free (remaining, TRUE);
959                 breakpoint = get_breakpoint (l->str, indent, limit);
960                 remaining = g_string_new (l->str + breakpoint);
961                 if (remaining->str[0] == ' ') {
962                         g_string_erase (remaining, 0, 1);
963                 }
964                 rem_indent = indent;
965                 append_quoted (output, quote_symbol, indent, l, breakpoint);
966                 g_string_free (l, TRUE);
967         } while ((iter < text + len) || (remaining->str[0]));
968
969         return output;
970 }
971
972 static gchar *
973 modest_text_utils_quote_plain_text (const gchar *text, 
974                                     const gchar *cite, 
975                                     const gchar *signature,
976                                     GList *attachments,
977                                     int limit)
978 {
979         GString *q;
980         gchar *attachments_string = NULL;
981
982         q = g_string_new ("");
983
984         if (signature != NULL) {
985                 g_string_append_printf (q, "\n%s\n", MODEST_TEXT_UTILS_SIGNATURE_MARKER);
986                 q = g_string_append (q, signature);
987         }
988
989         q = g_string_append (q, "\n");
990         q = g_string_append (q, cite);
991         q = g_string_append_c (q, '\n');
992
993         q = modest_text_utils_quote_body (q, text, ">", limit);
994
995         attachments_string = quoted_attachments (attachments);
996         q = g_string_append (q, attachments_string);
997         g_free (attachments_string);
998
999         return g_string_free (q, FALSE);
1000 }
1001
1002 static void
1003 quote_html_add_to_gstring (GString *string,
1004                            const gchar *text)
1005 {
1006         if (text && strcmp (text, "")) {
1007                 gchar *html_text = modest_text_utils_convert_to_html_body (text, -1, TRUE);
1008                 g_string_append_printf (string, "%s<br/>", html_text);
1009                 g_free (html_text);
1010         }
1011 }
1012
1013 static gchar*
1014 modest_text_utils_quote_html (const gchar *text, 
1015                               const gchar *cite, 
1016                               const gchar *signature,
1017                               GList *attachments,
1018                               int limit)
1019 {
1020         GString *result_string;
1021
1022         result_string = 
1023                 g_string_new ( \
1024                               "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n" \
1025                               "<html>\n"                                \
1026                               "<body>\n<br/>\n");
1027
1028         if (text || cite || signature) {
1029                 GString *quoted_text;
1030                 g_string_append (result_string, "<pre>\n");
1031                 if (signature) {
1032                         quote_html_add_to_gstring (result_string, MODEST_TEXT_UTILS_SIGNATURE_MARKER);
1033                         quote_html_add_to_gstring (result_string, signature);
1034                 }
1035                 quote_html_add_to_gstring (result_string, cite);
1036                 quoted_text = g_string_new ("");
1037                 quoted_text = modest_text_utils_quote_body (quoted_text, (text) ? text : "", ">", limit);
1038                 quote_html_add_to_gstring (result_string, quoted_text->str);
1039                 g_string_free (quoted_text, TRUE);
1040                 if (attachments) {
1041                         gchar *attachments_string = quoted_attachments (attachments);
1042                         quote_html_add_to_gstring (result_string, attachments_string);
1043                         g_free (attachments_string);
1044                 }
1045                 g_string_append (result_string, "</pre>");
1046         }
1047         g_string_append (result_string, "</body>");
1048         g_string_append (result_string, "</html>");
1049
1050         return g_string_free (result_string, FALSE);
1051 }
1052
/* Comparison callback for sorting url_match_t entries by descending offset:
 * the result is match2's offset minus match1's, so the match with the
 * larger offset sorts first. */
static gint 
cmp_offsets_reverse (const url_match_t *match1, const url_match_t *match2)
{
        return match2->offset - match1->offset;
}
1058
/* number of modest_text_utils_hyperlinkify_begin() calls that have not yet
 * been matched by modest_text_utils_hyperlinkify_end() */
static gint url_matches_block = 0;
/* the URL patterns; their regexps are compiled by compile_patterns() */
static url_match_pattern_t patterns[] = MAIL_VIEWER_URL_MATCH_PATTERNS;
/* locked while the counter and the compiled patterns are updated; created
 * on the first modest_text_utils_hyperlinkify_begin() call */
static GMutex *url_patterns_mutex = NULL;
1062
1063
1064 static gboolean
1065 compile_patterns ()
1066 {
1067         guint i;
1068         const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
1069         for (i = 0; i != pattern_num; ++i) {
1070                 patterns[i].preg = g_slice_new0 (regex_t);
1071                 
1072                 /* this should not happen */
1073                 if (regcomp (patterns[i].preg, patterns[i].regex,
1074                              REG_ICASE|REG_EXTENDED|REG_NEWLINE) != 0) {
1075                         g_warning ("%s: error in regexp:\n%s\n", __FUNCTION__, patterns[i].regex);
1076                         return FALSE;
1077                 }
1078         }
1079         return TRUE;
1080 }
1081
1082 static void 
1083 free_patterns ()
1084 {
1085         guint i;
1086         const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
1087         for (i = 0; i != pattern_num; ++i) {
1088                 regfree (patterns[i].preg);
1089                 g_slice_free  (regex_t, patterns[i].preg);
1090         } /* don't free patterns itself -- it's static */
1091 }
1092
1093 void
1094 modest_text_utils_hyperlinkify_begin (void)
1095 {
1096
1097         if (url_patterns_mutex == NULL) {
1098                 url_patterns_mutex = g_mutex_new ();
1099         }
1100         g_mutex_lock (url_patterns_mutex);
1101         if (url_matches_block == 0)
1102                 compile_patterns ();
1103         url_matches_block ++;
1104         g_mutex_unlock (url_patterns_mutex);
1105 }
1106
1107 void
1108 modest_text_utils_hyperlinkify_end (void)
1109 {
1110         g_mutex_lock (url_patterns_mutex);
1111         url_matches_block--;
1112         if (url_matches_block <= 0)
1113                 free_patterns ();
1114         g_mutex_unlock (url_patterns_mutex);
1115 }
1116
1117
1118 static GSList*
1119 get_url_matches (GString *txt, gint offset)
1120 {
1121         regmatch_t rm;
1122         guint rv, i, tmp_offset = 0;
1123         GSList *match_list = NULL;
1124
1125         const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
1126
1127         /* initalize the regexps */
1128         modest_text_utils_hyperlinkify_begin ();
1129
1130         /* find all the matches */
1131         for (i = 0; i != pattern_num; ++i) {
1132                 tmp_offset     = offset;        
1133                 while (1) {
1134                         url_match_t *match;
1135                         gboolean is_submatch;
1136                         GSList *cursor;
1137                         
1138                         if ((rv = regexec (patterns[i].preg, txt->str + tmp_offset, 1, &rm, 0)) != 0) {
1139                                 g_return_val_if_fail (rv == REG_NOMATCH, NULL); /* this should not happen */
1140                                 break; /* try next regexp */ 
1141                         }
1142                         if (rm.rm_so == -1)
1143                                 break;
1144                         
1145                         is_submatch = FALSE;
1146                         /* check  old matches to see if this has already been matched */
1147                         cursor = match_list;
1148                         while (cursor && !is_submatch) {
1149                                 const url_match_t *old_match =
1150                                         (const url_match_t *) cursor->data;
1151                                 guint new_offset = tmp_offset + rm.rm_so;
1152                                 is_submatch = (new_offset >  old_match->offset &&
1153                                                new_offset <  old_match->offset + old_match->len);
1154                                 cursor = g_slist_next (cursor);
1155                         }
1156
1157                         if (!is_submatch) {
1158                                 /* make a list of our matches (<offset, len, prefix> tupels)*/
1159                                 match = g_slice_new (url_match_t);
1160                                 match->offset = tmp_offset + rm.rm_so;
1161                                 match->len    = rm.rm_eo - rm.rm_so;
1162                                 match->prefix = patterns[i].prefix;
1163                                 match_list = g_slist_prepend (match_list, match);
1164                         }               
1165                         tmp_offset += rm.rm_eo;
1166                 }
1167         }
1168
1169         modest_text_utils_hyperlinkify_end ();
1170         
1171         /* now sort the list, so the matches are in reverse order of occurence.
1172          * that way, we can do the replacements starting from the end, so we don't need
1173          * to recalculate the offsets
1174          */
1175         match_list = g_slist_sort (match_list,
1176                                    (GCompareFunc)cmp_offsets_reverse); 
1177         return match_list;      
1178 }
1179
1180
1181
1182 /* replace all occurences of needle in haystack with repl*/
1183 static gchar*
1184 replace_string (const gchar *haystack, const gchar *needle, gchar repl)
1185 {
1186         gchar *str, *cursor;
1187
1188         if (!haystack || !needle || strlen(needle) == 0)
1189                 return haystack ? g_strdup(haystack) : NULL;
1190         
1191         str = g_strdup (haystack);
1192
1193         for (cursor = str; cursor && *cursor; ++cursor) {
1194                 if (g_str_has_prefix (cursor, needle)) {
1195                         cursor[0] = repl;
1196                         memmove (cursor + 1,
1197                                  cursor + strlen (needle),
1198                                  strlen (cursor + strlen (needle)) + 1);
1199                 }
1200         }
1201         
1202         return str;
1203 }
1204
1205 static void
1206 hyperlinkify_plain_text (GString *txt, gint offset)
1207 {
1208         GSList *cursor;
1209         GSList *match_list = get_url_matches (txt, offset);
1210
1211         /* we will work backwards, so the offsets stay valid */
1212         for (cursor = match_list; cursor; cursor = cursor->next) {
1213
1214                 url_match_t *match = (url_match_t*) cursor->data;
1215                 gchar *url  = g_strndup (txt->str + match->offset, match->len);
1216                 gchar *repl = NULL; /* replacement  */
1217
1218                 /* the string still contains $(MARK_AMP_URI_STR)"amp;" for each
1219                  * '&' in the original, because of the text->html conversion.
1220                  * in the href-URL (and only there), we must convert that back to
1221                  * '&'
1222                  */
1223                 gchar *href_url = replace_string (url, MARK_AMP_URI_STR "amp;", '&');
1224                 
1225                 /* the prefix is NULL: use the one that is already there */
1226                 repl = g_strdup_printf ("<a href=\"%s%s\">%s</a>",
1227                                         match->prefix ? match->prefix : EMPTY_STRING, 
1228                                         href_url, url);
1229
1230                 /* replace the old thing with our hyperlink
1231                  * replacement thing */
1232                 g_string_erase  (txt, match->offset, match->len);
1233                 g_string_insert (txt, match->offset, repl);
1234                 
1235                 g_free (url);
1236                 g_free (repl);
1237                 g_free (href_url);
1238
1239                 g_slice_free (url_match_t, match);      
1240         }
1241         
1242         g_slist_free (match_list);
1243 }
1244
1245 void
1246 modest_text_utils_hyperlinkify (GString *string_buffer)
1247 {
1248         gchar *after_body;
1249         gint offset = 0;
1250
1251         after_body = strstr (string_buffer->str, "<body>");
1252         if (after_body != NULL)
1253                 offset = after_body - string_buffer->str;
1254         hyperlinkify_plain_text (string_buffer, offset);
1255 }
1256
1257
1258 /* for optimization reasons, we change the string in-place */
1259 void
1260 modest_text_utils_get_display_address (gchar *address)
1261 {
1262         int i;
1263
1264         g_return_if_fail (address);
1265         
1266         if (!address)
1267                 return;
1268         
1269         /* should not be needed, and otherwise, we probably won't screw up the address
1270          * more than it already is :) 
1271          * g_return_val_if_fail (g_utf8_validate (address, -1, NULL), NULL);
1272          * */
1273         
1274         /* remove leading whitespace */
1275         if (address[0] == ' ')
1276                 g_strchug (address);
1277                 
1278         for (i = 0; address[i]; ++i) {
1279                 if (address[i] == '<') {
1280                         if (G_UNLIKELY(i == 0)) {
1281                                 break; /* there's nothing else, leave it */
1282                         }else {
1283                                 address[i] = '\0'; /* terminate the string here */
1284                                 break;
1285                         }
1286                 }
1287         }
1288
1289         g_strchomp (address);
1290 }
1291
1292
1293 gchar *
1294 modest_text_utils_get_display_addresses (const gchar *recipients)
1295 {
1296         gchar *addresses;
1297         GSList *recipient_list;
1298
1299         addresses = NULL;
1300         recipient_list = modest_text_utils_split_addresses_list (recipients);
1301         if (recipient_list) {
1302                 GString *add_string = g_string_sized_new (strlen (recipients));
1303                 GSList *iter = recipient_list;
1304                 gboolean first = TRUE;
1305
1306                 while (iter) {
1307                         /* Strings are changed in place */
1308                         modest_text_utils_get_display_address ((gchar *) iter->data);
1309                         if (G_UNLIKELY (first)) {
1310                                 g_string_append_printf (add_string, "%s", (gchar *) iter->data);
1311                                 first = FALSE;
1312                         } else {
1313                                 g_string_append_printf (add_string, ", %s", (gchar *) iter->data);
1314                         }
1315                         iter = g_slist_next (iter);
1316                 }
1317                 g_slist_foreach (recipient_list, (GFunc) g_free, NULL);
1318                 g_slist_free (recipient_list);
1319                 addresses = g_string_free (add_string, FALSE);
1320         }
1321
1322         return addresses;
1323 }
1324
1325
1326 gchar *
1327 modest_text_utils_get_email_address (const gchar *full_address)
1328 {
1329         const gchar *left, *right;
1330
1331         g_return_val_if_fail (full_address, NULL);
1332         
1333         if (!full_address)
1334                 return NULL;
1335         
1336         g_return_val_if_fail (g_utf8_validate (full_address, -1, NULL), NULL);
1337         
1338         left = g_strrstr_len (full_address, strlen(full_address), "<");
1339         if (left == NULL)
1340                 return g_strdup (full_address);
1341
1342         right = g_strstr_len (left, strlen(left), ">");
1343         if (right == NULL)
1344                 return g_strdup (full_address);
1345
1346         return g_strndup (left + 1, right - left - 1);
1347 }
1348
1349 gint 
1350 modest_text_utils_get_subject_prefix_len (const gchar *sub)
1351 {
1352         gint prefix_len = 0;    
1353
1354         g_return_val_if_fail (sub, 0);
1355
1356         if (!sub)
1357                 return 0;
1358         
1359         /* optimization: "Re", "RE", "re","Fwd", "FWD", "fwd","FW","Fw", "fw" */
1360         if (sub[0] != 'R' && sub[0] != 'F' && sub[0] != 'r' && sub[0] != 'f')
1361                 return 0;
1362         else if (sub[0] && sub[1] != 'e' && sub[1] != 'E' && sub[1] != 'w' && sub[1] != 'W')
1363                 return 0;
1364
1365         prefix_len = 2;
1366         if (sub[2] == 'd')
1367                 ++prefix_len;
1368
1369         /* skip over a [...] block */
1370         if (sub[prefix_len] == '[') {
1371                 int c = prefix_len + 1;
1372                 while (sub[c] && sub[c] != ']')
1373                         ++c;
1374                 if (!sub[c])
1375                         return 0; /* no end to the ']' found */
1376                 else
1377                         prefix_len = c + 1;
1378         }
1379
1380         /* did we find the ':' ? */
1381         if (sub[prefix_len] == ':') {
1382                 ++prefix_len;
1383                 if (sub[prefix_len] == ' ')
1384                         ++prefix_len;
1385                 prefix_len += modest_text_utils_get_subject_prefix_len (sub + prefix_len);
1386 /*              g_warning ("['%s','%s']", sub, (char*) sub + prefix_len); */
1387                 return prefix_len;
1388         } else
1389                 return 0;
1390 }
1391
1392
1393 gint
1394 modest_text_utils_utf8_strcmp (const gchar* s1, const gchar *s2, gboolean insensitive)
1395 {
1396
1397 /* work even when s1 and/or s2 == NULL */
1398         if (G_UNLIKELY(s1 == s2))
1399                 return 0;
1400         if (G_UNLIKELY(!s1))
1401                 return -1;
1402         if (G_UNLIKELY(!s2))
1403                 return 1;
1404         
1405         /* if it's not case sensitive */
1406         if (!insensitive) {
1407
1408                 /* optimization: shortcut if first char is ascii */ 
1409                 if (((s1[0] & 0x80)== 0) && ((s2[0] & 0x80) == 0) &&
1410                     (s1[0] != s2[0])) 
1411                         return s1[0] - s2[0];
1412                 
1413                 return g_utf8_collate (s1, s2);
1414
1415         } else {
1416                 gint result;
1417                 gchar *n1, *n2;
1418
1419                 /* optimization: shortcut if first char is ascii */ 
1420                 if (((s1[0] & 0x80) == 0) && ((s2[0] & 0x80) == 0) &&
1421                     (tolower(s1[0]) != tolower (s2[0]))) 
1422                         return tolower(s1[0]) - tolower(s2[0]);
1423                 
1424                 n1 = g_utf8_strdown (s1, -1);
1425                 n2 = g_utf8_strdown (s2, -1);
1426                 
1427                 result = g_utf8_collate (n1, n2);
1428                 
1429                 g_free (n1);
1430                 g_free (n2);
1431         
1432                 return result;
1433         }
1434 }
1435
1436
1437 const gchar*
1438 modest_text_utils_get_display_date (time_t date)
1439 {
1440 #define DATE_BUF_SIZE 64 
1441         static gchar date_buf[DATE_BUF_SIZE];
1442         
1443         /* calculate the # of days since epoch for 
1444          * for today and for the date provided 
1445          * based on idea from pvanhoof */
1446         int day      = time(NULL) / (24 * 60 * 60);
1447         int date_day = date       / (24 * 60 * 60);
1448
1449         /* if it's today, show the time, if it's not today, show the date instead */
1450
1451         /* TODO: take into account the system config for 24/12h */
1452 #ifdef MODEST_TOOLKIT_HILDON2
1453         if (day == date_day) /* is the date today? */
1454                 modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, _HL("wdgt_va_24h_time"), date);
1455         else 
1456                 modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, _HL("wdgt_va_date"), date); 
1457 #else
1458         if (day == date_day) /* is the date today? */
1459                 modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, "%X", date);
1460         else 
1461                 modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, "%x", date); 
1462 #endif
1463
1464         return date_buf; /* this is a static buffer, don't free! */
1465 }
1466
1467
1468
1469 gboolean
1470 modest_text_utils_validate_folder_name (const gchar *folder_name)
1471 {
1472         /* based on http://msdn2.microsoft.com/en-us/library/aa365247.aspx,
1473          * with some extras */
1474         
1475         guint len;
1476         gint i;
1477         const gchar **cursor = NULL;
1478         const gchar *forbidden_names[] = { /* windows does not like these */
1479                 "CON", "PRN", "AUX", "NUL", ".", "..", "cur", "tmp", "new", 
1480                 NULL /* cur, tmp, new are reserved for Maildir */
1481         };
1482         
1483         /* cannot be NULL */
1484         if (!folder_name) 
1485                 return FALSE;
1486
1487         /* cannot be empty */
1488         len = strlen(folder_name);
1489         if (len == 0)
1490                 return FALSE;
1491         
1492         /* cannot start with a dot, vfat does not seem to like that */
1493         if (folder_name[0] == '.')
1494                 return FALSE;
1495
1496         /* cannot start or end with a space */
1497         if (g_ascii_isspace(folder_name[0]) || g_ascii_isspace(folder_name[len - 1]))
1498                 return FALSE; 
1499
1500         /* cannot contain a forbidden char */   
1501         for (i = 0; i < len; i++)
1502                 if (modest_text_utils_is_forbidden_char (folder_name[i], FOLDER_NAME_FORBIDDEN_CHARS))
1503                         return FALSE;
1504
1505         /* Cannot contain Windows port numbers. I'd like to use GRegex
1506            but it's still not available in Maemo. sergio */
1507         if (!g_ascii_strncasecmp (folder_name, "LPT", 3) ||
1508             !g_ascii_strncasecmp (folder_name, "COM", 3)) {
1509                 glong val;
1510                 gchar *endptr;
1511
1512                 /* We skip the first 3 characters for the
1513                    comparison */
1514                 val = strtol(folder_name+3, &endptr, 10);
1515
1516                 /* If the conversion to long succeeded then the string
1517                    is not valid for us */
1518                 if (*endptr == '\0')
1519                         return FALSE;
1520                 else
1521                         return TRUE;
1522         }
1523         
1524         /* cannot contain a forbidden word */
1525         if (len <= 4) {
1526                 for (cursor = forbidden_names; cursor && *cursor; ++cursor) {
1527                         if (g_ascii_strcasecmp (folder_name, *cursor) == 0)
1528                                 return FALSE;
1529                 }
1530         }
1531
1532         return TRUE; /* it's valid! */
1533 }
1534
1535
1536
1537 gboolean
1538 modest_text_utils_validate_domain_name (const gchar *domain)
1539 {
1540         gboolean valid = FALSE;
1541         regex_t rx;
1542         const gchar* domain_regex = "^([a-z0-9-]*[a-z0-9]\\.)+[a-z0-9-]*[a-z0-9]$";
1543
1544         g_return_val_if_fail (domain, FALSE);
1545         
1546         if (!domain)
1547                 return FALSE;
1548         
1549         memset (&rx, 0, sizeof(regex_t)); /* coverity wants this... */
1550                 
1551         /* domain name: all alphanum or '-' or '.',
1552          * but beginning/ending in alphanum */  
1553         if (regcomp (&rx, domain_regex, REG_ICASE|REG_EXTENDED|REG_NOSUB)) {
1554                 g_warning ("BUG: error in regexp");
1555                 return FALSE;
1556         }
1557         
1558         valid = (regexec (&rx, domain, 1, NULL, 0) == 0);
1559         regfree (&rx);
1560                 
1561         return valid;
1562 }
1563
1564
1565
/*
 * modest_text_utils_validate_email_address:
 * @email_address: the address to check ("name@domain")
 * @invalid_char_position: if not NULL, set to NULL on entry and, when the
 *   address is rejected because of one of the rfc822 special characters,
 *   to the position of that character inside @email_address
 *
 * Requires exactly one '@', then checks the name part and the domain part
 * character by character.
 *
 * Returns: TRUE when both parts pass and the domain contains at least one
 * '.'; FALSE otherwise.
 */
gboolean
modest_text_utils_validate_email_address (const gchar *email_address,
                                          const gchar **invalid_char_position)
{
        int count = 0; /* number of '.' seen in the domain part */
        const gchar *c = NULL, *domain = NULL;
        static gchar *rfc822_specials = "()<>@,;:\\\"[]&";
        
        if (invalid_char_position)
                *invalid_char_position = NULL;
        
        g_return_val_if_fail (email_address, FALSE);
        
        /* check that the email address contains exactly one @ */
        if (!strstr(email_address, "@") || 
                        (strstr(email_address, "@") != g_strrstr(email_address, "@"))) 
                return FALSE;
        
        /* first we validate the name portion (name@domain) */
        for (c = email_address;  *c;  c++) {
                /* a quoted string may start at the very beginning, after a
                 * '.' or directly after another quote */
                if (*c == '\"' && 
                    (c == email_address || 
                     *(c - 1) == '.' || 
                     *(c - 1) == '\"')) {
                        /* scan up to the closing quote */
                        while (*++c) {
                                if (*c == '\"') 
                                        break;
                                /* a backslash followed by a blank is accepted */
                                if (*c == '\\' && (*++c == ' ')) 
                                        continue;
                                /* NOTE(review): where gchar is signed, bytes
                                 * >= 0x80 are negative and are rejected by the
                                 * first comparison -- confirm */
                                if (*c <= ' ' || *c >= 127) 
                                        return FALSE;
                        }
                        /* the string ended before the closing quote */
                        if (!*c++) 
                                return FALSE;
                        /* after the closing quote only '@' or '.' may follow */
                        if (*c == '@') 
                                break;
                        if (*c != '.') 
                                return FALSE;
                        continue;
                }
                if (*c == '@') 
                        break;
                if (*c <= ' ' || *c >= 127) 
                        return FALSE;
                if (strchr(rfc822_specials, *c)) {
                        if (invalid_char_position)
                                *invalid_char_position = c;
                        return FALSE;
                }
        }
        /* the name part must not be empty and must not end with a '.' */
        if (c == email_address || *(c - 1) == '.') 
                return FALSE;

        /* next we validate the domain portion (name@domain) */
        if (!*(domain = ++c)) 
                return FALSE;
        do {
                if (*c == '.') {
                        /* no leading, doubled or trailing dot */
                        if (c == domain || *(c - 1) == '.' || *(c + 1) == '\0') 
                                return FALSE;
                        count++;
                }
                if (*c <= ' ' || *c >= 127) 
                        return FALSE;
                if (strchr(rfc822_specials, *c)) {
                        if (invalid_char_position)
                                *invalid_char_position = c;
                        return FALSE;
                }
        } while (*++c);

        /* the domain needs at least one dot */
        return (count >= 1) ? TRUE : FALSE;
}
1639
1640 gboolean 
1641 modest_text_utils_validate_recipient (const gchar *recipient, const gchar **invalid_char_position)
1642 {
1643         gchar *stripped, *current;
1644         gchar *right_part;
1645         gboolean has_error = FALSE;
1646
1647         if (invalid_char_position)
1648                 *invalid_char_position = NULL;
1649         
1650         g_return_val_if_fail (recipient, FALSE);
1651         
1652         if (modest_text_utils_validate_email_address (recipient, invalid_char_position))
1653                 return TRUE;
1654
1655         stripped = g_strdup (recipient);
1656         stripped = g_strstrip (stripped);
1657         current = stripped;
1658
1659         if (*current == '\0') {
1660                 g_free (stripped);
1661                 return FALSE;
1662         }
1663
1664         /* quoted string */
1665         if (*current == '\"') {
1666                 current = g_utf8_next_char (current);
1667                 has_error = TRUE;
1668                 for (; *current != '\0'; current = g_utf8_next_char (current)) {
1669                         if (*current == '\\') {
1670                                 /* TODO: This causes a warning, which breaks the build, 
1671                                  * because a gchar cannot be < 0.
1672                                  * murrayc. 
1673                                 if (current[1] <0) {
1674                                         has_error = TRUE;
1675                                         break;
1676                                 }
1677                                 */
1678                         } else if (*current == '\"') {
1679                                 has_error = FALSE;
1680                                 current = g_utf8_next_char (current);
1681                                 break;
1682                         }
1683                 }
1684         } else {
1685                 has_error = TRUE;
1686                 for (current = stripped ; *current != '\0'; current = g_utf8_next_char (current)) {
1687                         if (*current == '<') {
1688                                 has_error = FALSE;
1689                                 break;
1690                         }
1691                 }
1692         }
1693                 
1694         if (has_error) {
1695                 g_free (stripped);
1696                 return FALSE;
1697         }
1698
1699         right_part = g_strdup (current);
1700         g_free (stripped);
1701         right_part = g_strstrip (right_part);
1702
1703         if (g_str_has_prefix (right_part, "<") &&
1704             g_str_has_suffix (right_part, ">")) {
1705                 gchar *address;
1706                 gboolean valid;
1707
1708                 address = g_strndup (right_part+1, strlen (right_part) - 2);
1709                 g_free (right_part);
1710                 valid = modest_text_utils_validate_email_address (address, invalid_char_position);
1711                 g_free (address);
1712                 return valid;
1713         } else {
1714                 g_free (right_part);
1715                 return FALSE;
1716         }
1717 }
1718
1719
1720 gchar *
1721 modest_text_utils_get_display_size (guint64 size)
1722 {
1723         const guint KB=1024;
1724         const guint MB=1024 * KB;
1725         const guint GB=1024 * MB;
1726
1727         if (size == 0)
1728                 return g_strdup_printf (_FM("sfil_li_size_kb"), (int) 0);
1729         if (0 <= size && size < KB)
1730                 return g_strdup_printf (_FM("sfil_li_size_1kb_99kb"), (int) 1);
1731         else if (KB <= size && size < 100 * KB)
1732                 return g_strdup_printf (_FM("sfil_li_size_1kb_99kb"), (int) size / KB);
1733         else if (100*KB <= size && size < MB)
1734                 return g_strdup_printf (_FM("sfil_li_size_100kb_1mb"), (int) size / KB);
1735         else if (MB <= size && size < 10*MB)
1736                 return g_strdup_printf (_FM("sfil_li_size_1mb_10mb"), (float) size / MB);
1737         else if (10*MB <= size && size < GB)
1738                 return g_strdup_printf (_FM("sfil_li_size_10mb_1gb"), (float) size / MB);
1739         else
1740                 return g_strdup_printf (_FM("sfil_li_size_1gb_or_greater"), (float) size / GB);
1741 }
1742
1743 static gchar *
1744 get_email_from_address (const gchar * address)
1745 {
1746         gchar *left_limit, *right_limit;
1747
1748         left_limit = strstr (address, "<");
1749         right_limit = g_strrstr (address, ">");
1750
1751         if ((left_limit == NULL)||(right_limit == NULL)|| (left_limit > right_limit))
1752                 return g_strdup (address);
1753         else
1754                 return g_strndup (left_limit + 1, (right_limit - left_limit) - 1);
1755 }
1756
1757 gchar *
1758 modest_text_utils_get_color_string (GdkColor *color)
1759 {
1760         g_return_val_if_fail (color, NULL);
1761
1762         return g_strdup_printf ("#%x%x%x%x%x%x%x%x%x%x%x%x",
1763                                 (color->red >> 12)   & 0xf, (color->red >> 8)   & 0xf,
1764                                 (color->red >>  4)   & 0xf, (color->red)        & 0xf,
1765                                 (color->green >> 12) & 0xf, (color->green >> 8) & 0xf,
1766                                 (color->green >>  4) & 0xf, (color->green)      & 0xf,
1767                                 (color->blue >> 12)  & 0xf, (color->blue >> 8)  & 0xf,
1768                                 (color->blue >>  4)  & 0xf, (color->blue)       & 0xf);
1769 }
1770
1771 gchar *
1772 modest_text_utils_text_buffer_get_text (GtkTextBuffer *buffer)
1773 {
1774         GtkTextIter start, end;
1775         gchar *slice, *current;
1776         GString *result = g_string_new ("");
1777
1778         g_return_val_if_fail (buffer && GTK_IS_TEXT_BUFFER (buffer), NULL);
1779         
1780         gtk_text_buffer_get_start_iter (buffer, &start);
1781         gtk_text_buffer_get_end_iter (buffer, &end);
1782
1783         slice = gtk_text_buffer_get_slice (buffer, &start, &end, FALSE);
1784         current = slice;
1785
1786         while (current && current != '\0') {
1787                 if (g_utf8_get_char (current) == 0xFFFC) {
1788                         result = g_string_append_c (result, ' ');
1789                         current = g_utf8_next_char (current);
1790                 } else {
1791                         gchar *next = g_utf8_strchr (current, -1, 0xFFFC);
1792                         if (next == NULL) {
1793                                 result = g_string_append (result, current);
1794                         } else {
1795                                 result = g_string_append_len (result, current, next - current);
1796                         }
1797                         current = next;
1798                 }
1799         }
1800         g_free (slice);
1801
1802         return g_string_free (result, FALSE);
1803         
1804 }
1805
1806 gboolean
1807 modest_text_utils_is_forbidden_char (const gchar character,
1808                                      ModestTextUtilsForbiddenCharType type)
1809 {
1810         gint i, len;
1811         const gchar *forbidden_chars = NULL;
1812         
1813         /* We need to get the length in the switch because the
1814            compiler needs to know the size at compile time */
1815         switch (type) {
1816         case ACCOUNT_TITLE_FORBIDDEN_CHARS:
1817                 forbidden_chars = account_title_forbidden_chars;
1818                 len = G_N_ELEMENTS (account_title_forbidden_chars);
1819                 break;
1820         case FOLDER_NAME_FORBIDDEN_CHARS:
1821                 forbidden_chars = folder_name_forbidden_chars;
1822                 len = G_N_ELEMENTS (folder_name_forbidden_chars);
1823                 break;
1824         case USER_NAME_FORBIDDEN_NAMES:
1825                 forbidden_chars = user_name_forbidden_chars;
1826                 len = G_N_ELEMENTS (user_name_forbidden_chars);
1827                 break;
1828         default:
1829                 g_return_val_if_reached (TRUE);
1830         }
1831
1832         for (i = 0; i < len ; i++)
1833                 if (forbidden_chars[i] == character)
1834                         return TRUE;
1835
1836         return FALSE; /* it's valid! */
1837 }
1838
1839 gchar *      
1840 modest_text_utils_label_get_selection (GtkLabel *label)
1841 {
1842         gint start, end;
1843         gchar *selection;
1844
1845         if (gtk_label_get_selection_bounds (GTK_LABEL (label), &start, &end)) {
1846                 const gchar *start_offset;
1847                 const gchar *end_offset;
1848                 start_offset = gtk_label_get_text (GTK_LABEL (label));
1849                 start_offset = g_utf8_offset_to_pointer (start_offset, start);
1850                 end_offset = gtk_label_get_text (GTK_LABEL (label));
1851                 end_offset = g_utf8_offset_to_pointer (end_offset, end);
1852                 selection = g_strndup (start_offset, end_offset - start_offset);
1853                 return selection;
1854         } else {
1855                 return g_strdup ("");
1856         }
1857 }
1858
1859 static gboolean
1860 _forward_search_image_char (gunichar ch,
1861                             gpointer userdata)
1862 {
1863         return (ch == 0xFFFC);
1864 }
1865
1866 gboolean
1867 modest_text_utils_buffer_selection_is_valid (GtkTextBuffer *buffer)
1868 {
1869         gboolean result;
1870         GtkTextIter start, end;
1871
1872         g_return_val_if_fail (GTK_IS_TEXT_BUFFER (buffer), FALSE);
1873
1874         result = gtk_text_buffer_get_has_selection (GTK_TEXT_BUFFER (buffer));
1875
1876         /* check there are no images in selection */
1877         if (result) {
1878                 gtk_text_buffer_get_selection_bounds (buffer, &start, &end);
1879                 if (gtk_text_iter_get_char (&start)== 0xFFFC)
1880                         result = FALSE;
1881                 else {
1882                         gtk_text_iter_backward_char (&end);
1883                         if (gtk_text_iter_forward_find_char (&start, _forward_search_image_char,
1884                                                              NULL, &end))
1885                                 result = FALSE;
1886                 }
1887                                     
1888         }
1889
1890         return result;
1891 }
1892
1893 static void
1894 remove_quotes (gchar **quotes)
1895 {
1896         if (g_str_has_prefix (*quotes, "\"") && g_str_has_suffix (*quotes, "\"")) {
1897                 gchar *result;
1898                 result = g_strndup ((*quotes)+1, strlen (*quotes) - 2);
1899                 g_free (*quotes);
1900                 *quotes = result;
1901         }
1902 }
1903
1904 gchar *
1905 modest_text_utils_escape_mnemonics (const gchar *text)
1906 {
1907         const gchar *p;
1908         GString *result = NULL;
1909
1910         if (text == NULL)
1911                 return NULL;
1912
1913         result = g_string_new ("");
1914         for (p = text; *p != '\0'; p++) {
1915                 if (*p == '_')
1916                         result = g_string_append (result, "__");
1917                 else
1918                         result = g_string_append_c (result, *p);
1919         }
1920         
1921         return g_string_free (result, FALSE);
1922 }
1923
1924 gchar *
1925 modest_text_utils_simplify_recipients (const gchar *recipients)
1926 {
1927         GSList *addresses, *node;
1928         GString *result;
1929         gboolean is_first = TRUE;
1930
1931         if (recipients == NULL)
1932                 return g_strdup ("");
1933
1934         addresses = modest_text_utils_split_addresses_list (recipients);
1935         result = g_string_new ("");
1936
1937         for (node = addresses; node != NULL; node = g_slist_next (node)) {
1938                 const gchar *address = (const gchar *) node->data;
1939                 gchar *left_limit, *right_limit;
1940
1941                 left_limit = strstr (address, "<");
1942                 right_limit = g_strrstr (address, ">");
1943
1944                 if (is_first)
1945                         is_first = FALSE;
1946                 else
1947                         result = g_string_append (result, ", ");
1948
1949                 if ((left_limit == NULL)||(right_limit == NULL)|| (left_limit > right_limit)) {
1950                         result = g_string_append (result, address);
1951                 } else {
1952                         gchar *name_side;
1953                         gchar *email_side;
1954                         name_side = g_strndup (address, left_limit - address);
1955                         name_side = g_strstrip (name_side);
1956                         remove_quotes (&name_side);
1957                         email_side = get_email_from_address (address);
1958                         if (name_side && email_side && !strcmp (name_side, email_side)) {
1959                                 result = g_string_append (result, email_side);
1960                         } else {
1961                                 result = g_string_append (result, address);
1962                         }
1963                         g_free (name_side);
1964                         g_free (email_side);
1965                 }
1966
1967         }
1968         g_slist_foreach (addresses, (GFunc)g_free, NULL);
1969         g_slist_free (addresses);
1970
1971         return g_string_free (result, FALSE);
1972
1973 }
1974
1975 GSList *
1976 modest_text_utils_remove_duplicate_addresses_list (GSList *address_list)
1977 {
1978         GSList *new_list, *iter;
1979         GHashTable *table;
1980
1981         g_return_val_if_fail (address_list, NULL);
1982
1983         table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
1984
1985         new_list = address_list;
1986         iter = address_list;
1987         while (iter) {
1988                 const gchar* address = (const gchar*)iter->data;
1989
1990                 /* We need only the email to just compare it and not
1991                    the full address which would make "a <a@a.com>"
1992                    different from "a@a.com" */
1993                 const gchar *email = get_email_from_address (address);
1994
1995                 /* ignore the address if already seen */
1996                 if (g_hash_table_lookup (table, email) == 0) {
1997                         g_hash_table_insert (table, (gchar*)email, GINT_TO_POINTER(1));
1998                         iter = g_slist_next (iter);
1999                 } else {
2000                         GSList *tmp = g_slist_next (iter);
2001                         new_list = g_slist_delete_link (new_list, iter);
2002                         iter = tmp;
2003                 }
2004         }
2005
2006         g_hash_table_unref (table);
2007
2008         return new_list;
2009 }
2010
2011 gchar *
2012 modest_text_utils_get_secure_header (const gchar *value,
2013                                      const gchar *header)
2014 {
2015         const gint max_len = 128;
2016         gchar *new_value = NULL;
2017         gchar *needle = g_strrstr (value, header);
2018
2019         if (needle && value != needle)
2020                 new_value = g_strdup (needle + strlen (header));
2021
2022         if (!new_value)
2023                 new_value = g_strdup (value);
2024
2025         /* Do a max length check to prevent DoS attacks caused by huge
2026            malformed headers */
2027         if (g_utf8_validate (new_value, -1, NULL)) {
2028                 if (g_utf8_strlen (new_value, -1) > max_len) {
2029                         gchar *tmp = g_malloc0 (max_len * 4);
2030                         g_utf8_strncpy (tmp, (const gchar *) new_value, max_len);
2031                         g_free (new_value);
2032                         new_value = tmp;
2033                 }
2034         } else {
2035                 if (strlen (new_value) > max_len) {
2036                         gchar *tmp = g_malloc0 (max_len);
2037                         strncpy (new_value, tmp, max_len);
2038                         g_free (new_value);
2039                         new_value = tmp;
2040                 }
2041         }
2042
2043         return new_value;
2044 }