* src/modest-text-utils.[ch]:
[modest] / src / modest-text-utils.c
1 /* Copyright (c) 2006, Nokia Corporation
2  * All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are
6  * met:
7  *
8  * * Redistributions of source code must retain the above copyright
9  *   notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  *   notice, this list of conditions and the following disclaimer in the
12  *   documentation and/or other materials provided with the distribution.
13  * * Neither the name of the Nokia Corporation nor the names of its
14  *   contributors may be used to endorse or promote products derived from
15  *   this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
18  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
20  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
21  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30
31 #include <glib.h>
32 #include <string.h>
33 #include <stdlib.h>
34 #include <glib/gi18n.h>
35 #include <regex.h>
36 #include <modest-tny-platform-factory.h>
37 #include <modest-text-utils.h>
38 #include <modest-runtime.h>
39
40
41 #ifdef HAVE_CONFIG_H
42 #include <config.h>
43 #endif /*HAVE_CONFIG_H */
44
/*
 * Labels used when composing message headers. All but EMPTY_STRING go
 * through _() so that they are looked up in the translation catalogue.
 */
#define FORWARD_STRING  _("-----Forwarded Message-----")
#define FROM_STRING     _("From:")
#define SENT_STRING     _("Sent:")
#define TO_STRING       _("To:")
#define SUBJECT_STRING  _("Subject:")
#define EMPTY_STRING    ""
52
/*
 * Do the hyperlinkification only for texts < 50 Kb,
 * as it's quite slow. Without this limit, e.g. a mail with
 * a uuencoded part (which is not recognized as an attachment)
 * will hang modest.
 */
59 #define HYPERLINKIFY_MAX_LENGTH (1024*50)
60
61 /*
62  * we need these regexps to find URLs in plain text e-mails
63  */
64 typedef struct _url_match_pattern_t url_match_pattern_t;
65 struct _url_match_pattern_t {
66         gchar   *regex;
67         regex_t *preg;
68         gchar   *prefix;
69 };
70
71 typedef struct _url_match_t url_match_t;
72 struct _url_match_t {
73         guint offset;
74         guint len;
75         const gchar* prefix;
76 };
77
/*
 * Initializer for the table of url_match_pattern_t rules, one entry per
 * line pair: { regex, compiled regex (always NULL here), prefix }.
 * get_url_matches() compiles every regex with
 * REG_ICASE | REG_EXTENDED | REG_NEWLINE and tries the rules in the order
 * given here.
 */
#define MAIL_VIEWER_URL_MATCH_PATTERNS  { \
        { "(file|rtsp|http|ftp|https)://[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]+[-A-Za-z0-9_$%&=?/~#]", \
          NULL, NULL }, \
        { "www\\.[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]*[^]}\\),?!;:\"]?)?", \
          NULL, "http://" }, \
        { "ftp\\.[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]*[^]}\\),?!;:\"]?)?", \
          NULL, "ftp://" }, \
        { "(voipto|callto|chatto|jabberto|xmpp):[-_a-z@0-9.\\+]+", \
          NULL, NULL }, \
        { "mailto:[-_a-z0-9.\\+]+@[-_a-z0-9.]+", \
          NULL, NULL }, \
        { "[-_a-z0-9.\\+]+@[-_a-z0-9.]+", \
          NULL, "mailto:" } \
        }
92
93 /* private */
94 static gchar*   cite                    (const time_t sent_date, const gchar *from);
95 static void     hyperlinkify_plain_text (GString *txt);
96 static gint     cmp_offsets_reverse     (const url_match_t *match1, const url_match_t *match2);
97 static GSList*  get_url_matches         (GString *txt);
98
99 static GString* get_next_line           (const char *b, const gsize blen, const gchar * iter);
100 static int      get_indent_level        (const char *l);
101 static void     unquote_line            (GString * l);
102 static void     append_quoted           (GString * buf, const int indent, const GString * str, 
103                                          const int cutpoint);
104 static int      get_breakpoint_utf8     (const gchar * s, const gint indent, const gint limit);
105 static int      get_breakpoint_ascii    (const gchar * s, const gint indent, const gint limit);
106 static int      get_breakpoint          (const gchar * s, const gint indent, const gint limit);
107
108 static gchar*   modest_text_utils_quote_plain_text (const gchar *text, 
109                                                     const gchar *cite, 
110                                                     const gchar *signature,
111                                                     GList *attachments, 
112                                                     int limit);
113
114 static gchar*   modest_text_utils_quote_html       (const gchar *text, 
115                                                     const gchar *cite,
116                                                     const gchar *signature,
117                                                     GList *attachments,
118                                                     int limit);
119 static gchar*   get_email_from_address (const gchar *address);
120
121
122 /* ******************************************************************* */
123 /* ************************* PUBLIC FUNCTIONS ************************ */
124 /* ******************************************************************* */
125
126 gchar *
127 modest_text_utils_quote (const gchar *text, 
128                          const gchar *content_type,
129                          const gchar *signature,
130                          const gchar *from,
131                          const time_t sent_date, 
132                          GList *attachments,
133                          int limit)
134 {
135         gchar *retval, *cited;
136
137         g_return_val_if_fail (text, NULL);
138         g_return_val_if_fail (content_type, NULL);
139
140         cited = cite (sent_date, from);
141         
142         if (content_type && strcmp (content_type, "text/html") == 0)
143                 /* TODO: extract the <body> of the HTML and pass it to
144                    the function */
145                 retval = modest_text_utils_quote_html (text, cited, signature, attachments, limit);
146         else
147                 retval = modest_text_utils_quote_plain_text (text, cited, signature, attachments, limit);
148         
149         g_free (cited);
150         
151         return retval;
152 }
153
154
155 gchar *
156 modest_text_utils_cite (const gchar *text,
157                         const gchar *content_type,
158                         const gchar *signature,
159                         const gchar *from,
160                         time_t sent_date)
161 {
162         gchar *retval;
163         gchar *tmp_sig;
164
165         g_return_val_if_fail (text, NULL);
166         g_return_val_if_fail (content_type, NULL);
167
168         if (!signature)
169                 retval = g_strdup ("");
170         else if (!strcmp(content_type, "text/html")) {
171                 tmp_sig = g_strconcat ("\n", signature, NULL);
172                 retval = modest_text_utils_convert_to_html_body(tmp_sig);
173                 g_free (tmp_sig);
174         } else {
175                 retval = g_strconcat ("\n", signature, NULL);
176         }
177
178         return retval;
179 }
180
181 static gchar *
182 forward_cite (const gchar *from,
183                     const gchar *sent,
184                     const gchar *to,
185                     const gchar *subject)
186 {
187         return g_strdup_printf ("%s\n%s %s\n%s %s\n%s %s\n%s %s\n", 
188                                 FORWARD_STRING, 
189                                 FROM_STRING, (from)?from:"",
190                                 SENT_STRING, sent,
191                                 TO_STRING, (to)?to:"",
192                                 SUBJECT_STRING, (subject)?subject:"");
193 }
194
195 gchar * 
196 modest_text_utils_inline (const gchar *text,
197                           const gchar *content_type,
198                           const gchar *signature,
199                           const gchar *from,
200                           time_t sent_date,
201                           const gchar *to,
202                           const gchar *subject)
203 {
204         gchar sent_str[101];
205         gchar *cited;
206         gchar *retval;
207         
208         g_return_val_if_fail (text, NULL);
209         g_return_val_if_fail (content_type, NULL);
210         
211         modest_text_utils_strftime (sent_str, 100, "%c", sent_date);
212
213         cited = forward_cite (from, sent_str, to, subject);
214         
215         if (content_type && strcmp (content_type, "text/html") == 0)
216                 retval = modest_text_utils_quote_html (text, cited, signature, NULL, 80);
217         else
218                 retval = modest_text_utils_quote_plain_text (text, cited, signature, NULL, 80);
219         
220         g_free (cited);
221         return retval;
222 }
223
224 /* just to prevent warnings:
225  * warning: `%x' yields only last 2 digits of year in some locales
226  */
227 gsize
228 modest_text_utils_strftime(char *s, gsize max, const char *fmt, time_t timet)
229 {
230         struct tm tm;
231
232         /* does not work on old maemo glib: 
233          *   g_date_set_time_t (&date, timet);
234          */
235         localtime_r (&timet, &tm);
236         return strftime(s, max, fmt, &tm);
237 }
238
239 gchar *
240 modest_text_utils_derived_subject (const gchar *subject, const gchar *prefix)
241 {
242         gchar *tmp;
243
244         g_return_val_if_fail (prefix, NULL);
245         
246         if (!subject)
247                 return g_strdup (prefix);
248
249         tmp = g_strchug (g_strdup (subject));
250
251         if (!strncmp (tmp, prefix, strlen (prefix))) {
252                 return tmp;
253         } else {
254                 g_free (tmp);
255                 return g_strdup_printf ("%s %s", prefix, subject);
256         }
257 }
258
259 gchar*
260 modest_text_utils_remove_address (const gchar *address_list, const gchar *address)
261 {
262         gchar *dup, *token, *ptr, *result;
263         GString *filtered_emails;
264         gchar *email_address;
265
266         g_return_val_if_fail (address_list, NULL);
267
268         if (!address)
269                 return g_strdup (address_list);
270
271         email_address = get_email_from_address (address);
272         
273         /* search for substring */
274         if (!strstr ((const char *) address_list, (const char *) email_address)) {
275                 g_free (email_address);
276                 return g_strdup (address_list);
277         }
278
279         dup = g_strdup (address_list);
280         filtered_emails = g_string_new (NULL);
281         
282         token = strtok_r (dup, ",", &ptr);
283
284         while (token != NULL) {
285                 /* Add to list if not found */
286                 if (!strstr ((const char *) token, (const char *) email_address)) {
287                         if (filtered_emails->len == 0)
288                                 g_string_append_printf (filtered_emails, "%s", g_strstrip (token));
289                         else
290                                 g_string_append_printf (filtered_emails, ",%s", g_strstrip (token));
291                 }
292                 token = strtok_r (NULL, ",", &ptr);
293         }
294         result = filtered_emails->str;
295
296         /* Clean */
297         g_free (email_address);
298         g_free (dup);
299         g_string_free (filtered_emails, FALSE);
300
301         return result;
302 }
303
304 static void
305 modest_text_utils_convert_buffer_to_html (GString *html, const gchar *data)
306 {
307         guint            i;
308         gboolean        space_seen = FALSE;
309         gsize           len;
310         guint           break_dist = 0; /* distance since last break point */
311
312         len = strlen (data);
313
314         /* replace with special html chars where needed*/
315         for (i = 0; i != len; ++i)  {
316                 char kar = data[i];
317                 
318                 if (space_seen && kar != ' ') {
319                         g_string_append_c (html, ' ');
320                         space_seen = FALSE;
321                 }
322                 
323                 /* we artificially insert a breakpoint (newline)
324                  * after 256, to make sure our lines are not so long
325                  * they will DOS the regexping later
326                  */
327                 if (++break_dist == 256) {
328                         g_string_append_c (html, '\n');
329                         break_dist = 0;
330                 }
331                 
332                 switch (kar) {
333                 case 0:  break; /* ignore embedded \0s */       
334                 case '<'  : g_string_append (html, "&lt;");   break;
335                 case '>'  : g_string_append (html, "&gt;");   break;
336                 case '&'  : g_string_append (html, "&amp;");  break;
337                 case '"'  : g_string_append (html, "&quot;");  break;
338                 case '\'' : g_string_append (html, "&apos;"); break;
339                 case '\n' : g_string_append (html, "<br>\n");              break_dist= 0; break;
340                 case '\t' : g_string_append (html, "&nbsp;&nbsp;&nbsp; "); break_dist=0; break; /* note the space at the end*/
341                 case ' ':
342                         break_dist = 0;
343                         if (space_seen) { /* second space in a row */
344                                 g_string_append (html, "&nbsp; ");
345                                 space_seen = FALSE;
346                         } else
347                                 space_seen = TRUE;
348                         break;
349                 default:
350                         g_string_append_c (html, kar);
351                 }
352         }
353 }
354
355 gchar*
356 modest_text_utils_convert_to_html (const gchar *data)
357 {
358         GString         *html;      
359         gsize           len;
360         
361         if (!data)
362                 return NULL;
363
364         len = strlen (data);
365         html = g_string_sized_new (1.5 * len);  /* just a  guess... */
366
367         g_string_append_printf (html,
368                                 "<html><head>"
369                                 "<meta http-equiv=\"content-type\" content=\"text/html; charset=utf8\">"
370                                 "</head>"
371                                 "<body>");
372
373         modest_text_utils_convert_buffer_to_html (html, data);
374         
375         g_string_append (html, "</body></html>");
376
377         if (len <= HYPERLINKIFY_MAX_LENGTH)
378                 hyperlinkify_plain_text (html);
379
380         return g_string_free (html, FALSE);
381 }
382
383 gchar *
384 modest_text_utils_convert_to_html_body (const gchar *data)
385 {
386         GString         *html;      
387         gsize           len;
388         
389         if (!data)
390                 return NULL;
391
392         len = strlen (data);
393         html = g_string_sized_new (1.5 * len);  /* just a  guess... */
394
395         modest_text_utils_convert_buffer_to_html (html, data);
396
397         if (len < HYPERLINKIFY_MAX_LENGTH)
398                 hyperlinkify_plain_text (html);
399
400         return g_string_free (html, FALSE);
401 }
402
403 void
404 modest_text_utils_get_addresses_indexes (const gchar *addresses, GSList **start_indexes, GSList **end_indexes)
405 {
406         gchar *current, *start, *last_blank;
407         gint start_offset = 0, current_offset = 0;
408
409         g_return_if_fail (start_indexes != NULL);
410         g_return_if_fail (end_indexes != NULL);
411
412         start = (gchar *) addresses;
413         current = start;
414         last_blank = start;
415
416         while (*current != '\0') {
417                 if ((start == current)&&((*current == ' ')||(*current == ',')||(*current == ';'))) {
418                         start = g_utf8_next_char (start);
419                         start_offset++;
420                         last_blank = current;
421                 } else if ((*current == ',')||(*current == ';')) {
422                         gint *start_index, *end_index;
423                         start_index = g_new0(gint, 1);
424                         end_index = g_new0(gint, 1);
425                         *start_index = start_offset;
426                         *end_index = current_offset;
427                         *start_indexes = g_slist_prepend (*start_indexes, start_index);
428                         *end_indexes = g_slist_prepend (*end_indexes, end_index);
429                         start = g_utf8_next_char (current);
430                         start_offset = current_offset + 1;
431                         last_blank = start;
432                 } else if (*current == '"') {
433                         current = g_utf8_next_char (current);
434                         current_offset ++;
435                         while ((*current != '"')&&(*current != '\0')) {
436                                 current = g_utf8_next_char (current);
437                                 current_offset ++;
438                         }
439                 }
440                                 
441                 current = g_utf8_next_char (current);
442                 current_offset ++;
443         }
444
445         if (start != current) {
446                         gint *start_index, *end_index;
447                         start_index = g_new0(gint, 1);
448                         end_index = g_new0(gint, 1);
449                         *start_index = start_offset;
450                         *end_index = current_offset;
451                         *start_indexes = g_slist_prepend (*start_indexes, start_index);
452                         *end_indexes = g_slist_prepend (*end_indexes, end_index);
453         }
454         
455         *start_indexes = g_slist_reverse (*start_indexes);
456         *end_indexes = g_slist_reverse (*end_indexes);
457
458         return;
459 }
460
461 GSList *
462 modest_text_utils_split_addresses_list (const gchar *addresses)
463 {
464         gchar *current, *start, *last_blank;
465         GSList *result = NULL;
466
467         start = (gchar *) addresses;
468         current = start;
469         last_blank = start;
470
471         while (*current != '\0') {
472                 if ((start == current)&&((*current == ' ')||(*current == ',')||(*current == ';'))) {
473                         start = g_utf8_next_char (start);
474                         last_blank = current;
475                 } else if ((*current == ',')||(*current == ';')) {
476                         gchar *new_address = NULL;
477                         new_address = g_strndup (start, current - last_blank);
478                         result = g_slist_prepend (result, new_address);
479                         start = g_utf8_next_char (current);
480                         last_blank = start;
481                 } else if (*current == '\"') {
482                         if (current == start) {
483                                 current = g_utf8_next_char (current);
484                                 start = g_utf8_next_char (start);
485                         }
486                         while ((*current != '\"')&&(*current != '\0'))
487                                 current = g_utf8_next_char (current);
488                 }
489                                 
490                 current = g_utf8_next_char (current);
491         }
492
493         if (start != current) {
494                 gchar *new_address = NULL;
495                 new_address = g_strndup (start, current - last_blank);
496                 result = g_slist_prepend (result, new_address);
497         }
498
499         result = g_slist_reverse (result);
500         return result;
501
502 }
503
504 void
505 modest_text_utils_address_range_at_position (const gchar *recipients_list,
506                                              gint position,
507                                              gint *start,
508                                              gint *end)
509 {
510         gchar *current = NULL;
511         gint range_start = 0;
512         gint range_end = 0;
513         gint index;
514         gboolean is_quoted = FALSE;
515
516         index = 0;
517         for (current = (gchar *) recipients_list; *current != '\0'; current = g_utf8_find_next_char (current, NULL)) {
518                 gunichar c = g_utf8_get_char (current);
519
520                 if ((c == ',') && (!is_quoted)) {
521                         if (index < position) {
522                                 range_start = index + 1;
523                         } else {
524                                 break;
525                         }
526                 } else if (c == '\"') {
527                         is_quoted = !is_quoted;
528                 } else if ((c == ' ') &&(range_start == index)) {
529                         range_start ++;
530                 }
531                 index ++;
532                 range_end = index;
533         }
534
535         if (start)
536                 *start = range_start;
537         if (end)
538                 *end = range_end;
539 }
540
541
542 /* ******************************************************************* */
/* ************************ UTILITY FUNCTIONS ************************ */
544 /* ******************************************************************* */
545
546 static GString *
547 get_next_line (const gchar * b, const gsize blen, const gchar * iter)
548 {
549         GString *gs;
550         const gchar *i0;
551         
552         if (iter > b + blen)
553                 return g_string_new("");
554         
555         i0 = iter;
556         while (iter[0]) {
557                 if (iter[0] == '\n')
558                         break;
559                 iter++;
560         }
561         gs = g_string_new_len (i0, iter - i0);
562         return gs;
563 }
/*
 * Count the quotation depth of a line, i.e. the number of leading '>'
 * markers, each optionally followed by a single space.
 *
 * Returns the depth, or -(depth + 1) when the text after the markers is
 * exactly the signature marker "-- ". The negative value tells the caller
 * to stop reformatting.
 */
static int
get_indent_level (const char *l)
{
        const char *p;
        int depth = 0;

        for (p = l; *p == '>'; p++) {
                depth++;
                /* swallow the blank that may follow the marker */
                if (p[1] == ' ')
                        p++;
        }

        return (strcmp (p, "-- ") == 0) ? -(depth + 1) : depth;
}
591
592 static void
593 unquote_line (GString * l)
594 {
595         gchar *p;
596
597         p = l->str;
598         while (p[0]) {
599                 if (p[0] == '>') {
600                         if (p[1] == ' ') {
601                                 p++;
602                         }
603                 } else {
604                         break;
605                 }
606                 p++;
607         }
608         g_string_erase (l, 0, p - l->str);
609 }
610
611 static void
612 append_quoted (GString * buf, int indent, const GString * str,
613                const int cutpoint)
614 {
615         int i;
616
617         indent = indent < 0 ? abs (indent) - 1 : indent;
618         for (i = 0; i <= indent; i++) {
619                 g_string_append (buf, "> ");
620         }
621         if (cutpoint > 0) {
622                 g_string_append_len (buf, str->str, cutpoint);
623         } else {
624                 g_string_append (buf, str->str);
625         }
626         g_string_append (buf, "\n");
627 }
628
629 static int
630 get_breakpoint_utf8 (const gchar * s, gint indent, const gint limit)
631 {
632         gint index = 0;
633         const gchar *pos, *last;
634         gunichar *uni;
635
636         indent = indent < 0 ? abs (indent) - 1 : indent;
637
638         last = NULL;
639         pos = s;
640         uni = g_utf8_to_ucs4_fast (s, -1, NULL);
641         while (pos[0]) {
642                 if ((index + 2 * indent > limit) && last) {
643                         g_free (uni);
644                         return last - s;
645                 }
646                 if (g_unichar_isspace (uni[index])) {
647                         last = pos;
648                 }
649                 pos = g_utf8_next_char (pos);
650                 index++;
651         }
652         g_free (uni);
653         return strlen (s);
654 }
655
656 static int
657 get_breakpoint_ascii (const gchar * s, const gint indent, const gint limit)
658 {
659         gint i, last;
660
661         last = strlen (s);
662         if (last + 2 * indent < limit)
663                 return last;
664
665         for (i = strlen (s); i > 0; i--) {
666                 if (s[i] == ' ') {
667                         if (i + 2 * indent <= limit) {
668                                 return i;
669                         } else {
670                                 last = i;
671                         }
672                 }
673         }
674         return last;
675 }
676
677 static int
678 get_breakpoint (const gchar * s, const gint indent, const gint limit)
679 {
680
681         if (g_utf8_validate (s, -1, NULL)) {
682                 return get_breakpoint_utf8 (s, indent, limit);
683         } else {                /* assume ASCII */
684                 //g_warning("invalid UTF-8 in msg");
685                 return get_breakpoint_ascii (s, indent, limit);
686         }
687 }
688
689 static gchar *
690 cite (const time_t sent_date, const gchar *from)
691 {
692         return g_strdup (_("mcen_ia_editor_original_message"));
693 }
694
695 static gchar *
696 quoted_attachments (GList *attachments)
697 {
698         GList *node = NULL;
699         GString *result = g_string_new ("");
700         for (node = attachments; node != NULL; node = g_list_next (node)) {
701                 gchar *filename = (gchar *) node->data;
702                 g_string_append_printf ( result, "%s %s\n", _("mcen_ia_editor_attach_filename"), filename);
703         }
704
705         return g_string_free (result, FALSE);
706
707 }
708
709 static gchar *
710 modest_text_utils_quote_plain_text (const gchar *text, 
711                                     const gchar *cite, 
712                                     const gchar *signature,
713                                     GList *attachments,
714                                     int limit)
715 {
716         const gchar *iter;
717         gint indent, breakpoint, rem_indent = 0;
718         GString *q, *l, *remaining;
719         gsize len;
720         gchar *attachments_string = NULL;
721
722         /* remaining will store the rest of the line if we have to break it */
723         q = g_string_new ("\n");
724         q = g_string_append (q, cite);
725         q = g_string_append_c (q, '\n');
726         remaining = g_string_new ("");
727
728         iter = text;
729         len = strlen(text);
730         do {
731                 l = get_next_line (text, len, iter);
732                 iter = iter + l->len + 1;
733                 indent = get_indent_level (l->str);
734                 unquote_line (l);
735
736                 if (remaining->len) {
737                         if (l->len && indent == rem_indent) {
738                                 g_string_prepend (l, " ");
739                                 g_string_prepend (l, remaining->str);
740                         } else {
741                                 do {
742                                         breakpoint =
743                                                 get_breakpoint (remaining->str,
744                                                                 rem_indent,
745                                                                 limit);
746                                         append_quoted (q, rem_indent,
747                                                        remaining, breakpoint);
748                                         g_string_erase (remaining, 0,
749                                                         breakpoint);
750                                         if (remaining->str[0] == ' ') {
751                                                 g_string_erase (remaining, 0,
752                                                                 1);
753                                         }
754                                 } while (remaining->len);
755                         }
756                 }
757                 g_string_free (remaining, TRUE);
758                 breakpoint = get_breakpoint (l->str, indent, limit);
759                 remaining = g_string_new (l->str + breakpoint);
760                 if (remaining->str[0] == ' ') {
761                         g_string_erase (remaining, 0, 1);
762                 }
763                 rem_indent = indent;
764                 append_quoted (q, indent, l, breakpoint);
765                 g_string_free (l, TRUE);
766         } while ((iter < text + len) || (remaining->str[0]));
767
768         attachments_string = quoted_attachments (attachments);
769         q = g_string_append (q, attachments_string);
770         g_free (attachments_string);
771
772         if (signature != NULL) {
773                 q = g_string_append_c (q, '\n');
774                 q = g_string_append (q, signature);
775         }
776
777         return g_string_free (q, FALSE);
778 }
779
780 static gchar*
781 modest_text_utils_quote_html (const gchar *text, 
782                               const gchar *cite, 
783                               const gchar *signature,
784                               GList *attachments,
785                               int limit)
786 {
787         gchar *result = NULL;
788         gchar *signature_result = NULL;
789         const gchar *format = \
790                 "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n" \
791                 "<html>\n" \
792                 "<body>\n" \
793                 "<br/>%s<br/>" \
794                 "<pre>%s<br/>%s<br/>%s</pre>\n" \
795                 "</body>\n" \
796                 "</html>\n";
797         gchar *attachments_string = NULL;
798         gchar *q_attachments_string = NULL;
799         gchar *q_cite = NULL;
800         gchar *html_text = NULL;
801
802         if (signature == NULL)
803                 signature_result = g_strdup ("");
804         else
805                 signature_result = modest_text_utils_convert_to_html_body (signature);
806
807         attachments_string = quoted_attachments (attachments);
808         q_attachments_string = modest_text_utils_convert_to_html_body (attachments_string);
809         q_cite = modest_text_utils_convert_to_html_body (cite);
810         html_text = modest_text_utils_convert_to_html_body (text);
811         result = g_strdup_printf (format, signature_result, q_cite, html_text, q_attachments_string);
812         g_free (q_cite);
813         g_free (html_text);
814         g_free (attachments_string);
815         g_free (q_attachments_string);
816         g_free (signature_result);
817         return result;
818 }
819
820 static gint 
821 cmp_offsets_reverse (const url_match_t *match1, const url_match_t *match2)
822 {
823         return match2->offset - match1->offset;
824 }
825
826
827 static GSList*
828 get_url_matches (GString *txt)
829 {
830         regmatch_t rm;
831         guint rv, i, offset = 0;
832         GSList *match_list = NULL;
833
834         static url_match_pattern_t patterns[] = MAIL_VIEWER_URL_MATCH_PATTERNS;
835         const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
836
837         /* initalize the regexps */
838         for (i = 0; i != pattern_num; ++i) {
839                 patterns[i].preg = g_slice_new0 (regex_t);
840
841                 /* this should not happen */
842                 g_return_val_if_fail (regcomp (patterns[i].preg, patterns[i].regex,
843                                                REG_ICASE|REG_EXTENDED|REG_NEWLINE) == 0, NULL);
844         }
845         /* find all the matches */
846         for (i = 0; i != pattern_num; ++i) {
847                 offset     = 0; 
848                 while (1) {
849                         url_match_t *match;
850                         gboolean is_submatch;
851                         GSList *cursor;
852                         
853                         if ((rv = regexec (patterns[i].preg, txt->str + offset, 1, &rm, 0)) != 0) {
854                                 g_return_val_if_fail (rv == REG_NOMATCH, NULL); /* this should not happen */
855                                 break; /* try next regexp */ 
856                         }
857                         if (rm.rm_so == -1)
858                                 break;
859                         
860                         is_submatch = FALSE;
861                         /* check  old matches to see if this has already been matched */
862                         cursor = match_list;
863                         while (cursor && !is_submatch) {
864                                 const url_match_t *old_match =
865                                         (const url_match_t *) cursor->data;
866                                 guint new_offset = offset + rm.rm_so;
867                                 is_submatch = (new_offset >  old_match->offset &&
868                                                new_offset <  old_match->offset + old_match->len);
869                                 cursor = g_slist_next (cursor);
870                         }
871
872                         if (!is_submatch) {
873                                 /* make a list of our matches (<offset, len, prefix> tupels)*/
874                                 match = g_slice_new (url_match_t);
875                                 match->offset = offset + rm.rm_so;
876                                 match->len    = rm.rm_eo - rm.rm_so;
877                                 match->prefix = patterns[i].prefix;
878                                 g_warning ("<%d, %d, %s>",  match->offset, match->len, match->prefix);
879                                 match_list = g_slist_prepend (match_list, match);
880                         }
881                                 
882                         offset += rm.rm_eo;
883                 }
884         }
885
886         for (i = 0; i != pattern_num; ++i) {
887                 regfree (patterns[i].preg);
888                 g_slice_free  (regex_t, patterns[i].preg);
889         } /* don't free patterns itself -- it's static */
890         
891         /* now sort the list, so the matches are in reverse order of occurence.
892          * that way, we can do the replacements starting from the end, so we don't need
893          * to recalculate the offsets
894          */
895         match_list = g_slist_sort (match_list,
896                                    (GCompareFunc)cmp_offsets_reverse); 
897         return match_list;      
898 }
899
900
901
902 static void
903 hyperlinkify_plain_text (GString *txt)
904 {
905         GSList *cursor;
906         GSList *match_list = get_url_matches (txt);
907
908         /* we will work backwards, so the offsets stay valid */
909         for (cursor = match_list; cursor; cursor = cursor->next) {
910
911                 url_match_t *match = (url_match_t*) cursor->data;
912                 gchar *url  = g_strndup (txt->str + match->offset, match->len);
913                 gchar *repl = NULL; /* replacement  */
914
915                 /* the prefix is NULL: use the one that is already there */
916                 repl = g_strdup_printf ("<a href=\"%s%s\">%s</a>",
917                                         match->prefix ? match->prefix : EMPTY_STRING, 
918                                         url, url);
919
920                 /* replace the old thing with our hyperlink
921                  * replacement thing */
922                 g_string_erase  (txt, match->offset, match->len);
923                 g_string_insert (txt, match->offset, repl);
924                 
925                 g_free (url);
926                 g_free (repl);
927
928                 g_slice_free (url_match_t, match);      
929         }
930         
931         g_slist_free (match_list);
932 }
933
934
935
936 gchar*
937 modest_text_utils_get_display_address (gchar *address)
938 {
939         gchar *cursor;
940         
941         if (!address)
942                 return NULL;
943         
944         g_return_val_if_fail (g_utf8_validate (address, -1, NULL), NULL);
945         
946         g_strchug (address); /* remove leading whitespace */
947
948         /*  <email@address> from display name */
949         cursor = g_strstr_len (address, strlen(address), "<");
950         if (cursor == address) /* there's nothing else? leave it */
951                 return address;
952         if (cursor) 
953                 cursor[0]='\0';
954
955         /* remove (bla bla) from display name */
956         cursor = g_strstr_len (address, strlen(address), "(");
957         if (cursor == address) /* there's nothing else? leave it */
958                 return address;
959         if (cursor) 
960                 cursor[0]='\0';
961
962         g_strchomp (address); /* remove trailing whitespace */
963
964         return address;
965 }
966
967 gchar *
968 modest_text_utils_get_email_address (const gchar *full_address)
969 {
970         const gchar *left, *right;
971         
972         if (!full_address)
973                 return NULL;
974         
975         g_return_val_if_fail (g_utf8_validate (full_address, -1, NULL), NULL);
976         
977         left = g_strrstr_len (full_address, strlen(full_address), "<");
978         if (left == NULL)
979                 return g_strdup (full_address);
980
981         right = g_strstr_len (left, strlen(left), ">");
982         if (right == NULL)
983                 return g_strdup (full_address);
984
985         return g_strndup (left + 1, right - left - 1);
986 }
987
988 gint 
989 modest_text_utils_get_subject_prefix_len (const gchar *sub)
990 {
991         gint i;
992         static const gchar* prefix[] = {
993                 "Re:", "RE:", "RV:", "re:"
994                 "Fwd:", "FWD:", "FW:", "fwd:", "Fw:", "fw:", NULL
995         };
996                 
997         if (!sub || (sub[0] != 'R' && sub[0] != 'F' && sub[0] != 'r' && sub[0] != 'f')) /* optimization */
998                 return 0;
999
1000         i = 0;
1001         
1002         while (prefix[i]) {
1003                 if (g_str_has_prefix(sub, prefix[i])) {
1004                         int prefix_len = strlen(prefix[i]); 
1005                         if (sub[prefix_len] == ' ')
1006                                 ++prefix_len; /* ignore space after prefix as well */
1007                         return prefix_len; 
1008                 }
1009                 ++i;
1010         }
1011         return 0;
1012 }
1013
1014
1015 gint
1016 modest_text_utils_utf8_strcmp (const gchar* s1, const gchar *s2, gboolean insensitive)
1017 {
1018         gint result = 0;
1019         gchar *n1, *n2;
1020
1021         /* work even when s1 and/or s2 == NULL */
1022         if (G_UNLIKELY(s1 == s2))
1023                 return 0;
1024
1025         /* if it's not case sensitive */
1026         if (!insensitive)
1027                 return strcmp (s1 ? s1 : "", s2 ? s2 : "");
1028         
1029         n1 = g_utf8_collate_key (s1 ? s1 : "", -1);
1030         n2 = g_utf8_collate_key (s2 ? s2 : "", -1);
1031         
1032         result = strcmp (n1, n2);
1033
1034         g_free (n1);
1035         g_free (n2);
1036         
1037         return result;
1038 }
1039
1040
1041 gchar*
1042 modest_text_utils_get_display_date (time_t date)
1043 {
1044         time_t now;
1045         static const guint BUF_SIZE = 64; 
1046         static const guint ONE_DAY = 24 * 60 * 60; /* seconds in one day */
1047         gchar date_buf[BUF_SIZE];  
1048         gchar today_buf [BUF_SIZE];  
1049
1050         modest_text_utils_strftime (date_buf, BUF_SIZE, "%x", date); 
1051
1052         now = time (NULL);
1053
1054         /* we check if the date is within the last 24h, if not, we don't
1055          * have to do the extra, expensive strftime, which was very visible
1056          * in the profiles.
1057          */
1058         if (abs(now - date) < ONE_DAY) {
1059                 
1060                 /* it's within the last 24 hours, but double check */
1061                 /* use the localized dates */
1062                 modest_text_utils_strftime (today_buf,  BUF_SIZE, "%x", now); 
1063
1064                 /* if it's today, use the time instead */
1065                 if (strcmp (date_buf, today_buf) == 0)
1066                         modest_text_utils_strftime (date_buf, BUF_SIZE, "%X", date);
1067         }
1068         
1069         return g_strdup(date_buf);
1070 }
1071
1072 gboolean
1073 modest_text_utils_validate_email_address (const gchar *email_address, const gchar **invalid_char_position)
1074 {
1075         int count = 0;
1076         const gchar *c = NULL, *domain = NULL;
1077         static gchar *rfc822_specials = "()<>@,;:\\\"[]&";
1078
1079         if (invalid_char_position != NULL)
1080                 *invalid_char_position = NULL;
1081
1082         /* first we validate the name portion (name@domain) */
1083         for (c = email_address;  *c;  c++) {
1084                 if (*c == '\"' && 
1085                     (c == email_address || 
1086                      *(c - 1) == '.' || 
1087                      *(c - 1) == '\"')) {
1088                         while (*++c) {
1089                                 if (*c == '\"') 
1090                                         break;
1091                                 if (*c == '\\' && (*++c == ' ')) 
1092                                         continue;
1093                                 if (*c <= ' ' || *c >= 127) 
1094                                         return FALSE;
1095                         }
1096                         if (!*c++) 
1097                                 return FALSE;
1098                         if (*c == '@') 
1099                                 break;
1100                         if (*c != '.') 
1101                                 return FALSE;
1102                         continue;
1103                 }
1104                 if (*c == '@') 
1105                         break;
1106                 if (*c <= ' ' || *c >= 127) 
1107                         return FALSE;
1108                 if (strchr(rfc822_specials, *c)) {
1109                         if (invalid_char_position)
1110                                 *invalid_char_position = c;
1111                         return FALSE;
1112                 }
1113         }
1114         if (c == email_address || *(c - 1) == '.') 
1115                 return FALSE;
1116
1117         /* next we validate the domain portion (name@domain) */
1118         if (!*(domain = ++c)) 
1119                 return FALSE;
1120         do {
1121                 if (*c == '.') {
1122                         if (c == domain || *(c - 1) == '.') 
1123                                 return FALSE;
1124                         count++;
1125                 }
1126                 if (*c <= ' ' || *c >= 127) 
1127                         return FALSE;
1128                 if (strchr(rfc822_specials, *c)) {
1129                         if (invalid_char_position)
1130                                 *invalid_char_position = c;
1131                         return FALSE;
1132                 }
1133         } while (*++c);
1134
1135         return (count >= 1) ? TRUE : FALSE;
1136 }
1137
1138 gboolean 
1139 modest_text_utils_validate_recipient (const gchar *recipient, const gchar **invalid_char_position)
1140 {
1141         gchar *stripped, *current;
1142         gchar *right_part;
1143         gboolean has_error = FALSE;
1144
1145         if (modest_text_utils_validate_email_address (recipient, invalid_char_position))
1146                 return TRUE;
1147         stripped = g_strdup (recipient);
1148         stripped = g_strstrip (stripped);
1149         current = stripped;
1150
1151         if (*current == '\0') {
1152                 g_free (stripped);
1153                 return FALSE;
1154         }
1155
1156         /* quoted string */
1157         if (*current == '\"') {
1158                 current = g_utf8_next_char (current);
1159                 has_error = TRUE;
1160                 for (; *current != '\0'; current = g_utf8_next_char (current)) {
1161                         if (*current == '\\') {
1162                                 /* TODO: This causes a warning, which breaks the build, 
1163                                  * because a gchar cannot be < 0.
1164                                  * murrayc. 
1165                                 if (current[1] <0) {
1166                                         has_error = TRUE;
1167                                         break;
1168                                 }
1169                                 */
1170                         } else if (*current == '\"') {
1171                                 has_error = FALSE;
1172                                 current = g_utf8_next_char (current);
1173                                 break;
1174                         }
1175                 }
1176         } else {
1177                 has_error = TRUE;
1178                 for (current = stripped ; *current != '\0'; current = g_utf8_next_char (current)) {
1179                         if (*current == '<') {
1180                                 has_error = FALSE;
1181                                 break;
1182                         }
1183                 }
1184         }
1185                 
1186         if (has_error) {
1187                 g_free (stripped);
1188                 return FALSE;
1189         }
1190
1191         right_part = g_strdup (current);
1192         g_free (stripped);
1193         right_part = g_strstrip (right_part);
1194
1195         if (g_str_has_prefix (right_part, "<") &&
1196             g_str_has_suffix (right_part, ">")) {
1197                 gchar *address;
1198                 gboolean valid;
1199
1200                 address = g_strndup (right_part+1, strlen (right_part) - 2);
1201                 g_free (right_part);
1202                 valid = modest_text_utils_validate_email_address (address, invalid_char_position);
1203                 g_free (address);
1204                 return valid;
1205         } else {
1206                 g_free (right_part);
1207                 return FALSE;
1208         }
1209 }
1210
1211
1212 gchar *
1213 modest_text_utils_get_display_size (guint64 size)
1214 {
1215         const guint KB=1024;
1216         const guint MB=1024 * KB;
1217         const guint GB=1024 * MB;
1218
1219         if (size == 0)
1220                 return g_strdup_printf(_FM("sfil_li_size_kb"), 0);
1221         if (0 < size && size < KB)
1222                 return g_strdup_printf (_FM("sfil_li_size_kb"), 1);
1223         else if (KB <= size && size < 100 * KB)
1224                 return g_strdup_printf (_FM("sfil_li_size_1kb_99kb"), size / KB);
1225         else if (100*KB <= size && size < MB)
1226                 return g_strdup_printf (_FM("sfil_li_size_100kb_1mb"), (float) size / MB);
1227         else if (MB <= size && size < 10*MB)
1228                 return g_strdup_printf (_FM("sfil_li_size_1mb_10mb"), (float) size / MB);
1229         else if (10*MB <= size && size < GB)
1230                 return g_strdup_printf (_FM("sfil_li_size_10mb_1gb"), size / MB);
1231         else
1232                 return g_strdup_printf (_FM("sfil_li_size_1gb_or_greater"), (float) size / GB); 
1233 }
1234
1235 static gchar *
1236 get_email_from_address (const gchar * address)
1237 {
1238         gchar *left_limit, *right_limit;
1239
1240         left_limit = strstr (address, "<");
1241         right_limit = g_strrstr (address, ">");
1242
1243         if ((left_limit == NULL)||(right_limit == NULL)|| (left_limit > right_limit))
1244                 return g_strdup (address);
1245         else
1246                 return g_strndup (left_limit + 1, (right_limit - left_limit) - 1);
1247 }
1248
1249 gchar *      
1250 modest_text_utils_get_color_string (GdkColor *color)
1251 {
1252
1253         return g_strdup_printf ("#%x%x%x%x%x%x%x%x%x%x%x%x",
1254                                 (color->red >> 12)   & 0xf, (color->red >> 8)   & 0xf,
1255                                 (color->red >>  4)   & 0xf, (color->red)        & 0xf,
1256                                 (color->green >> 12) & 0xf, (color->green >> 8) & 0xf,
1257                                 (color->green >>  4) & 0xf, (color->green)      & 0xf,
1258                                 (color->blue >> 12)  & 0xf, (color->blue >> 8)  & 0xf,
1259                                 (color->blue >>  4)  & 0xf, (color->blue)       & 0xf);
1260 }
1261
1262 gchar *
1263 modest_text_utils_text_buffer_get_text (GtkTextBuffer *buffer)
1264 {
1265         GtkTextIter start, end;
1266         gchar *slice, *current;
1267         GString *result = g_string_new ("");
1268
1269         g_return_val_if_fail (GTK_IS_TEXT_BUFFER (buffer), NULL);
1270
1271         gtk_text_buffer_get_start_iter (buffer, &start);
1272         gtk_text_buffer_get_end_iter (buffer, &end);
1273
1274         slice = gtk_text_buffer_get_slice (buffer, &start, &end, FALSE);
1275         current = slice;
1276
1277         while (current && current != '\0') {
1278                 if (g_utf8_get_char (current) == 0xFFFC) {
1279                         result = g_string_append_c (result, ' ');
1280                         current = g_utf8_next_char (current);
1281                 } else {
1282                         gchar *next = g_utf8_strchr (current, -1, 0xFFFC);
1283                         if (next == NULL) {
1284                                 result = g_string_append (result, current);
1285                         } else {
1286                                 result = g_string_append_len (result, current, next - current);
1287                         }
1288                         current = next;
1289                 }
1290         }
1291         g_free (slice);
1292
1293         return g_string_free (result, FALSE);
1294         
1295 }