1 /* Copyright (c) 2006, Nokia Corporation
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of the Nokia Corporation nor the names of its
14 * contributors may be used to endorse or promote products derived from
15 * this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
18 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
20 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
21 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #endif /*_GNU_SOURCE*/
35 #include <string.h> /* for strcasestr */
40 #include <glib/gi18n.h>
42 #include <modest-tny-platform-factory.h>
43 #include <modest-text-utils.h>
44 #include <modest-runtime.h>
49 #endif /*HAVE_CONFIG_H */
/* Strings used when building citation and forwarded-message headers.
 * All but EMPTY_STRING are passed through the `_()` translation macro;
 * EMPTY_STRING is a plain empty literal. */
52 #define FORWARD_STRING _("mcen_ia_editor_original_message")
53 #define FROM_STRING _("mail_va_from")
54 #define SENT_STRING _("mcen_fi_message_properties_sent")
55 #define TO_STRING _("mail_va_to")
56 #define SUBJECT_STRING _("mail_va_subject")
57 #define EMPTY_STRING ""
60 * do the hyperlinkification only for texts < 50 Kb,
61 * as it's quite slow. Without this, e.g. mail with
62 * an uuencoded part (which is not recognized as attachment,
65 #define HYPERLINKIFY_MAX_LENGTH (1024*50)
70 * we need these regexps to find URLs in plain text e-mails
72 typedef struct _url_match_pattern_t url_match_pattern_t;
73 struct _url_match_pattern_t {
79 typedef struct _url_match_t url_match_t;
88 * we mark the ampersand with \007 when converting text->html
89 * because after text->html we do hyperlink detecting, which
90 * could be screwed up by the ampersand.
91 * ie. 1<3 ==> 1\007lt;3
93 #define MARK_AMP '\007'
94 #define MARK_AMP_STR "\007"
96 /* mark & separately, because they are parts of urls.
97 * ie. a&b => a\006amp;b, but a>b => a\007gt;b
99 * we need to handle '&' separately, because it can be part of URIs
100 * (as in href="http://foo.bar?a=1&b=1"), so inside those URIs
101 * we need to re-replace \006amp; with '&' again, while outside uri's
104 * yes, it's messy, but a consequence of doing text->html first, then hyperlinkify
106 #define MARK_AMP_URI '\006'
107 #define MARK_AMP_URI_STR "\006"
110 /* note: match MARK_AMP_URI_STR as well, because after txt->html, a '&' will look like $(MARK_AMP_URI_STR)"amp;" */
111 #define MAIL_VIEWER_URL_MATCH_PATTERNS { \
112 { "(file|rtsp|http|ftp|https|mms|mmsh|rtsp|rdp|lastfm)://[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR \
113 "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]", \
115 { "www\\.[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]",\
117 { "ftp\\.[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]",\
119 { "(jabberto|voipto|sipto|sip|chatto|xmpp):[-_a-z@0-9.+]+", \
121 { "mailto:[-_a-z0-9.\\+]+@[-_a-z0-9.]+", \
123 { "[-_a-z0-9.\\+]+@[-_a-z0-9.]+",\
/* Tables of characters that are rejected in account titles, folder names
 * and user names, followed by the element count of each table
 * (G_N_ELEMENTS) exported as a constant.
 * NOTE(review): the closing lines of the initialisers and the contents of
 * user_name_forbidden_chars are not part of this listing. */
127 const gchar account_title_forbidden_chars[] = {
128 '\\', '/', ':', '*', '?', '\'', '<', '>', '|', '^'
130 const gchar folder_name_forbidden_chars[] = {
131 '<', '>', ':', '\'', '/', '\\', '|', '?', '*', '^', '%', '$', '#', '&'
133 const gchar user_name_forbidden_chars[] = {
136 const guint ACCOUNT_TITLE_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (account_title_forbidden_chars);
137 const guint FOLDER_NAME_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (folder_name_forbidden_chars);
138 const guint USER_NAME_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (user_name_forbidden_chars);
/* Forward declarations of the file-local (static) helpers: citation
 * building, hyperlink detection, line splitting / re-wrapping for quoted
 * plain text, and the plain-text / HTML quoting back ends. */
141 static gchar* cite (const time_t sent_date, const gchar *from);
142 static void hyperlinkify_plain_text (GString *txt, gint offset);
143 static gint cmp_offsets_reverse (const url_match_t *match1, const url_match_t *match2);
144 static GSList* get_url_matches (GString *txt, gint offset);
146 static GString* get_next_line (const char *b, const gsize blen, const gchar * iter);
147 static int get_indent_level (const char *l);
148 static void unquote_line (GString * l);
149 static void append_quoted (GString * buf, const int indent, const GString * str,
151 static int get_breakpoint_utf8 (const gchar * s, const gint indent, const gint limit);
152 static int get_breakpoint_ascii (const gchar * s, const gint indent, const gint limit);
153 static int get_breakpoint (const gchar * s, const gint indent, const gint limit);
155 static gchar* modest_text_utils_quote_plain_text (const gchar *text,
157 const gchar *signature,
161 static gchar* modest_text_utils_quote_html (const gchar *text,
163 const gchar *signature,
166 static gchar* get_email_from_address (const gchar *address);
169 /* ******************************************************************* */
170 /* ************************* PUBLIC FUNCTIONS ************************ */
171 /* ******************************************************************* */
/*
 * modest_text_utils_quote: build a quoted copy of a message body.
 *
 * A citation line is obtained from cite(), then the work is dispatched on
 * @content_type: exactly "text/html" goes to modest_text_utils_quote_html(),
 * anything else to modest_text_utils_quote_plain_text().  @signature,
 * @attachments and @limit are forwarded unchanged.
 *
 * Returns NULL (through g_return_val_if_fail) when @text or @content_type
 * is NULL.
 *
 * NOTE(review): because of those guards the `content_type &&` test in the
 * dispatch is always true.  cite() returns a g_strdup'ed string; the line
 * that releases `cited` is not visible in this listing -- confirm.
 */
174 modest_text_utils_quote (const gchar *text,
175 const gchar *content_type,
176 const gchar *signature,
178 const time_t sent_date,
182 gchar *retval, *cited;
184 g_return_val_if_fail (text, NULL);
185 g_return_val_if_fail (content_type, NULL);
187 cited = cite (sent_date, from);
189 if (content_type && strcmp (content_type, "text/html") == 0)
190 /* TODO: extract the <body> of the HTML and pass it to
192 retval = modest_text_utils_quote_html (text, cited, signature, attachments, limit);
194 retval = modest_text_utils_quote_plain_text (text, cited, signature, attachments, limit);
/*
 * modest_text_utils_cite: build the initial body text from @text and
 * @signature.
 *
 * Three outcomes are visible here:
 *  - an empty string (the condition selecting it is on a line that is not
 *    part of this listing);
 *  - for "text/html": "\n" + @signature converted with
 *    modest_text_utils_convert_to_html_body() (hyperlinkify enabled);
 *  - otherwise: @text, a newline and @signature concatenated.
 *
 * Returns NULL when @text or @content_type is NULL.
 *
 * NOTE(review): in the visible HTML branch @text does not contribute to
 * the result -- confirm against the full source.
 */
203 modest_text_utils_cite (const gchar *text,
204 const gchar *content_type,
205 const gchar *signature,
212 g_return_val_if_fail (text, NULL);
213 g_return_val_if_fail (content_type, NULL);
216 retval = g_strdup ("");
217 else if (strcmp(content_type, "text/html") == 0) {
218 tmp_sig = g_strconcat ("\n", signature, NULL);
219 retval = modest_text_utils_convert_to_html_body(tmp_sig, -1, TRUE);
222 retval = g_strconcat (text, "\n", signature, NULL);
/*
 * forward_cite: format the header block that precedes a forwarded message.
 *
 * Produces a newly allocated string from a format of one title line
 * followed by four "label value" lines.  The visible arguments are the
 * From, To and Subject labels with their values; a NULL @from, @to or
 * @subject is printed as an empty string.  Returns NULL when @sent is NULL.
 * The caller owns the returned string.
 */
229 forward_cite (const gchar *from,
232 const gchar *subject)
234 g_return_val_if_fail (sent, NULL);
236 return g_strdup_printf ("%s\n%s %s\n%s %s\n%s %s\n%s %s\n",
238 FROM_STRING, (from)?from:"",
240 TO_STRING, (to)?to:"",
241 SUBJECT_STRING, (subject)?subject:"");
/*
 * modest_text_utils_inline: build a body that embeds the original message
 * preceded by a forward-style header.
 *
 * @sent_date is formatted with "%c" into a 100-byte buffer, the header is
 * produced by forward_cite(), and the result is quoted through
 * modest_text_utils_quote_html() for "text/html" or
 * modest_text_utils_quote_plain_text() otherwise, with no attachment list
 * and a line limit of 80.
 *
 * Returns NULL when @text or @content_type is NULL.
 *
 * NOTE(review): the `content_type &&` test is redundant after the guard.
 */
245 modest_text_utils_inline (const gchar *text,
246 const gchar *content_type,
247 const gchar *signature,
251 const gchar *subject)
257 g_return_val_if_fail (text, NULL);
258 g_return_val_if_fail (content_type, NULL);
260 modest_text_utils_strftime (sent_str, 100, "%c", sent_date);
262 cited = forward_cite (from, sent_str, to, subject);
264 if (content_type && strcmp (content_type, "text/html") == 0)
265 retval = modest_text_utils_quote_html (text, cited, signature, NULL, 80);
267 retval = modest_text_utils_quote_plain_text (text, cited, signature, NULL, 80);
/*
 * modest_text_utils_strftime: format @timet into @s (at most @max bytes)
 * according to @fmt.
 *
 * The time is broken down with localtime_r(), i.e. in the local time zone,
 * and the return value is whatever strftime() returns.
 */
273 /* just to prevent warnings:
274 * warning: `%x' yields only last 2 digits of year in some locales
277 modest_text_utils_strftime(char *s, gsize max, const char *fmt, time_t timet)
281 /* does not work on old maemo glib:
282 * g_date_set_time_t (&date, timet);
284 localtime_r (&timet, &tm);
285 return strftime(s, max, fmt, &tm);
/*
 * modest_text_utils_derived_subject: derive a subject line from @subject
 * and @prefix.
 *
 * A NULL or empty @subject is replaced by the translated "no subject"
 * string.  A copy with leading whitespace removed is compared against
 * @prefix (case-sensitive strncmp); what happens when it already starts
 * with @prefix is on lines not part of this listing.  The visible return
 * yields a newly allocated "<prefix> <subject>".
 *
 * Returns NULL when @prefix is NULL.
 */
289 modest_text_utils_derived_subject (const gchar *subject, const gchar *prefix)
293 g_return_val_if_fail (prefix, NULL);
295 if (!subject || subject[0] == '\0')
296 subject = _("mail_va_no_subject");
298 tmp = g_strchug (g_strdup (subject));
300 if (!strncmp (tmp, prefix, strlen (prefix))) {
304 return g_strdup_printf ("%s %s", prefix, subject);
/*
 * modest_text_utils_remove_address: return a newly allocated copy of
 * @address_list without the entries that mention @address.
 *
 * The e-mail part of @address is obtained with get_email_from_address().
 * If it does not occur anywhere in @address_list, the list is duplicated
 * unchanged.  Otherwise the list is tokenised on ',' with strtok_r() and
 * every token that does not contain the e-mail is stripped of surrounding
 * whitespace and appended, separated by ','.
 *
 * Returns NULL when @address_list is NULL.
 *
 * NOTE(review): matching is done with strstr(), i.e. substring
 * containment, so an entry such as "xbob@example.org" is also dropped when
 * removing "bob@example.org" -- confirm this is intended.
 */
309 modest_text_utils_remove_address (const gchar *address_list, const gchar *address)
311 gchar *dup, *token, *ptr = NULL, *result;
312 GString *filtered_emails;
313 gchar *email_address;
315 g_return_val_if_fail (address_list, NULL);
318 return g_strdup (address_list);
320 email_address = get_email_from_address (address);
322 /* search for substring */
323 if (!strstr ((const char *) address_list, (const char *) email_address)) {
324 g_free (email_address);
325 return g_strdup (address_list);
328 dup = g_strdup (address_list);
329 filtered_emails = g_string_new (NULL);
331 token = strtok_r (dup, ",", &ptr);
333 while (token != NULL) {
334 /* Add to list if not found */
335 if (!strstr ((const char *) token, (const char *) email_address)) {
336 if (filtered_emails->len == 0)
337 g_string_append_printf (filtered_emails, "%s", g_strstrip (token));
339 g_string_append_printf (filtered_emails, ",%s", g_strstrip (token));
341 token = strtok_r (NULL, ",", &ptr);
343 result = filtered_emails->str;
346 g_free (email_address);
348 g_string_free (filtered_emails, FALSE);
/*
 * modest_text_utils_remove_duplicate_addresses: rebuild @address_list
 * with repeated addresses dropped.
 *
 * The list is split with modest_text_utils_split_addresses_list().  A
 * string-keyed hash table records the addresses already seen; an address
 * not yet in the table is appended to the result (joined with ',') and
 * then inserted.  The table keys point into the split list, so the table
 * is destroyed before the list elements are freed.
 *
 * Returns NULL when @address_list is NULL.
 */
355 modest_text_utils_remove_duplicate_addresses (const gchar *address_list)
357 GSList *addresses, *cursor;
359 gchar *new_list = NULL;
361 g_return_val_if_fail (address_list, NULL);
363 table = g_hash_table_new (g_str_hash, g_str_equal);
364 addresses = modest_text_utils_split_addresses_list (address_list);
368 const gchar* address = (const gchar*)cursor->data;
370 /* ignore the address if already seen */
371 if (g_hash_table_lookup (table, address) == 0) {
375 tmp = g_strdup (address);
377 tmp = g_strjoin (",", new_list, address, NULL);
382 g_hash_table_insert (table, (gchar*)address, GINT_TO_POINTER(1));
384 cursor = g_slist_next (cursor);
387 g_hash_table_destroy (table);
388 g_slist_foreach (addresses, (GFunc)g_free, NULL);
389 g_slist_free (addresses);
/*
 * modest_text_utils_convert_buffer_to_html_start: append an HTML-escaped
 * rendering of @data to @html.
 *
 * '<', '>' and '"' are written as entity names prefixed with MARK_AMP_STR
 * and '&' as MARK_AMP_URI_STR "amp;" -- placeholders that
 * modest_text_utils_convert_buffer_to_html_finish() later turns into real
 * ampersands.  A newline becomes "<br>\n", a tab becomes three marked
 * "nbsp;" entities plus a space, and a forced newline is emitted once 256
 * bytes have passed without a break (only in front of a byte below 127).
 * All other bytes are copied through.
 *
 * NOTE(review): the loop runs while i != n, and callers in this file pass
 * n == -1; how that case terminates is on lines not part of this listing.
 */
396 modest_text_utils_convert_buffer_to_html_start (GString *html, const gchar *data, gssize n)
399 gboolean space_seen = FALSE;
400 guint break_dist = 0; /* distance since last break point */
405 /* replace with special html chars where needed*/
406 for (i = 0; i != n; ++i) {
407 guchar kar = data[i];
409 if (space_seen && kar != ' ') {
410 g_string_append_c (html, ' ');
414 /* we artificially insert a breakpoint (newline)
415 * after 256, to make sure our lines are not so long
416 * they will DOS the regexping later
417 * Also, check that kar is ASCII to make sure that we
418 * don't break a UTF8 char in two
420 if (++break_dist >= 256 && kar < 127) {
421 g_string_append_c (html, '\n');
429 /* this is a temp place holder for '&'; we can only
430 * set the real '&' after hyperlink translation, otherwise
431 * we might screw that up */
432 break; /* ignore embedded \0s and MARK_AMP */
433 case '<' : g_string_append (html, MARK_AMP_STR "lt;"); break;
434 case '>' : g_string_append (html, MARK_AMP_STR "gt;"); break;
435 case '&' : g_string_append (html, MARK_AMP_URI_STR "amp;"); break; /* special case */
436 case '"' : g_string_append (html, MARK_AMP_STR "quot;"); break;
438 /* don't convert ' --> wpeditor will try to re-convert it... */
439 //case '\'' : g_string_append (html, "'"); break;
440 case '\n' : g_string_append (html, "<br>\n");break_dist= 0; break;
441 case '\t' : g_string_append (html, MARK_AMP_STR "nbsp;" MARK_AMP_STR "nbsp;" MARK_AMP_STR "nbsp; ");
442 break_dist=0; break; /* note the space at the end*/
445 if (space_seen) { /* second space in a row */
446 g_string_append (html, " ");
452 g_string_append_c (html, kar);
/*
 * modest_text_utils_convert_buffer_to_html_finish: second pass of the
 * text-to-HTML conversion.  Rewrites, in place, every MARK_AMP and
 * MARK_AMP_URI placeholder byte in @html to a literal '&'.
 */
459 modest_text_utils_convert_buffer_to_html_finish (GString *html)
462 /* replace all our MARK_AMPs with real ones */
463 for (i = 0; i != html->len; ++i)
464 if ((html->str)[i] == MARK_AMP || (html->str)[i] == MARK_AMP_URI)
465 (html->str)[i] = '&';
/*
 * modest_text_utils_convert_to_html: wrap plain text @data in a complete
 * HTML document.
 *
 * Emits a document prologue (including a content-type meta tag), the
 * escaped text, and the closing "</body></html>".  URLs are hyperlinked
 * only when the input length is at most HYPERLINKIFY_MAX_LENGTH; the
 * placeholder ampersands are finalised afterwards.  The caller owns the
 * returned string.  Returns NULL when @data is NULL.
 *
 * NOTE(review): the meta tag spells the charset "utf8"; the registered
 * name is "utf-8".  This variant compares with `<=` while
 * modest_text_utils_convert_to_html_body() uses `<` -- confirm which
 * bound is intended.
 */
470 modest_text_utils_convert_to_html (const gchar *data)
475 g_return_val_if_fail (data, NULL);
481 html = g_string_sized_new (1.5 * len); /* just a guess... */
483 g_string_append_printf (html,
485 "<meta http-equiv=\"content-type\" content=\"text/html; charset=utf8\">"
489 modest_text_utils_convert_buffer_to_html_start (html, data, -1);
491 g_string_append (html, "</body></html>");
493 if (len <= HYPERLINKIFY_MAX_LENGTH)
494 hyperlinkify_plain_text (html, 0);
496 modest_text_utils_convert_buffer_to_html_finish (html);
498 return g_string_free (html, FALSE);
/*
 * modest_text_utils_convert_to_html_body: convert @n bytes of plain text
 * @data into an HTML fragment (no document prologue or epilogue).
 *
 * URLs are hyperlinked when @hyperlinkify is TRUE and @n is below
 * HYPERLINKIFY_MAX_LENGTH; placeholder ampersands are then finalised.
 * The caller owns the returned string.  Returns NULL when @data is NULL.
 *
 * NOTE(review): callers in this file pass n == -1; the handling of that
 * value is presumably on the lines missing from this listing -- confirm
 * before relying on `1.5 * n` for the initial buffer size.
 */
502 modest_text_utils_convert_to_html_body (const gchar *data, gssize n, gboolean hyperlinkify)
506 g_return_val_if_fail (data, NULL);
513 html = g_string_sized_new (1.5 * n); /* just a guess... */
515 modest_text_utils_convert_buffer_to_html_start (html, data, n);
517 if (hyperlinkify && (n < HYPERLINKIFY_MAX_LENGTH))
518 hyperlinkify_plain_text (html, 0);
520 modest_text_utils_convert_buffer_to_html_finish (html);
522 return g_string_free (html, FALSE);
/*
 * modest_text_utils_get_addresses_indexes: record where each address of
 * @addresses starts and ends.
 *
 * The string is walked character by character.  Leading ' ', ',' and ';'
 * at the start of an address are skipped; a later ',' or ';' closes the
 * current address; text between double quotes is stepped over so that
 * separators inside a quoted part do not split.  For every address one
 * g_new0-allocated gint is added to *@start_indexes and one to
 * *@end_indexes.  Both lists are built by prepending and reversed before
 * returning, so they end up in order of appearance.
 *
 * Nothing allocated here is freed here: the lists and their gint cells
 * are handed to the caller.
 */
526 modest_text_utils_get_addresses_indexes (const gchar *addresses, GSList **start_indexes, GSList **end_indexes)
528 gchar *current, *start, *last_blank;
529 gint start_offset = 0, current_offset = 0;
531 g_return_if_fail (start_indexes != NULL);
532 g_return_if_fail (end_indexes != NULL);
534 start = (gchar *) addresses;
538 while (*current != '\0') {
539 if ((start == current)&&((*current == ' ')||(*current == ',')||(*current == ';'))) {
540 start = g_utf8_next_char (start);
542 last_blank = current;
543 } else if ((*current == ',')||(*current == ';')) {
544 gint *start_index, *end_index;
545 start_index = g_new0(gint, 1);
546 end_index = g_new0(gint, 1);
547 *start_index = start_offset;
548 *end_index = current_offset;
549 *start_indexes = g_slist_prepend (*start_indexes, start_index);
550 *end_indexes = g_slist_prepend (*end_indexes, end_index);
551 start = g_utf8_next_char (current);
552 start_offset = current_offset + 1;
554 } else if (*current == '"') {
555 current = g_utf8_next_char (current);
557 while ((*current != '"')&&(*current != '\0')) {
558 current = g_utf8_next_char (current);
563 current = g_utf8_next_char (current);
567 if (start != current) {
568 gint *start_index, *end_index;
569 start_index = g_new0(gint, 1);
570 end_index = g_new0(gint, 1);
571 *start_index = start_offset;
572 *end_index = current_offset;
573 *start_indexes = g_slist_prepend (*start_indexes, start_index);
574 *end_indexes = g_slist_prepend (*end_indexes, end_index);
577 *start_indexes = g_slist_reverse (*start_indexes);
578 *end_indexes = g_slist_reverse (*end_indexes);
/*
 * modest_text_utils_split_addresses_list: split @addresses on ',' and ';'
 * into a GSList of newly allocated strings.
 *
 * Leading ' ', ',' and ';' are skipped, the text up to the next ',' or ';'
 * is copied with g_strndup(), and the tail of the list is produced by a
 * recursive call on the remainder.  Returns NULL when @addresses is NULL.
 *
 * NOTE(review): recursion depth equals the number of addresses.  The
 * visible scan does not treat double quotes specially, so a ',' inside a
 * quoted display name would split the address -- confirm.
 */
585 modest_text_utils_split_addresses_list (const gchar *addresses)
588 const gchar *my_addrs = addresses;
592 g_return_val_if_fail (addresses, NULL);
594 /* skip any space, ',', ';' at the start */
595 while (my_addrs && (my_addrs[0] == ' ' || my_addrs[0] == ',' || my_addrs[0] == ';'))
598 /* are we at the end of addresses list? */
602 /* nope, we are at the start of some address
603 * now, let's find the end of the address */
605 while (end[0] && end[0] != ',' && end[0] != ';')
608 /* we got the address; copy it and remove trailing whitespace */
609 addr = g_strndup (my_addrs, end - my_addrs);
612 head = g_slist_append (NULL, addr);
613 head->next = modest_text_utils_split_addresses_list (end); /* recurse */
/*
 * modest_text_utils_address_range_at_position: find the bounds of the
 * address in @recipients_list that contains character index @position.
 *
 * The list is walked one UTF-8 character at a time.  A ',' outside double
 * quotes separates addresses: while it lies before @position the range
 * start moves to the character after it.  A '"' toggles the quoted state,
 * and a ' ' sitting exactly at the current range start is handled
 * specially (the action is on a line not part of this listing).  The
 * start of the range is stored in *@start.
 *
 * Does nothing when @recipients_list is NULL or @position is not smaller
 * than its length in characters.
 */
620 modest_text_utils_address_range_at_position (const gchar *recipients_list,
625 gchar *current = NULL;
626 gint range_start = 0;
629 gboolean is_quoted = FALSE;
631 g_return_if_fail (recipients_list);
632 g_return_if_fail (position < g_utf8_strlen(recipients_list, -1));
635 for (current = (gchar *) recipients_list; *current != '\0';
636 current = g_utf8_find_next_char (current, NULL)) {
637 gunichar c = g_utf8_get_char (current);
639 if ((c == ',') && (!is_quoted)) {
640 if (index < position) {
641 range_start = index + 1;
645 } else if (c == '\"') {
646 is_quoted = !is_quoted;
647 } else if ((c == ' ') &&(range_start == index)) {
655 *start = range_start;
/*
 * modest_text_utils_address_with_standard_length: rebuild
 * @recipients_list with every line limited to 1000 bytes.
 *
 * The input is split on '\n'; lines are re-joined with '\n' after being
 * cut with g_strndup(..., 1000).  The caller owns the returned string.
 *
 * NOTE(review): the copy reads `*splitted` (the first line) while the
 * separator test compares `current` with `splitted`.  If the loop
 * iterates with `current`, every output line would be a copy of the first
 * input line; the loop header is not part of this listing -- confirm
 * whether `*current` was intended.  The byte-based cut can also land
 * inside a multi-byte UTF-8 sequence.
 */
661 modest_text_utils_address_with_standard_length (const gchar *recipients_list)
665 GString *buffer = g_string_new ("");
667 splitted = g_strsplit (recipients_list, "\n", 0);
671 if (current != splitted)
672 buffer = g_string_append_c (buffer, '\n');
673 line = g_strndup (*splitted, 1000);
674 buffer = g_string_append (buffer, line);
679 g_strfreev (splitted);
681 return g_string_free (buffer, FALSE);
685 /* ******************************************************************* */
686 /* ************************* UTILITY FUNCTIONS *********************** */
687 /* ******************************************************************* */
/*
 * get_next_line: return, as a new GString, the piece of buffer @b (length
 * @blen) that begins at @iter.
 *
 * An empty GString is returned in one case (its condition is not part of
 * this listing); otherwise the result covers the bytes from the original
 * position up to where the scan stopped.  The caller frees the GString.
 */
690 get_next_line (const gchar * b, const gsize blen, const gchar * iter)
696 return g_string_new("");
704 gs = g_string_new_len (i0, iter - i0);
/*
 * get_indent_level: compute the quoting depth of line @l.
 *
 * As the comment below states, a line that is exactly the signature
 * marker "-- " is reported as -(indent + 1), which callers use to stop
 * re-wrapping.  The counting logic itself is not part of this listing.
 */
708 get_indent_level (const char *l)
725 /* if we hit the signature marker "-- ", we return -(indent + 1). This
726 * stops reformatting.
728 if (strcmp (l, "-- ") == 0) {
/*
 * unquote_line: strip the leading quote prefix from @l in place by erasing
 * everything before the position `p`.  How `p` is found is on lines not
 * part of this listing.
 */
736 unquote_line (GString * l)
751 g_string_erase (l, 0, p - l->str);
/*
 * append_quoted: append one quoted output line to @buf.
 *
 * A negative @indent (the signature-marker encoding -(indent + 1)) is
 * first mapped back to its non-negative level.  The line is prefixed with
 * indent + 1 copies of "> ", followed either by the first `cutpoint` bytes
 * of @str or by all of @str (the selecting condition is not part of this
 * listing), and terminated with a newline.
 */
755 append_quoted (GString * buf, int indent, const GString * str,
760 indent = indent < 0 ? abs (indent) - 1 : indent;
761 for (i = 0; i <= indent; i++) {
762 g_string_append (buf, "> ");
765 g_string_append_len (buf, str->str, cutpoint);
767 g_string_append (buf, str->str);
769 g_string_append (buf, "\n");
/*
 * get_breakpoint_utf8: choose where to break the valid UTF-8 line @s so
 * that it fits in @limit columns once quoted at level @indent.
 *
 * A negative @indent is mapped back to its non-negative level.  The string
 * is converted to UCS-4 so it can be indexed per character; whitespace
 * characters are tracked as candidate break points, and the scan stops
 * once index + 2 * indent exceeds @limit and a candidate exists.
 *
 * NOTE(review): g_utf8_to_ucs4_fast() allocates its result; the matching
 * g_free is not visible in this listing -- confirm.
 */
773 get_breakpoint_utf8 (const gchar * s, gint indent, const gint limit)
776 const gchar *pos, *last;
779 indent = indent < 0 ? abs (indent) - 1 : indent;
783 uni = g_utf8_to_ucs4_fast (s, -1, NULL);
785 if ((index + 2 * indent > limit) && last) {
789 if (g_unichar_isspace (uni[index])) {
792 pos = g_utf8_next_char (pos);
/*
 * get_breakpoint_ascii: byte-oriented counterpart of get_breakpoint_utf8.
 *
 * If the line already fits (last + 2 * indent below @limit) a short-cut
 * is taken; otherwise the string is scanned backwards from its end for a
 * position i with i + 2 * indent <= @limit.  The exact break criterion is
 * on lines not part of this listing.
 */
800 get_breakpoint_ascii (const gchar * s, const gint indent, const gint limit)
805 if (last + 2 * indent < limit)
808 for (i = strlen (s); i > 0; i--) {
810 if (i + 2 * indent <= limit) {
/*
 * get_breakpoint: pick the line-break position for @s, delegating to
 * get_breakpoint_utf8() when @s validates as UTF-8 and to
 * get_breakpoint_ascii() otherwise.
 */
821 get_breakpoint (const gchar * s, const gint indent, const gint limit)
824 if (g_utf8_validate (s, -1, NULL)) {
825 return get_breakpoint_utf8 (s, indent, limit);
826 } else { /* assume ASCII */
827 //g_warning("invalid UTF-8 in msg");
828 return get_breakpoint_ascii (s, indent, limit);
/*
 * cite: return the citation line placed above a quoted message.
 *
 * Both @sent_date and @from are currently unused: the result is always a
 * newly allocated copy of the translated "original message" string, which
 * the caller must free.
 */
833 cite (const time_t sent_date, const gchar *from)
835 return g_strdup (_("mcen_ia_editor_original_message"));
/*
 * quoted_attachments: render the attachment list as text, one line per
 * entry in the form "<translated label> <filename>\n".
 *
 * Each list element's data is used as a file-name string.  An empty or
 * NULL list yields an empty string.  The caller owns the result.
 */
839 quoted_attachments (GList *attachments)
842 GString *result = g_string_new ("");
843 for (node = attachments; node != NULL; node = g_list_next (node)) {
844 gchar *filename = (gchar *) node->data;
845 g_string_append_printf ( result, "%s %s\n", _("mcen_ia_editor_attach_filename"), filename);
848 return g_string_free (result, FALSE);
/*
 * modest_text_utils_quote_plain_text: produce the plain-text quoted form
 * of @text.
 *
 * The output starts with a newline, then the optional @signature and the
 * citation line, each followed by a newline.  The body is then processed
 * line by line: each line's indent level is determined, text left over
 * from a previous over-long line (`remaining`) is either merged into the
 * next line of the same indent level or flushed on its own, the line is
 * broken at the position given by get_breakpoint(), and the part before
 * the break is appended through append_quoted().  A single leading space
 * is dropped from the carried-over remainder.  Finally the attachment
 * summary from quoted_attachments() is appended.
 *
 * The caller owns the returned string.
 */
853 modest_text_utils_quote_plain_text (const gchar *text,
855 const gchar *signature,
860 gint indent, breakpoint, rem_indent = 0;
861 GString *q, *l, *remaining;
863 gchar *attachments_string = NULL;
865 q = g_string_new ("\n");
866 if (signature != NULL) {
867 q = g_string_append (q, signature);
868 q = g_string_append_c (q, '\n');
870 q = g_string_append (q, cite);
871 q = g_string_append_c (q, '\n');
873 /* remaining will store the rest of the line if we have to break it */
874 remaining = g_string_new ("");
879 l = get_next_line (text, len, iter);
880 iter = iter + l->len + 1;
881 indent = get_indent_level (l->str);
884 if (remaining->len) {
885 if (l->len && indent == rem_indent) {
886 g_string_prepend (l, " ");
887 g_string_prepend (l, remaining->str);
891 get_breakpoint (remaining->str,
894 append_quoted (q, rem_indent,
895 remaining, breakpoint);
896 g_string_erase (remaining, 0,
898 if (remaining->str[0] == ' ') {
899 g_string_erase (remaining, 0,
902 } while (remaining->len);
905 g_string_free (remaining, TRUE);
906 breakpoint = get_breakpoint (l->str, indent, limit);
907 remaining = g_string_new (l->str + breakpoint);
908 if (remaining->str[0] == ' ') {
909 g_string_erase (remaining, 0, 1);
912 append_quoted (q, indent, l, breakpoint);
913 g_string_free (l, TRUE);
914 } while ((iter < text + len) || (remaining->str[0]));
916 attachments_string = quoted_attachments (attachments);
917 q = g_string_append (q, attachments_string);
918 g_free (attachments_string);
920 return g_string_free (q, FALSE);
/*
 * modest_text_utils_quote_html: produce the HTML quoted form of @text.
 *
 * The signature (or an empty string when it is NULL), the citation line,
 * the message text and the attachment summary are each converted with
 * modest_text_utils_convert_to_html_body() (hyperlinkify enabled) and
 * substituted into a fixed HTML template.  The caller owns the result.
 *
 * NOTE(review): @text is run through the plain-text escaper even though
 * this path is selected for "text/html" content, so any markup in it is
 * escaped rather than preserved -- see the TODO in
 * modest_text_utils_quote().  Only three of the temporaries are visibly
 * freed here; confirm `q_cite` and `html_text` are released on the lines
 * missing from this listing.
 */
924 modest_text_utils_quote_html (const gchar *text,
926 const gchar *signature,
930 gchar *result = NULL;
931 gchar *signature_result = NULL;
932 const gchar *format = \
933 "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n" \
937 "<pre>%s<br/>%s<br/>%s</pre>\n" \
940 gchar *attachments_string = NULL;
941 gchar *q_attachments_string = NULL;
942 gchar *q_cite = NULL;
943 gchar *html_text = NULL;
945 if (signature == NULL)
946 signature_result = g_strdup ("");
948 signature_result = modest_text_utils_convert_to_html_body (signature, -1, TRUE);
950 attachments_string = quoted_attachments (attachments);
951 q_attachments_string = modest_text_utils_convert_to_html_body (attachments_string, -1, TRUE);
952 q_cite = modest_text_utils_convert_to_html_body (cite, -1, TRUE);
953 html_text = modest_text_utils_convert_to_html_body (text, -1, TRUE);
954 result = g_strdup_printf (format, signature_result, q_cite, html_text, q_attachments_string);
957 g_free (attachments_string);
958 g_free (q_attachments_string);
959 g_free (signature_result);
/*
 * cmp_offsets_reverse: GCompareFunc-style comparator that orders URL
 * matches by descending offset (positive when @match2 starts later than
 * @match1).
 */
965 cmp_offsets_reverse (const url_match_t *match1, const url_match_t *match2)
967 return match2->offset - match1->offset;
/* File-level state for URL detection.  `url_matches_block` is used as a
 * nesting counter by modest_text_utils_hyperlinkify_begin()/_end() (it is
 * incremented and decremented there) even though it is declared gboolean.
 * `patterns` is the regex table expanded from
 * MAIL_VIEWER_URL_MATCH_PATTERNS; its `preg` members are filled in when
 * the patterns are compiled. */
970 static gboolean url_matches_block = 0;
971 static url_match_pattern_t patterns[] = MAIL_VIEWER_URL_MATCH_PATTERNS;
/*
 * Body of the pattern-compilation routine (its signature is not part of
 * this listing; presumably compile_patterns(), which is what
 * modest_text_utils_hyperlinkify_begin() calls).
 *
 * For every entry of `patterns` a zeroed regex_t is slice-allocated and
 * compiled from the entry's `regex` text as a case-insensitive, extended,
 * newline-sensitive expression.  A compilation failure is reported with
 * g_warning.
 */
978 const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
979 for (i = 0; i != pattern_num; ++i) {
980 patterns[i].preg = g_slice_new0 (regex_t);
982 /* this should not happen */
983 if (regcomp (patterns[i].preg, patterns[i].regex,
984 REG_ICASE|REG_EXTENDED|REG_NEWLINE) != 0) {
985 g_warning ("%s: error in regexp:\n%s\n", __FUNCTION__, patterns[i].regex);
/*
 * Body of the pattern-cleanup routine (its signature is not part of this
 * listing).  For every entry of `patterns` the compiled regex is released
 * with regfree() and its slice-allocated regex_t is returned to the
 * allocator; the static table itself is left alone.
 */
996 const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
997 for (i = 0; i != pattern_num; ++i) {
998 regfree (patterns[i].preg);
999 g_slice_free (regex_t, patterns[i].preg);
1000 } /* don't free patterns itself -- it's static */
/*
 * modest_text_utils_hyperlinkify_begin: open a hyperlinkify section.
 *
 * The URL patterns are compiled only when no section is open yet (counter
 * is 0); the counter is then incremented, so nested begin/end pairs share
 * one set of compiled patterns.
 */
1004 modest_text_utils_hyperlinkify_begin (void)
1006 if (url_matches_block == 0)
1007 compile_patterns ();
1008 url_matches_block ++;
/*
 * modest_text_utils_hyperlinkify_end: close a hyperlinkify section.
 *
 * Decrements the nesting counter and, once it reaches zero or below, runs
 * the clean-up on the line following the test (not part of this listing).
 */
1012 modest_text_utils_hyperlinkify_end (void)
1014 url_matches_block--;
1015 if (url_matches_block <= 0)
/*
 * get_url_matches: collect every URL-like match in @txt from byte @offset
 * onwards.
 *
 * Each pattern in `patterns` is applied repeatedly, advancing past the end
 * of the previous hit.  A hit whose start lies strictly inside an already
 * recorded match is flagged as a sub-match.  Recorded matches are
 * slice-allocated url_match_t records (offset, length, prefix) and the
 * final list is sorted by descending offset so that replacements can be
 * applied back to front.
 *
 * NOTE(review): the g_return_val_if_fail inside the loop returns NULL
 * without calling modest_text_utils_hyperlinkify_end() and without
 * releasing matches collected so far -- confirm whether that path needs
 * clean-up.
 */
1021 get_url_matches (GString *txt, gint offset)
1024 guint rv, i, tmp_offset = 0;
1025 GSList *match_list = NULL;
1027 const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
1029 /* initialize the regexps */
1030 modest_text_utils_hyperlinkify_begin ();
1032 /* find all the matches */
1033 for (i = 0; i != pattern_num; ++i) {
1034 tmp_offset = offset;
1037 gboolean is_submatch;
1040 if ((rv = regexec (patterns[i].preg, txt->str + tmp_offset, 1, &rm, 0)) != 0) {
1041 g_return_val_if_fail (rv == REG_NOMATCH, NULL); /* this should not happen */
1042 break; /* try next regexp */
1047 is_submatch = FALSE;
1048 /* check old matches to see if this has already been matched */
1049 cursor = match_list;
1050 while (cursor && !is_submatch) {
1051 const url_match_t *old_match =
1052 (const url_match_t *) cursor->data;
1053 guint new_offset = tmp_offset + rm.rm_so;
1054 is_submatch = (new_offset > old_match->offset &&
1055 new_offset < old_match->offset + old_match->len);
1056 cursor = g_slist_next (cursor);
1060 /* make a list of our matches (<offset, len, prefix> tuples)*/
1061 match = g_slice_new (url_match_t);
1062 match->offset = tmp_offset + rm.rm_so;
1063 match->len = rm.rm_eo - rm.rm_so;
1064 match->prefix = patterns[i].prefix;
1065 match_list = g_slist_prepend (match_list, match);
1067 tmp_offset += rm.rm_eo;
1071 modest_text_utils_hyperlinkify_end ();
1073 /* now sort the list, so the matches are in reverse order of occurence.
1074 * that way, we can do the replacements starting from the end, so we don't need
1075 * to recalculate the offsets
1077 match_list = g_slist_sort (match_list,
1078 (GCompareFunc)cmp_offsets_reverse);
/*
 * replace_string: return a newly allocated copy of @haystack in which each
 * occurrence of @needle is collapsed to the single character @repl.
 *
 * A NULL @haystack yields NULL; a NULL or empty @needle yields a plain
 * copy.  The copy is edited in place: at each hit the tail of the string
 * (including the terminating NUL) is moved up with memmove() so that it
 * follows the one-character replacement.
 */
1084 /* replace all occurrences of needle in haystack with repl*/
1086 replace_string (const gchar *haystack, const gchar *needle, gchar repl)
1088 gchar *str, *cursor;
1090 if (!haystack || !needle || strlen(needle) == 0)
1091 return haystack ? g_strdup(haystack) : NULL;
1093 str = g_strdup (haystack);
1095 for (cursor = str; cursor && *cursor; ++cursor) {
1096 if (g_str_has_prefix (cursor, needle)) {
1098 memmove (cursor + 1,
1099 cursor + strlen (needle),
1100 strlen (cursor + strlen (needle)) + 1);
/*
 * hyperlinkify_plain_text: wrap every URL found in @txt (from @offset on)
 * in an anchor element, editing @txt in place.
 *
 * Matches come from get_url_matches() sorted back to front, so earlier
 * offsets stay valid while later text grows.  For the href attribute the
 * MARK_AMP_URI_STR "amp;" placeholders are turned back into '&'; the
 * match's prefix (if any) is put in front of the URL.  Each match record
 * is released after use and the list is freed at the end.
 */
1108 hyperlinkify_plain_text (GString *txt, gint offset)
1111 GSList *match_list = get_url_matches (txt, offset);
1113 /* we will work backwards, so the offsets stay valid */
1114 for (cursor = match_list; cursor; cursor = cursor->next) {
1116 url_match_t *match = (url_match_t*) cursor->data;
1117 gchar *url = g_strndup (txt->str + match->offset, match->len);
1118 gchar *repl = NULL; /* replacement */
1120 /* the string still contains $(MARK_AMP_URI_STR)"amp;" for each
1121 * '&' in the original, because of the text->html conversion.
1122 * in the href-URL (and only there), we must convert that back to
1125 gchar *href_url = replace_string (url, MARK_AMP_URI_STR "amp;", '&');
1127 /* the prefix is NULL: use the one that is already there */
1128 repl = g_strdup_printf ("<a href=\"%s%s\">%s</a>",
1129 match->prefix ? match->prefix : EMPTY_STRING,
1132 /* replace the old thing with our hyperlink
1133 * replacement thing */
1134 g_string_erase (txt, match->offset, match->len);
1135 g_string_insert (txt, match->offset, repl);
1141 g_slice_free (url_match_t, match);
1144 g_slist_free (match_list);
/*
 * modest_text_utils_hyperlinkify: hyperlink the URLs in @string_buffer.
 *
 * When the buffer contains the literal "<body>", scanning starts at the
 * position of that tag; the offset used otherwise is set on a line not
 * part of this listing.
 */
1148 modest_text_utils_hyperlinkify (GString *string_buffer)
1153 after_body = strstr (string_buffer->str, "<body>");
1154 if (after_body != NULL)
1155 offset = after_body - string_buffer->str;
1156 hyperlinkify_plain_text (string_buffer, offset);
/*
 * modest_text_utils_get_display_address: reduce @address, in place, to its
 * display part.
 *
 * Leading whitespace is removed when the string starts with a space.  The
 * string is then cut at the first '<' by writing a NUL there, unless the
 * '<' is the very first character, in which case the string is left as it
 * is.  Does nothing when @address is NULL.
 */
1160 /* for optimization reasons, we change the string in-place */
1162 modest_text_utils_get_display_address (gchar *address)
1166 g_return_if_fail (address);
1171 /* should not be needed, and otherwise, we probably won't screw up the address
1172 * more than it already is :)
1173 * g_return_val_if_fail (g_utf8_validate (address, -1, NULL), NULL);
1176 /* remove leading whitespace */
1177 if (address[0] == ' ')
1178 g_strchug (address);
1180 for (i = 0; address[i]; ++i) {
1181 if (address[i] == '<') {
1182 if (G_UNLIKELY(i == 0))
1183 return; /* there's nothing else, leave it */
1185 address[i] = '\0'; /* terminate the string here */
/*
 * modest_text_utils_get_email_address: extract the bare e-mail address
 * from @full_address.
 *
 * The last '<' and the first '>' after it delimit the address; the text
 * between them is returned as a new string.  In the two fallback cases
 * visible here a copy of the whole input is returned instead (their
 * conditions are not part of this listing).  Returns NULL when
 * @full_address is NULL or is not valid UTF-8.  The caller owns the
 * result.
 */
1197 modest_text_utils_get_email_address (const gchar *full_address)
1199 const gchar *left, *right;
1201 g_return_val_if_fail (full_address, NULL);
1206 g_return_val_if_fail (g_utf8_validate (full_address, -1, NULL), NULL);
1208 left = g_strrstr_len (full_address, strlen(full_address), "<");
1210 return g_strdup (full_address);
1212 right = g_strstr_len (left, strlen(left), ">");
1214 return g_strdup (full_address);
1216 return g_strndup (left + 1, right - left - 1);
/*
 * modest_text_utils_get_subject_prefix_len: length in bytes of the reply /
 * forward prefix at the start of @sub.
 *
 * A quick reject looks at the first two characters (R/F then e/w, either
 * case).  An optional "[...]" block after the prefix is skipped; if the
 * closing ']' is missing the result is 0.  When a ':' follows, it and one
 * following space are consumed and the function recurses on the rest, so
 * stacked prefixes are all counted.  Returns 0 when @sub is NULL.
 *
 * NOTE(review): in the second quick-reject test `sub[0] &&` is always
 * true at that point, because the first test already required sub[0] to
 * be one of R, F, r, f.
 */
1220 modest_text_utils_get_subject_prefix_len (const gchar *sub)
1222 gint prefix_len = 0;
1224 g_return_val_if_fail (sub, 0);
1229 /* optimization: "Re", "RE", "re","Fwd", "FWD", "fwd","FW","Fw", "fw" */
1230 if (sub[0] != 'R' && sub[0] != 'F' && sub[0] != 'r' && sub[0] != 'f')
1232 else if (sub[0] && sub[1] != 'e' && sub[1] != 'E' && sub[1] != 'w' && sub[1] != 'W')
1239 /* skip over a [...] block */
1240 if (sub[prefix_len] == '[') {
1241 int c = prefix_len + 1;
1242 while (sub[c] && sub[c] != ']')
1245 return 0; /* no end to the ']' found */
1250 /* did we find the ':' ? */
1251 if (sub[prefix_len] == ':') {
1253 if (sub[prefix_len] == ' ')
1255 prefix_len += modest_text_utils_get_subject_prefix_len (sub + prefix_len);
1256 /* g_warning ("['%s','%s']", sub, (char*) sub + prefix_len); */
/*
 * modest_text_utils_utf8_strcmp: compare two UTF-8 strings, optionally
 * ignoring case.
 *
 * NULL arguments are handled before any dereference.  The case-sensitive
 * path ends in g_utf8_collate(); the insensitive path lower-cases both
 * strings with g_utf8_strdown() and collates the copies.  Both paths have
 * a short-cut that looks only at the first byte of each string.
 *
 * NOTE(review): the short-cut tests `(c & 0xf0) == 0`, which holds only
 * for bytes below 0x10; an ASCII test would be `(c & 0x80) == 0`.  As
 * written the short-cut returns the difference of the first bytes without
 * looking at the rest of the strings -- confirm against the lines missing
 * from this listing.
 */
1264 modest_text_utils_utf8_strcmp (const gchar* s1, const gchar *s2, gboolean insensitive)
1267 /* work even when s1 and/or s2 == NULL */
1268 if (G_UNLIKELY(s1 == s2))
1270 if (G_UNLIKELY(!s1))
1272 if (G_UNLIKELY(!s2))
1275 /* if it's not case sensitive */
1278 /* optimization: shortcut if first char is ascii */
1279 if (((s1[0] & 0xf0)== 0) && ((s2[0] & 0xf0) == 0))
1280 return s1[0] - s2[0];
1282 return g_utf8_collate (s1, s2);
1288 /* optimization: shortcut if first char is ascii */
1289 if (((s1[0] & 0xf0) == 0) && ((s2[0] & 0xf0) == 0))
1290 return tolower(s1[0]) - tolower(s2[0]);
1292 n1 = g_utf8_strdown (s1, -1);
1293 n2 = g_utf8_strdown (s2, -1);
1295 result = g_utf8_collate (n1, n2);
/*
 * modest_text_utils_get_display_date: format @date for list display --
 * the time ("%X") when it falls on today's date, the date ("%x")
 * otherwise.
 *
 * The result lives in a function-static buffer: it must not be freed, is
 * overwritten by the next call, and the function is therefore not
 * reentrant.
 *
 * NOTE(review): "today" is decided by comparing seconds-since-epoch
 * divided by 86400, i.e. by UTC day, while the formatting goes through
 * localtime_r().  Near local midnight the two can disagree -- confirm
 * whether a local-time comparison was intended.
 */
1306 modest_text_utils_get_display_date (time_t date)
1308 #define DATE_BUF_SIZE 64
1309 static gchar date_buf[DATE_BUF_SIZE];
1311 /* calculate the # of days since epoch for
1312 * for today and for the date provided
1313 * based on idea from pvanhoof */
1314 int day = time(NULL) / (24 * 60 * 60);
1315 int date_day = date / (24 * 60 * 60);
1317 /* if it's today, show the time, if it's not today, show the date instead */
1319 if (day == date_day) /* is the date today? */
1320 modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, "%X", date);
1322 modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, "%x", date);
1324 return date_buf; /* this is a static buffer, don't free! */
/*
 * modest_text_utils_validate_folder_name: check that @folder_name is
 * acceptable as a folder name.
 *
 * The checks, in order: not NULL, not empty, does not start with a dot,
 * does not start or end with ASCII whitespace, contains none of the
 * forbidden folder-name characters, and does not equal (ignoring ASCII
 * case) any reserved name -- the Windows device names, ".", "..", and the
 * Maildir directories "cur", "tmp" and "new".  Returns TRUE only when all
 * checks pass.
 */
1330 modest_text_utils_validate_folder_name (const gchar *folder_name)
1332 /* based on http://msdn2.microsoft.com/en-us/library/aa365247.aspx,
1333 * with some extras */
1337 const gchar **cursor = NULL;
1338 const gchar *forbidden_names[] = { /* windows does not like these */
1339 "CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6",
1340 "COM7", "COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
1341 ".", "..", "cur", "tmp", "new", NULL /* cur, tmp new are reserved for Maildir */
1344 /* cannot be NULL */
1348 /* cannot be empty */
1349 len = strlen(folder_name);
1353 /* cannot start with a dot, vfat does not seem to like that */
1354 if (folder_name[0] == '.')
1357 /* cannot start or end with a space */
1358 if (g_ascii_isspace(folder_name[0]) || g_ascii_isspace(folder_name[len - 1]))
1361 /* cannot contain a forbidden char */
1362 for (i = 0; i < len; i++)
1363 if (modest_text_utils_is_forbidden_char (folder_name[i], FOLDER_NAME_FORBIDDEN_CHARS))
1366 /* cannot contain a forbidden word */
1368 for (cursor = forbidden_names; cursor && *cursor; ++cursor) {
1369 if (g_ascii_strcasecmp (folder_name, *cursor) == 0)
1374 return TRUE; /* it's valid! */
/*
 * modest_text_utils_validate_domain_name: check @domain against a regular
 * expression for dotted host names.
 *
 * The pattern requires at least two labels separated by '.', each made of
 * letters, digits and '-' and ending in a letter or digit; matching is
 * case-insensitive.  Returns FALSE when @domain is NULL.
 *
 * NOTE(review): the expression is compiled with REG_NOSUB, so the
 * nmatch/pmatch arguments passed to regexec() are ignored.  The regfree()
 * call is not visible in this listing -- confirm it exists.
 */
1380 modest_text_utils_validate_domain_name (const gchar *domain)
1382 gboolean valid = FALSE;
1384 const gchar* domain_regex = "^([a-z0-9-]*[a-z0-9]\\.)+[a-z0-9-]*[a-z0-9]$";
1386 g_return_val_if_fail (domain, FALSE);
1391 memset (&rx, 0, sizeof(regex_t)); /* coverity wants this... */
1393 /* domain name: all alphanum or '-' or '.',
1394 * but beginning/ending in alphanum */
1395 if (regcomp (&rx, domain_regex, REG_ICASE|REG_EXTENDED|REG_NOSUB)) {
1396 g_warning ("BUG: error in regexp");
1400 valid = (regexec (&rx, domain, 1, NULL, 0) == 0);
/*
 * modest_text_utils_validate_email_address: syntactic check of a bare
 * "name@domain" address.
 *
 * *@invalid_char_position (when the pointer is given) is reset to NULL on
 * entry and set to the offending character when one of the RFC 822
 * special characters is found where it is not allowed.  The address must
 * contain exactly one '@'.  The name part is scanned first (with special
 * handling of quoted sections and of control, space and non-ASCII bytes),
 * then the domain part, which must not be empty and in which dots may not
 * be leading, doubled or trailing.  The final result requires at least
 * one counted element in the domain (`count >= 1`).
 *
 * Returns FALSE when @email_address is NULL.
 */
1409 modest_text_utils_validate_email_address (const gchar *email_address,
1410 const gchar **invalid_char_position)
1413 const gchar *c = NULL, *domain = NULL;
1414 static gchar *rfc822_specials = "()<>@,;:\\\"[]&";
1416 if (invalid_char_position)
1417 *invalid_char_position = NULL;
1419 g_return_val_if_fail (email_address, FALSE);
1421 /* check that the email address contains exactly one @ */
1422 if (!strstr(email_address, "@") ||
1423 (strstr(email_address, "@") != g_strrstr(email_address, "@")))
1426 /* first we validate the name portion (name@domain) */
1427 for (c = email_address; *c; c++) {
1429 (c == email_address ||
1431 *(c - 1) == '\"')) {
1435 if (*c == '\\' && (*++c == ' '))
1437 if (*c <= ' ' || *c >= 127)
1450 if (*c <= ' ' || *c >= 127)
1452 if (strchr(rfc822_specials, *c)) {
1453 if (invalid_char_position)
1454 *invalid_char_position = c;
1458 if (c == email_address || *(c - 1) == '.')
1461 /* next we validate the domain portion (name@domain) */
1462 if (!*(domain = ++c))
1466 if (c == domain || *(c - 1) == '.' || *(c + 1) == '\0')
1470 if (*c <= ' ' || *c >= 127)
1472 if (strchr(rfc822_specials, *c)) {
1473 if (invalid_char_position)
1474 *invalid_char_position = c;
1479 return (count >= 1) ? TRUE : FALSE;
/*
 * Validates one recipient entry, which may be either a bare e-mail
 * address or an address wrapped in angle brackets after some other text.
 *
 * @recipient:             string to check.  NULL trips the
 *                         g_return_val_if_fail() guard, which yields FALSE.
 * @invalid_char_position: optional out parameter (may be NULL); reset to
 *                         NULL on entry and otherwise only written by
 *                         modest_text_utils_validate_email_address().
 *
 * Steps visible in this function:
 *  - the whole string is first tried as a bare address;
 *  - otherwise a whitespace-stripped private copy is examined: a leading
 *    double-quoted section is walked character by character (with
 *    special handling after a backslash), and the copy is then scanned
 *    for a '<';
 *  - the text from that point is copied and stripped, and must both
 *    start with '<' and end with '>';
 *  - the address between the brackets (strlen - 2 characters starting
 *    after the '<') is handed to
 *    modest_text_utils_validate_email_address().
 *
 * NOTE(review): presumably a successful bare-address check returns TRUE
 * straight away, and the bracketed address decides the result otherwise;
 * confirm.
 */
1483 modest_text_utils_validate_recipient (const gchar *recipient, const gchar **invalid_char_position)
1485 gchar *stripped, *current;
1487 gboolean has_error = FALSE;
1489 if (invalid_char_position)
1490 *invalid_char_position = NULL;
1492 g_return_val_if_fail (recipient, FALSE);
/* Cheap path: the entry may already be a plain name@domain address. */
1494 if (modest_text_utils_validate_email_address (recipient, invalid_char_position))
/* g_strstrip() trims in place and returns its argument, so `stripped'
 * still points at the buffer allocated by g_strdup(). */
1497 stripped = g_strdup (recipient);
1498 stripped = g_strstrip (stripped);
1501 if (*current == '\0') {
/* A leading '"' opens a quoted section. */
1507 if (*current == '\"') {
1508 current = g_utf8_next_char (current);
1510 for (; *current != '\0'; current = g_utf8_next_char (current)) {
1511 if (*current == '\\') {
1512 /* TODO: This causes a warning, which breaks the build,
1513 * because a gchar cannot be < 0.
1515 if (current[1] <0) {
1520 } else if (*current == '\"') {
1522 current = g_utf8_next_char (current);
1528 for (current = stripped ; *current != '\0'; current = g_utf8_next_char (current)) {
1529 if (*current == '<') {
1541 right_part = g_strdup (current);
1543 right_part = g_strstrip (right_part);
1545 if (g_str_has_prefix (right_part, "<") &&
1546 g_str_has_suffix (right_part, ">")) {
1550 address = g_strndup (right_part+1, strlen (right_part) - 2);
1551 g_free (right_part);
1552 valid = modest_text_utils_validate_email_address (address, invalid_char_position);
1556 g_free (right_part);
1563 modest_text_utils_get_display_size (guint64 size)
1565 const guint KB=1024;
1566 const guint MB=1024 * KB;
1567 const guint GB=1024 * MB;
1570 return g_strdup_printf(_FM("sfil_li_size_kb"), 0);
1571 if (0 < size && size < KB)
1572 return g_strdup_printf (_FM("sfil_li_size_kb"), 1);
1573 else if (KB <= size && size < 100 * KB)
1574 return g_strdup_printf (_FM("sfil_li_size_1kb_99kb"), size / KB);
1575 else if (100*KB <= size && size < MB)
1576 return g_strdup_printf (_FM("sfil_li_size_100kb_1mb"), (float) size / MB);
1577 else if (MB <= size && size < 10*MB)
1578 return g_strdup_printf (_FM("sfil_li_size_1mb_10mb"), (float) size / MB);
1579 else if (10*MB <= size && size < GB)
1580 return g_strdup_printf (_FM("sfil_li_size_10mb_1gb"), size / MB);
1582 return g_strdup_printf (_FM("sfil_li_size_1gb_or_greater"), (float) size / GB);
/*
 * Extracts the e-mail part of an address such as "Name <user@host>".
 *
 * @address: the address text.  It is passed straight to strstr(), so it
 *           must not be NULL.
 *
 * Returns: a newly allocated string that the caller frees with g_free().
 * This is the text between the first '<' and the last '>' when both are
 * present in that order, and a plain copy of @address otherwise.
 */
1586 get_email_from_address (const gchar * address)
1588 gchar *left_limit, *right_limit;
/* First '<' from the left, last '>' from the right. */
1590 left_limit = strstr (address, "<");
1591 right_limit = g_strrstr (address, ">");
/* No usable bracket pair: hand back the input unchanged (as a copy). */
1593 if ((left_limit == NULL)||(right_limit == NULL)|| (left_limit > right_limit))
1594 return g_strdup (address);
/* Copy what lies strictly between the two brackets. */
1596 return g_strndup (left_limit + 1, (right_limit - left_limit) - 1);
1600 modest_text_utils_get_color_string (GdkColor *color)
1602 g_return_val_if_fail (color, NULL);
1604 return g_strdup_printf ("#%x%x%x%x%x%x%x%x%x%x%x%x",
1605 (color->red >> 12) & 0xf, (color->red >> 8) & 0xf,
1606 (color->red >> 4) & 0xf, (color->red) & 0xf,
1607 (color->green >> 12) & 0xf, (color->green >> 8) & 0xf,
1608 (color->green >> 4) & 0xf, (color->green) & 0xf,
1609 (color->blue >> 12) & 0xf, (color->blue >> 8) & 0xf,
1610 (color->blue >> 4) & 0xf, (color->blue) & 0xf);
1614 modest_text_utils_text_buffer_get_text (GtkTextBuffer *buffer)
1616 GtkTextIter start, end;
1617 gchar *slice, *current;
1618 GString *result = g_string_new ("");
1620 g_return_val_if_fail (buffer && GTK_IS_TEXT_BUFFER (buffer), NULL);
1622 gtk_text_buffer_get_start_iter (buffer, &start);
1623 gtk_text_buffer_get_end_iter (buffer, &end);
1625 slice = gtk_text_buffer_get_slice (buffer, &start, &end, FALSE);
1628 while (current && current != '\0') {
1629 if (g_utf8_get_char (current) == 0xFFFC) {
1630 result = g_string_append_c (result, ' ');
1631 current = g_utf8_next_char (current);
1633 gchar *next = g_utf8_strchr (current, -1, 0xFFFC);
1635 result = g_string_append (result, current);
1637 result = g_string_append_len (result, current, next - current);
1644 return g_string_free (result, FALSE);
/*
 * Tells whether @character belongs to the set of forbidden characters
 * selected by @type.
 *
 * @character: the character to look up.
 * @type:      which table to consult: the account-title, folder-name or
 *             user-name one.
 *
 * Returns: FALSE when @character is not found in the selected table.
 * A @type that does not select a table ends in g_return_val_if_reached(),
 * i.e. the character is reported as forbidden (TRUE).
 *
 * NOTE(review): presumably a match in the table returns TRUE; confirm.
 */
1649 modest_text_utils_is_forbidden_char (const gchar character,
1650 ModestTextUtilsForbiddenCharType type)
1653 const gchar *forbidden_chars = NULL;
/* G_N_ELEMENTS() is applied to each table by name rather than to the
 * forbidden_chars pointer, which is why every case sets `len' itself. */
1655 /* We need to get the length in the switch because the
1656 compiler needs to know the size at compile time */
1658 case ACCOUNT_TITLE_FORBIDDEN_CHARS:
1659 forbidden_chars = account_title_forbidden_chars;
1660 len = G_N_ELEMENTS (account_title_forbidden_chars);
1662 case FOLDER_NAME_FORBIDDEN_CHARS:
1663 forbidden_chars = folder_name_forbidden_chars;
1664 len = G_N_ELEMENTS (folder_name_forbidden_chars);
1666 case USER_NAME_FORBIDDEN_NAMES:
1667 forbidden_chars = user_name_forbidden_chars;
1668 len = G_N_ELEMENTS (user_name_forbidden_chars);
/* Fail closed: an unexpected type makes the character count as
 * forbidden. */
1671 g_return_val_if_reached (TRUE);
/* Linear scan of the selected table. */
1674 for (i = 0; i < len ; i++)
1675 if (forbidden_chars[i] == character)
1678 return FALSE; /* it's valid! */
1682 modest_text_utils_label_get_selection (GtkLabel *label)
1687 if (gtk_label_get_selection_bounds (GTK_LABEL (label), &start, &end)) {
1688 const gchar *start_offset;
1689 const gchar *end_offset;
1690 start_offset = gtk_label_get_text (GTK_LABEL (label));
1691 start_offset = g_utf8_offset_to_pointer (start_offset, start);
1692 end_offset = gtk_label_get_text (GTK_LABEL (label));
1693 end_offset = g_utf8_offset_to_pointer (end_offset, end);
1694 selection = g_strndup (start_offset, end_offset - start_offset);
1697 return g_strdup ("");
/*
 * Character predicate: true exactly when @ch is U+FFFC.  It is handed to
 * gtk_text_iter_forward_find_char() by
 * modest_text_utils_buffer_selection_is_valid(), whose comment describes
 * the search as a check for images in the selection.
 */
1702 _forward_search_image_char (gunichar ch,
1705 return (ch == 0xFFFC);
/*
 * Checks whether the current selection of a text buffer is acceptable:
 * there has to be a selection, and the selection is inspected for
 * U+FFFC characters (images, per the comment below).
 *
 * @buffer: the buffer to inspect.  A value that is not a GtkTextBuffer
 *          trips the g_return_val_if_fail() guard, which yields FALSE.
 *
 * NOTE(review): presumably the result becomes FALSE as soon as a U+FFFC
 * character is found inside the selection; confirm.
 */
1709 modest_text_utils_buffer_selection_is_valid (GtkTextBuffer *buffer)
1712 GtkTextIter start, end;
1714 g_return_val_if_fail (GTK_IS_TEXT_BUFFER (buffer), FALSE);
1716 result = gtk_text_buffer_get_has_selection (GTK_TEXT_BUFFER (buffer));
1718 /* check there are no images in selection */
1720 gtk_text_buffer_get_selection_bounds (buffer, &start, &end);
/* The character at the start iterator is tested on its own, separately
 * from the forward search below. */
1721 if (gtk_text_iter_get_char (&start)== 0xFFFC)
/* Move the end iterator back one character before it is used by the
 * search. */
1724 gtk_text_iter_backward_char (&end);
1725 if (gtk_text_iter_forward_find_char (&start, _forward_search_image_char,
/*
 * Builds an escaped copy of @text, one input byte at a time: for each
 * byte either the two-character sequence "__" or the byte itself is
 * appended to the result.
 *
 * @text: the NUL-terminated string to escape.
 *
 * Returns: a newly allocated string (the character data of the GString
 * built here); the caller owns it and releases it with g_free().
 *
 * NOTE(review): presumably "__" replaces each '_' so that the underscore
 * is shown literally instead of marking a mnemonic; confirm.
 */
1736 modest_text_utils_escape_mnemonics (const gchar *text)
1739 GString *result = NULL;
1744 result = g_string_new ("");
1745 for (p = text; *p != '\0'; p++) {
1747 result = g_string_append (result, "__");
1749 result = g_string_append_c (result, *p);
/* Free only the GString wrapper; its buffer is the return value. */
1752 return g_string_free (result, FALSE);