1 /* Copyright (c) 2006, Nokia Corporation
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of the Nokia Corporation nor the names of its
14 * contributors may be used to endorse or promote products derived from
15 * this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
18 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
20 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
21 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #endif /*_GNU_SOURCE*/
35 #include <string.h> /* for strcasestr */
40 #include <glib/gi18n.h>
42 #include <modest-tny-platform-factory.h>
43 #include <modest-text-utils.h>
44 #include <modest-runtime.h>
49 #endif /*HAVE_CONFIG_H */
/*
 * Labels for the citation headers built in this file. Each one is looked up
 * through gettext (_()) using a logical string id, so the user-visible text
 * comes from the translation catalogue rather than from these literals.
 * EMPTY_STRING is a plain "" used as the fallback when a URL match has no
 * prefix to prepend.
 */
52 #define FORWARD_STRING _("mcen_ia_editor_original_message")
53 #define FROM_STRING _("mail_va_from")
54 #define SENT_STRING _("mcen_fi_message_properties_sent")
55 #define TO_STRING _("mail_va_to")
56 #define SUBJECT_STRING _("mail_va_subject")
57 #define EMPTY_STRING ""
60 * do the hyperlinkification only for texts < 50 Kb,
61 * as it's quite slow. Without this, e.g. mail with
62 * an uuencoded part (which is not recognized as attachment,
65 #define HYPERLINKIFY_MAX_LENGTH (1024*50)
70 * we need these regexps to find URLs in plain text e-mails
72 typedef struct _url_match_pattern_t url_match_pattern_t;
73 struct _url_match_pattern_t {
79 typedef struct _url_match_t url_match_t;
88 * we mark the ampersand with \007 when converting text->html
89 * because after text->html we do hyperlink detecting, which
90 * could be screwed up by the ampersand.
91 * ie. 1<3 ==> 1\007lt;3
93 #define MARK_AMP '\007'
94 #define MARK_AMP_STR "\007"
96 /* mark & separately, because they are parts of urls.
97 * ie. a&b => a\006amp;b, but a>b => a\007gt;b
99 * we need to handle '&' separately, because it can be part of URIs
100 * (as in href="http://foo.bar?a=1&b=1"), so inside those URIs
101 * we need to re-replace \006amp; with '&' again, while outside uri's
104 * yes, it's messy, but a consequence of doing text->html first, then hyperlinkify
106 #define MARK_AMP_URI '\006'
107 #define MARK_AMP_URI_STR "\006"
/*
 * Initializer for the static patterns[] table: one regular expression per
 * entry, later compiled with regcomp() using
 * REG_ICASE|REG_EXTENDED|REG_NEWLINE. The entries are, in order:
 * scheme://... URLs, hosts starting with www., hosts starting with ftp.,
 * messaging schemes (jabberto, voipto, sipto, sip, chatto, xmpp), mailto:
 * addresses and bare e-mail addresses.
 * NOTE(review): rtsp is listed twice in the first alternation; the second
 * occurrence is redundant.
 */
110 /* note: match MARK_AMP_URI_STR as well, because after txt->html, a '&' will look like $(MARK_AMP_URI_STR)"amp;" */
111 #define MAIL_VIEWER_URL_MATCH_PATTERNS { \
112 { "(file|rtsp|http|ftp|https|mms|mmsh|rtsp|rdp|lastfm)://[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR \
113 "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]", \
115 { "www\\.[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]",\
117 { "ftp\\.[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]",\
119 { "(jabberto|voipto|sipto|sip|chatto|xmpp):[-_a-z@0-9.+]+", \
121 { "mailto:[-_a-z0-9.\\+]+@[-_a-z0-9.]+", \
123 { "[-_a-z0-9.\\+]+@[-_a-z0-9.]+",\
/*
 * Characters that are rejected in account titles, folder names and user
 * names. The tables are plain gchar arrays (no terminating NUL in the
 * initializers shown), so their element counts are exported separately in
 * the *_FORBIDDEN_CHARS_LENGTH constants, computed with G_N_ELEMENTS.
 */
127 const gchar account_title_forbidden_chars[] = {
128 '\\', '/', ':', '*', '?', '\'', '<', '>', '|', '^'
130 const gchar folder_name_forbidden_chars[] = {
131 '<', '>', ':', '\'', '/', '\\', '|', '?', '*', '^', '%', '$', '#', '&'
133 const gchar user_name_forbidden_chars[] = {
136 const guint ACCOUNT_TITLE_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (account_title_forbidden_chars);
137 const guint FOLDER_NAME_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (folder_name_forbidden_chars);
138 const guint USER_NAME_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (user_name_forbidden_chars);
/*
 * Forward declarations of the file-local (static) helpers, so that the
 * public functions can call them before their definitions appear.
 */
141 static gchar* cite (const time_t sent_date, const gchar *from);
142 static void hyperlinkify_plain_text (GString *txt, gint offset);
143 static gint cmp_offsets_reverse (const url_match_t *match1, const url_match_t *match2);
144 static GSList* get_url_matches (GString *txt, gint offset);
146 static GString* get_next_line (const char *b, const gsize blen, const gchar * iter);
147 static int get_indent_level (const char *l);
148 static void unquote_line (GString * l);
149 static void append_quoted (GString * buf, const int indent, const GString * str,
151 static int get_breakpoint_utf8 (const gchar * s, const gint indent, const gint limit);
152 static int get_breakpoint_ascii (const gchar * s, const gint indent, const gint limit);
153 static int get_breakpoint (const gchar * s, const gint indent, const gint limit);
155 static gchar* modest_text_utils_quote_plain_text (const gchar *text,
157 const gchar *signature,
161 static gchar* modest_text_utils_quote_html (const gchar *text,
163 const gchar *signature,
166 static gchar* get_email_from_address (const gchar *address);
169 /* ******************************************************************* */
170 /* ************************* PUBLIC FUNCTIONS ************************ */
171 /* ******************************************************************* */
/*
 * modest_text_utils_quote:
 * Quote a message body, choosing HTML or plain-text quoting from the
 * content type.
 *
 * @text: body to quote; must not be NULL
 * @content_type: MIME type of @text; must not be NULL. Only the exact
 *   string "text/html" selects modest_text_utils_quote_html().
 * @signature: signature text, forwarded to the quoting helper
 * @sent_date: sent date, forwarded to cite()
 *
 * from, attachments and limit are forwarded unchanged to cite() and to the
 * quoting helpers.
 *
 * Returns NULL (via g_return_val_if_fail) when @text or @content_type is
 * NULL.
 * NOTE(review): the content_type test in the if below repeats the guard
 * above it; it only matters if the g_return_* checks are compiled out.
 */
174 modest_text_utils_quote (const gchar *text,
175 const gchar *content_type,
176 const gchar *signature,
178 const time_t sent_date,
182 gchar *retval, *cited;
184 g_return_val_if_fail (text, NULL);
185 g_return_val_if_fail (content_type, NULL);
187 cited = cite (sent_date, from);
189 if (content_type && strcmp (content_type, "text/html") == 0)
190 /* TODO: extract the <body> of the HTML and pass it to
192 retval = modest_text_utils_quote_html (text, cited, signature, attachments, limit);
194 retval = modest_text_utils_quote_plain_text (text, cited, signature, attachments, limit);
/*
 * modest_text_utils_cite:
 * Combine @text and @signature according to @content_type.
 *
 * @text: message text; must not be NULL
 * @content_type: MIME type; must not be NULL
 * @signature: signature text
 *
 * Three outcomes are produced below:
 *  - an empty string (NOTE(review): presumably when there is no signature;
 *    confirm the condition guarding that branch),
 *  - for "text/html": the HTML-body conversion of "\n" followed by the
 *    signature, with hyperlink detection enabled,
 *  - otherwise: @text, a newline and the signature concatenated.
 *
 * Returns NULL (via g_return_val_if_fail) when @text or @content_type is
 * NULL.
 */
203 modest_text_utils_cite (const gchar *text,
204 const gchar *content_type,
205 const gchar *signature,
212 g_return_val_if_fail (text, NULL);
213 g_return_val_if_fail (content_type, NULL);
216 retval = g_strdup ("");
217 else if (strcmp(content_type, "text/html") == 0) {
218 tmp_sig = g_strconcat ("\n", signature, NULL);
219 retval = modest_text_utils_convert_to_html_body(tmp_sig, -1, TRUE);
222 retval = g_strconcat (text, "\n", signature, NULL);
/*
 * forward_cite:
 * Format the citation header used by modest_text_utils_inline(): a first
 * line followed by four "label value" lines, each ended by a newline.
 *
 * @from, @subject (and to): may be NULL; a NULL value is printed as "".
 * sent must not be NULL; otherwise NULL is returned (via
 * g_return_val_if_fail).
 *
 * Returns a newly allocated string (g_strdup_printf); free with g_free().
 */
229 forward_cite (const gchar *from,
232 const gchar *subject)
234 g_return_val_if_fail (sent, NULL);
236 return g_strdup_printf ("%s\n%s %s\n%s %s\n%s %s\n%s %s\n",
238 FROM_STRING, (from)?from:"",
240 TO_STRING, (to)?to:"",
241 SUBJECT_STRING, (subject)?subject:"");
/*
 * modest_text_utils_inline:
 * Quote @text underneath a header block built by forward_cite().
 *
 * @text: body to include; must not be NULL
 * @content_type: MIME type of @text; must not be NULL. Only the exact
 *   string "text/html" selects the HTML quoting helper.
 * @signature: signature text, forwarded to the quoting helper
 * @subject: subject shown in the header block
 *
 * Returns NULL (via g_return_val_if_fail) when @text or @content_type is
 * NULL.
 */
245 modest_text_utils_inline (const gchar *text,
246 const gchar *content_type,
247 const gchar *signature,
251 const gchar *subject)
257 g_return_val_if_fail (text, NULL);
258 g_return_val_if_fail (content_type, NULL);
/* "%c" is the locale's date-and-time representation; the result is written
 * into sent_str with a limit of 100 bytes */
260 modest_text_utils_strftime (sent_str, 100, "%c", sent_date);
262 cited = forward_cite (from, sent_str, to, subject);
/* both helpers are called without attachments (NULL) and with a line
 * limit of 80 */
264 if (content_type && strcmp (content_type, "text/html") == 0)
265 retval = modest_text_utils_quote_html (text, cited, signature, NULL, 80);
267 retval = modest_text_utils_quote_plain_text (text, cited, signature, NULL, 80);
273 /* just to prevent warnings:
274 * warning: `%x' yields only last 2 digits of year in some locales
/*
 * modest_text_utils_strftime:
 * Format @timet as local time.
 *
 * @s: output buffer
 * @max: size of @s in bytes, passed straight to strftime()
 * @fmt: strftime() format string
 * @timet: time to format; converted with localtime_r()
 *
 * Returns whatever strftime() returns for the converted time.
 */
277 modest_text_utils_strftime(char *s, gsize max, const char *fmt, time_t timet)
281 /* does not work on old maemo glib:
282 * g_date_set_time_t (&date, timet);
284 localtime_r (&timet, &tm);
285 return strftime(s, max, fmt, &tm);
/*
 * modest_text_utils_derived_subject:
 * Derive a subject line that carries @prefix.
 *
 * @subject: original subject; NULL or "" is replaced by the translated
 *   "mail_va_no_subject" text
 * @prefix: prefix to apply; must not be NULL (NULL is returned otherwise,
 *   via g_return_val_if_fail)
 *
 * The prefix test is a case-sensitive strncmp() against a copy of the
 * subject with leading whitespace removed. When the prefix is not already
 * present the result is "<prefix> <subject>", built from the original
 * (unstripped) subject as a newly allocated string.
 */
289 modest_text_utils_derived_subject (const gchar *subject, const gchar *prefix)
293 g_return_val_if_fail (prefix, NULL);
295 if (!subject || subject[0] == '\0')
296 subject = _("mail_va_no_subject");
298 tmp = g_strchug (g_strdup (subject));
300 if (!strncmp (tmp, prefix, strlen (prefix))) {
304 return g_strdup_printf ("%s %s", prefix, subject);
/*
 * modest_text_utils_remove_address:
 * Return a copy of @address_list without the entries that mention
 * @address.
 *
 * @address_list: comma-separated list; must not be NULL
 * @address: address to remove; reduced to its e-mail part with
 *   get_email_from_address() before matching
 *
 * If the e-mail part does not occur anywhere in @address_list, an
 * unchanged copy is returned. Otherwise the list is split on ',' with
 * strtok_r(), each kept token is stripped of surrounding whitespace and
 * the tokens are re-joined with ','.
 *
 * NOTE(review): matching uses strstr(), so a token is dropped whenever it
 * contains the address as a substring (x@y.org would also remove
 * ax@y.org), and a ',' inside a quoted display name splits the entry.
 * Confirm this is acceptable for callers.
 */
309 modest_text_utils_remove_address (const gchar *address_list, const gchar *address)
311 gchar *dup, *token, *ptr = NULL, *result;
312 GString *filtered_emails;
313 gchar *email_address;
315 g_return_val_if_fail (address_list, NULL);
318 return g_strdup (address_list);
320 email_address = get_email_from_address (address);
322 /* quick exit: the address does not occur anywhere in the list */
323 if (!strstr ((const char *) address_list, (const char *) email_address)) {
324 g_free (email_address);
325 return g_strdup (address_list);
328 dup = g_strdup (address_list);
329 filtered_emails = g_string_new (NULL);
331 token = strtok_r (dup, ",", &ptr);
333 while (token != NULL) {
334 /* keep the token unless it contains the address being removed */
335 if (!strstr ((const char *) token, (const char *) email_address)) {
336 if (filtered_emails->len == 0)
337 g_string_append_printf (filtered_emails, "%s", g_strstrip (token));
339 g_string_append_printf (filtered_emails, ",%s", g_strstrip (token));
341 token = strtok_r (NULL, ",", &ptr);
/* the character data is kept: the GString wrapper is released below with
 * free_segment == FALSE */
343 result = filtered_emails->str;
346 g_free (email_address);
348 g_string_free (filtered_emails, FALSE);
/*
 * modest_text_utils_remove_duplicate_addresses:
 * Build a comma-separated list containing each address of @address_list
 * once, in first-seen order.
 *
 * @address_list: list to filter; must not be NULL
 *
 * The list is split with modest_text_utils_split_addresses_list(); a hash
 * table keyed by the address string (g_str_hash / g_str_equal, i.e. exact
 * byte-wise equality) records what has already been emitted. The table
 * borrows the strings owned by the split list, so it is destroyed before
 * those strings are freed.
 *
 * NOTE(review): new_list starts as "" and addresses are added with
 * g_strjoin (",", new_list, address, NULL), which would leave a leading
 * ',' in the result -- confirm whether that is handled.
 */
355 modest_text_utils_remove_duplicate_addresses (const gchar *address_list)
357 GSList *addresses, *cursor;
361 g_return_val_if_fail (address_list, NULL);
363 table = g_hash_table_new (g_str_hash, g_str_equal);
364 addresses = modest_text_utils_split_addresses_list (address_list);
366 new_list = g_strdup("");
369 const gchar* address = (const gchar*)cursor->data;
371 /* ignore the address if already seen */
372 if (g_hash_table_lookup (table, address) == 0) {
374 gchar *tmp = g_strjoin (",", new_list, address, NULL);
378 g_hash_table_insert (table, (gchar*)address, GINT_TO_POINTER(1));
380 cursor = g_slist_next (cursor);
383 g_hash_table_destroy (table);
384 g_slist_foreach (addresses, (GFunc)g_free, NULL);
385 g_slist_free (addresses);
/*
 * modest_text_utils_convert_buffer_to_html_start:
 * Append an HTML rendering of the plain-text bytes in @data to @html.
 *
 * @html: output string, appended to
 * @data: input bytes
 * @n: number of bytes to process (the loop runs while i != n)
 *
 * Characters with a meaning in HTML are written as entities whose leading
 * '&' is replaced by a marker byte: MARK_AMP for "lt;", "gt;", "quot;" and
 * "nbsp;", MARK_AMP_URI for "amp;". The markers are turned into real '&'
 * characters by modest_text_utils_convert_buffer_to_html_finish().
 * A newline becomes "<br>\n"; a tab becomes three nbsp entities plus a
 * space; runs of spaces are tracked through space_seen; a newline is
 * inserted once break_dist reaches 256 and the current byte is below 127.
 *
 * NOTE(review): callers in this file pass n == -1; confirm how a negative
 * @n is normalised before the i != n loop starts.
 */
392 modest_text_utils_convert_buffer_to_html_start (GString *html, const gchar *data, gssize n)
395 gboolean space_seen = FALSE;
396 guint break_dist = 0; /* distance since last break point */
401 /* replace with special html chars where needed*/
402 for (i = 0; i != n; ++i) {
403 guchar kar = data[i];
405 if (space_seen && kar != ' ') {
406 g_string_append_c (html, ' ');
410 /* we artificially insert a breakpoint (newline)
411 * after 256, to make sure our lines are not so long
412 * they will DOS the regexping later
413 * Also, check that kar is ASCII to make sure that we
414 * don't break a UTF8 char in two
416 if (++break_dist >= 256 && kar < 127) {
417 g_string_append_c (html, '\n');
425 /* this is a temp place holder for '&'; we can only
426 * set the real '&' after hyperlink translation, otherwise
427 * we might screw that up */
428 break; /* ignore embedded \0s and MARK_AMP */
429 case '<' : g_string_append (html, MARK_AMP_STR "lt;"); break;
430 case '>' : g_string_append (html, MARK_AMP_STR "gt;"); break;
431 case '&' : g_string_append (html, MARK_AMP_URI_STR "amp;"); break; /* special case */
432 case '"' : g_string_append (html, MARK_AMP_STR "quot;"); break;
434 /* don't convert ' --> wpeditor will try to re-convert it... */
435 //case '\'' : g_string_append (html, "'"); break;
436 case '\n' : g_string_append (html, "<br>\n");break_dist= 0; break;
437 case '\t' : g_string_append (html, MARK_AMP_STR "nbsp;" MARK_AMP_STR "nbsp;" MARK_AMP_STR "nbsp; ");
438 break_dist=0; break; /* note the space at the end*/
441 if (space_seen) { /* second space in a row */
442 g_string_append (html, " ");
448 g_string_append_c (html, kar);
/*
 * modest_text_utils_convert_buffer_to_html_finish:
 * Second half of the text-to-HTML conversion: walk the whole of @html and
 * overwrite every MARK_AMP and MARK_AMP_URI marker byte with '&', in
 * place. The string length does not change.
 */
455 modest_text_utils_convert_buffer_to_html_finish (GString *html)
458 /* replace all our MARK_AMPs (both marker kinds) with real '&' characters */
459 for (i = 0; i != html->len; ++i)
460 if ((html->str)[i] == MARK_AMP || (html->str)[i] == MARK_AMP_URI)
461 (html->str)[i] = '&';
/*
 * modest_text_utils_convert_to_html:
 * Wrap the plain text @data in an HTML document.
 *
 * @data: text to convert; must not be NULL (NULL is returned otherwise,
 *   via g_return_val_if_fail)
 *
 * Steps: emit the document head, append the escaped text, close the
 * document with "</body></html>", run hyperlink detection over the whole
 * buffer when len <= HYPERLINKIFY_MAX_LENGTH, then turn the marker bytes
 * into real '&' characters.
 *
 * Returns the character data of the GString (g_string_free with FALSE),
 * which the caller owns and frees with g_free().
 *
 * NOTE(review): the meta tag announces charset=utf8; the registered
 * charset name is UTF-8 -- confirm that consumers accept this spelling.
 */
466 modest_text_utils_convert_to_html (const gchar *data)
471 g_return_val_if_fail (data, NULL);
477 html = g_string_sized_new (1.5 * len); /* just a guess... */
479 g_string_append_printf (html,
481 "<meta http-equiv=\"content-type\" content=\"text/html; charset=utf8\">"
485 modest_text_utils_convert_buffer_to_html_start (html, data, -1);
487 g_string_append (html, "</body></html>");
489 if (len <= HYPERLINKIFY_MAX_LENGTH)
490 hyperlinkify_plain_text (html, 0);
492 modest_text_utils_convert_buffer_to_html_finish (html);
494 return g_string_free (html, FALSE);
/*
 * modest_text_utils_convert_to_html_body:
 * Convert plain text to an HTML fragment (no document head or closing
 * tags are added here).
 *
 * @data: text to convert; must not be NULL (NULL is returned otherwise,
 *   via g_return_val_if_fail)
 * @n: number of bytes of @data to convert
 * @hyperlinkify: when TRUE, and @n is below HYPERLINKIFY_MAX_LENGTH,
 *   link-like text is wrapped in anchors
 *
 * Returns the character data of the GString (g_string_free with FALSE),
 * which the caller owns and frees with g_free().
 *
 * NOTE(review): callers in this file pass n == -1; confirm that @n is
 * replaced by the real length before it is used for sizing and for the
 * length test. The test here is a strict '<', whereas
 * modest_text_utils_convert_to_html() uses '<='.
 */
498 modest_text_utils_convert_to_html_body (const gchar *data, gssize n, gboolean hyperlinkify)
502 g_return_val_if_fail (data, NULL);
509 html = g_string_sized_new (1.5 * n); /* just a guess... */
511 modest_text_utils_convert_buffer_to_html_start (html, data, n);
513 if (hyperlinkify && (n < HYPERLINKIFY_MAX_LENGTH))
514 hyperlinkify_plain_text (html, 0);
516 modest_text_utils_convert_buffer_to_html_finish (html);
518 return g_string_free (html, FALSE);
/*
 * modest_text_utils_get_addresses_indexes:
 * Locate the individual addresses inside @addresses.
 *
 * @addresses: list of addresses separated by ',' or ';'
 * @start_indexes: out; must not be NULL. Receives one newly allocated gint
 *   per address holding the start offset.
 * @end_indexes: out; must not be NULL. Receives the matching end offsets.
 *
 * Separators and spaces at the very start of an entry are skipped, and a
 * double-quoted section is stepped over so that separators inside it do
 * not split the entry. Entries are prepended while scanning and both lists
 * are reversed at the end, so they come out in order of appearance. Every
 * element is allocated with g_new0, so the caller has to free the elements
 * as well as the lists.
 *
 * NOTE(review): when a quoted section is not terminated, the inner loop
 * stops on the terminating NUL; confirm that the unconditional advance
 * that follows cannot step past the end of the string.
 */
522 modest_text_utils_get_addresses_indexes (const gchar *addresses, GSList **start_indexes, GSList **end_indexes)
524 gchar *current, *start, *last_blank;
525 gint start_offset = 0, current_offset = 0;
527 g_return_if_fail (start_indexes != NULL);
528 g_return_if_fail (end_indexes != NULL);
530 start = (gchar *) addresses;
534 while (*current != '\0') {
535 if ((start == current)&&((*current == ' ')||(*current == ',')||(*current == ';'))) {
536 start = g_utf8_next_char (start);
538 last_blank = current;
539 } else if ((*current == ',')||(*current == ';')) {
540 gint *start_index, *end_index;
541 start_index = g_new0(gint, 1);
542 end_index = g_new0(gint, 1);
543 *start_index = start_offset;
544 *end_index = current_offset;
545 *start_indexes = g_slist_prepend (*start_indexes, start_index);
546 *end_indexes = g_slist_prepend (*end_indexes, end_index);
547 start = g_utf8_next_char (current);
548 start_offset = current_offset + 1;
550 } else if (*current == '"') {
551 current = g_utf8_next_char (current);
553 while ((*current != '"')&&(*current != '\0')) {
554 current = g_utf8_next_char (current);
559 current = g_utf8_next_char (current);
563 if (start != current) {
564 gint *start_index, *end_index;
565 start_index = g_new0(gint, 1);
566 end_index = g_new0(gint, 1);
567 *start_index = start_offset;
568 *end_index = current_offset;
569 *start_indexes = g_slist_prepend (*start_indexes, start_index);
570 *end_indexes = g_slist_prepend (*end_indexes, end_index);
573 *start_indexes = g_slist_reverse (*start_indexes);
574 *end_indexes = g_slist_reverse (*end_indexes);
/*
 * modest_text_utils_split_addresses_list:
 * Split @addresses into a GSList of newly allocated address strings.
 *
 * @addresses: list separated by ',' or ';'; must not be NULL (NULL is
 *   returned otherwise, via g_return_val_if_fail)
 *
 * One list node is produced per call: leading spaces and separators are
 * skipped, the text up to the next ',' or ';' is copied, and the rest of
 * the list comes from a recursive call on the remainder. The recursion
 * depth therefore equals the number of addresses.
 *
 * NOTE(review): the end-of-address scan stops at the first ',' or ';' and
 * shows no handling of double-quoted display names, unlike
 * modest_text_utils_get_addresses_indexes() -- confirm this is intended.
 */
581 modest_text_utils_split_addresses_list (const gchar *addresses)
584 const gchar *my_addrs = addresses;
588 g_return_val_if_fail (addresses, NULL);
590 /* skip any space, ',', ';' at the start */
591 while (my_addrs && (my_addrs[0] == ' ' || my_addrs[0] == ',' || my_addrs[0] == ';'))
594 /* are we at the end of addresses list? */
598 /* nope, we are at the start of some address
599 * now, let's find the end of the address */
601 while (end[0] && end[0] != ',' && end[0] != ';')
604 /* we got the address; copy it and remove trailing whitespace */
605 addr = g_strndup (my_addrs, end - my_addrs);
608 head = g_slist_append (NULL, addr);
609 head->next = modest_text_utils_split_addresses_list (end); /* recurse */
/*
 * modest_text_utils_address_range_at_position:
 * Find the address of @recipients_list that contains the character at
 * position, and report where it starts through *start.
 *
 * @recipients_list: list of recipients; must not be NULL
 *
 * position is counted in characters (it is checked against
 * g_utf8_strlen()) and must be smaller than the length of the list;
 * otherwise the function returns without writing anything.
 *
 * The scan walks the list one UTF-8 character at a time. Only an unquoted
 * ',' acts as a separator here; '"' toggles the quoted state, and a space
 * sitting exactly at the current range start is given special treatment.
 */
616 modest_text_utils_address_range_at_position (const gchar *recipients_list,
621 gchar *current = NULL;
622 gint range_start = 0;
625 gboolean is_quoted = FALSE;
627 g_return_if_fail (recipients_list);
628 g_return_if_fail (position < g_utf8_strlen(recipients_list, -1));
631 for (current = (gchar *) recipients_list; *current != '\0';
632 current = g_utf8_find_next_char (current, NULL)) {
633 gunichar c = g_utf8_get_char (current);
635 if ((c == ',') && (!is_quoted)) {
636 if (index < position) {
637 range_start = index + 1;
641 } else if (c == '\"') {
642 is_quoted = !is_quoted;
643 } else if ((c == ' ') &&(range_start == index)) {
651 *start = range_start;
/*
 * modest_text_utils_address_with_standard_length:
 * Rebuild @recipients_list with every line limited to 1000 bytes.
 *
 * @recipients_list: text to process; it is split on "\n"
 *
 * The pieces are appended to a GString, separated by '\n', and the
 * character data of that string is returned (g_string_free with FALSE);
 * the caller frees it with g_free().
 *
 * NOTE(review): the separator test compares current with splitted, but the
 * copy reads *splitted (always the first element) instead of *current --
 * confirm whether every output line is really meant to repeat the first
 * input line. g_strndup() also counts bytes, so the 1000-byte cut can fall
 * inside a multi-byte UTF-8 character.
 */
657 modest_text_utils_address_with_standard_length (const gchar *recipients_list)
661 GString *buffer = g_string_new ("");
663 splitted = g_strsplit (recipients_list, "\n", 0);
667 if (current != splitted)
668 buffer = g_string_append_c (buffer, '\n');
669 line = g_strndup (*splitted, 1000);
670 buffer = g_string_append (buffer, line);
675 g_strfreev (splitted);
677 return g_string_free (buffer, FALSE);
681 /* ******************************************************************* */
682 /* ************************* UTILITY FUNCTIONS ************************ */
683 /* ******************************************************************* */
/*
 * get_next_line:
 * Return the next line of the buffer @b as a new GString.
 *
 * @b: start of the buffer
 * @blen: length of the buffer
 * @iter: current position inside the buffer
 *
 * Either an empty GString is returned, or one holding the bytes between
 * the line start (i0) and the position the scan stopped at. The caller
 * owns the returned GString.
 */
686 get_next_line (const gchar * b, const gsize blen, const gchar * iter)
692 return g_string_new("");
700 gs = g_string_new_len (i0, iter - i0);
/*
 * get_indent_level:
 * Work out the indent level of the line @l. The signature marker is
 * recognised with an exact strcmp() against "-- " (dash, dash, space) and,
 * as described in the note inside the function, is reported as
 * -(indent + 1).
 */
704 get_indent_level (const char *l)
721 /* if we hit the signature marker "-- ", we return -(indent + 1). This
722 * stops reformatting.
724 if (strcmp (l, "-- ") == 0) {
/*
 * unquote_line:
 * Strip the leading part of the line @l in place: everything before the
 * position p is erased from the GString.
 */
732 unquote_line (GString * l)
747 g_string_erase (l, 0, p - l->str);
/*
 * append_quoted:
 * Append one quoted line to @buf.
 *
 * @buf: output string
 * @indent: indent level of the line; a negative value is decoded as
 *   abs(indent) - 1
 * @str: text of the line
 *
 * The line is prefixed with indent + 1 copies of "> " (the loop runs from
 * 0 to indent inclusive), followed either by the first cutpoint bytes of
 * @str or by the whole of @str, and is ended with a newline.
 */
751 append_quoted (GString * buf, int indent, const GString * str,
756 indent = indent < 0 ? abs (indent) - 1 : indent;
757 for (i = 0; i <= indent; i++) {
758 g_string_append (buf, "> ");
761 g_string_append_len (buf, str->str, cutpoint);
763 g_string_append (buf, str->str);
765 g_string_append (buf, "\n");
/*
 * get_breakpoint_utf8:
 * Find where to break the valid UTF-8 line @s so that it fits @limit.
 *
 * @s: line to break
 * @indent: indent level; a negative value is decoded as abs(indent) - 1
 * @limit: maximum width
 *
 * The line is converted to UCS-4 with g_utf8_to_ucs4_fast() and scanned
 * character by character; whitespace positions are candidate break points,
 * and the scan gives up once index + 2 * indent exceeds @limit and a
 * candidate has been seen. The 2 * indent term matches the two-byte "> "
 * marker written per indent level by append_quoted().
 */
769 get_breakpoint_utf8 (const gchar * s, gint indent, const gint limit)
772 const gchar *pos, *last;
775 indent = indent < 0 ? abs (indent) - 1 : indent;
779 uni = g_utf8_to_ucs4_fast (s, -1, NULL);
781 if ((index + 2 * indent > limit) && last) {
785 if (g_unichar_isspace (uni[index])) {
788 pos = g_utf8_next_char (pos);
/*
 * get_breakpoint_ascii:
 * Byte-oriented counterpart of get_breakpoint_utf8(), used by
 * get_breakpoint() for text that is not valid UTF-8.
 *
 * @s: line to break
 * @indent: indent level
 * @limit: maximum width
 *
 * A line whose length plus 2 * indent is below @limit is handled up front;
 * otherwise the line is scanned backwards from its end for a position that
 * satisfies i + 2 * indent <= limit.
 */
796 get_breakpoint_ascii (const gchar * s, const gint indent, const gint limit)
801 if (last + 2 * indent < limit)
804 for (i = strlen (s); i > 0; i--) {
806 if (i + 2 * indent <= limit) {
/*
 * get_breakpoint:
 * Pick the line-breaking routine for @s: valid UTF-8 goes to
 * get_breakpoint_utf8(), anything else is treated as single-byte text and
 * goes to get_breakpoint_ascii(). @indent and @limit are passed through
 * unchanged and the helper's result is returned.
 */
817 get_breakpoint (const gchar * s, const gint indent, const gint limit)
820 if (g_utf8_validate (s, -1, NULL)) {
821 return get_breakpoint_utf8 (s, indent, limit);
822 } else { /* assume ASCII */
823 //g_warning("invalid UTF-8 in msg");
824 return get_breakpoint_ascii (s, indent, limit);
/*
 * cite:
 * Return the citation line placed above quoted text: a newly allocated
 * copy of the translated "mcen_ia_editor_original_message" string.
 * @sent_date and @from are not used by the statement below.
 */
829 cite (const time_t sent_date, const gchar *from)
831 return g_strdup (_("mcen_ia_editor_original_message"));
/*
 * quoted_attachments:
 * Build the attachment listing appended to quoted messages.
 *
 * @attachments: GList whose data pointers are used as file name strings;
 *   may be NULL (empty list)
 *
 * Produces one line per element: the translated
 * "mcen_ia_editor_attach_filename" label, a space, the file name and a
 * newline. Returns a newly allocated string, "" for an empty list.
 */
835 quoted_attachments (GList *attachments)
838 GString *result = g_string_new ("");
839 for (node = attachments; node != NULL; node = g_list_next (node)) {
840 gchar *filename = (gchar *) node->data;
841 g_string_append_printf ( result, "%s %s\n", _("mcen_ia_editor_attach_filename"), filename);
844 return g_string_free (result, FALSE);
/*
 * modest_text_utils_quote_plain_text:
 * Produce the plain-text quoted form of @text.
 *
 * @text: body to quote
 * @signature: optional signature; when not NULL it is written first,
 *   followed by a newline
 *
 * Layout of the result: a leading newline, the optional signature, the
 * cite line, the quoted body, then the attachment listing from
 * quoted_attachments().
 *
 * The body is processed line by line: get_next_line() fetches a line,
 * get_indent_level() gives its quoting depth, get_breakpoint() decides
 * where it has to be wrapped for the given limit and append_quoted()
 * writes the part that fits. Text cut off by wrapping is kept in
 * remaining; it is joined to the next line when that line is non-empty and
 * has the same indent level, and flushed on its own otherwise.
 *
 * Returns the character data of the GString (g_string_free with FALSE);
 * the caller frees it with g_free().
 */
849 modest_text_utils_quote_plain_text (const gchar *text,
851 const gchar *signature,
856 gint indent, breakpoint, rem_indent = 0;
857 GString *q, *l, *remaining;
859 gchar *attachments_string = NULL;
861 q = g_string_new ("\n");
862 if (signature != NULL) {
863 q = g_string_append (q, signature);
864 q = g_string_append_c (q, '\n');
866 q = g_string_append (q, cite);
867 q = g_string_append_c (q, '\n');
869 /* remaining will store the rest of the line if we have to break it */
870 remaining = g_string_new ("");
875 l = get_next_line (text, len, iter);
876 iter = iter + l->len + 1;
877 indent = get_indent_level (l->str);
880 if (remaining->len) {
881 if (l->len && indent == rem_indent) {
882 g_string_prepend (l, " ");
883 g_string_prepend (l, remaining->str);
887 get_breakpoint (remaining->str,
890 append_quoted (q, rem_indent,
891 remaining, breakpoint);
892 g_string_erase (remaining, 0,
894 if (remaining->str[0] == ' ') {
895 g_string_erase (remaining, 0,
898 } while (remaining->len);
901 g_string_free (remaining, TRUE);
902 breakpoint = get_breakpoint (l->str, indent, limit);
903 remaining = g_string_new (l->str + breakpoint);
904 if (remaining->str[0] == ' ') {
905 g_string_erase (remaining, 0, 1);
908 append_quoted (q, indent, l, breakpoint);
909 g_string_free (l, TRUE);
910 } while ((iter < text + len) || (remaining->str[0]));
912 attachments_string = quoted_attachments (attachments);
913 q = g_string_append (q, attachments_string);
914 g_free (attachments_string);
916 return g_string_free (q, FALSE);
/*
 * modest_text_utils_quote_html:
 * Produce the HTML quoted form of @text.
 *
 * @text: body to quote
 * @signature: optional signature; NULL is rendered as an empty string
 *
 * The signature, the cite line, the body and the attachment listing are
 * each run through modest_text_utils_convert_to_html_body() (with
 * hyperlink detection on) and then substituted, in that order, into the
 * HTML template held in format. The result is a newly allocated string.
 *
 * NOTE(review): the body is passed through the plain-text-to-HTML
 * converter, so markup already present in an HTML @text ends up escaped
 * and shown literally -- confirm this is the intended behaviour for HTML
 * input.
 */
920 modest_text_utils_quote_html (const gchar *text,
922 const gchar *signature,
926 gchar *result = NULL;
927 gchar *signature_result = NULL;
928 const gchar *format = \
929 "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n" \
933 "<pre>%s<br/>%s<br/>%s</pre>\n" \
936 gchar *attachments_string = NULL;
937 gchar *q_attachments_string = NULL;
938 gchar *q_cite = NULL;
939 gchar *html_text = NULL;
941 if (signature == NULL)
942 signature_result = g_strdup ("");
944 signature_result = modest_text_utils_convert_to_html_body (signature, -1, TRUE);
946 attachments_string = quoted_attachments (attachments);
947 q_attachments_string = modest_text_utils_convert_to_html_body (attachments_string, -1, TRUE);
948 q_cite = modest_text_utils_convert_to_html_body (cite, -1, TRUE);
949 html_text = modest_text_utils_convert_to_html_body (text, -1, TRUE);
950 result = g_strdup_printf (format, signature_result, q_cite, html_text, q_attachments_string);
953 g_free (attachments_string);
954 g_free (q_attachments_string);
955 g_free (signature_result);
/*
 * cmp_offsets_reverse:
 * Comparison callback for g_slist_sort() that orders URL matches by
 * descending offset: the result is positive when @match2 starts later than
 * @match1, so later matches sort first.
 */
961 cmp_offsets_reverse (const url_match_t *match1, const url_match_t *match2)
963 return match2->offset - match1->offset;
/*
 * State shared by the hyperlink detection code.
 *
 * url_matches_block: number of modest_text_utils_hyperlinkify_begin()
 *   calls that have not yet been balanced by an _end() call. It is
 *   declared gboolean but incremented and decremented like a counter.
 * patterns: the table of URL regular expressions; every entry gets a
 *   compiled regex_t in its preg member while the counter is non-zero.
 */
966 static gboolean url_matches_block = 0;
967 static url_match_pattern_t patterns[] = MAIL_VIEWER_URL_MATCH_PATTERNS;
/* compile step: allocate a zeroed regex_t per entry and compile its
 * expression; a compile failure is only reported with g_warning */
974 const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
975 for (i = 0; i != pattern_num; ++i) {
976 patterns[i].preg = g_slice_new0 (regex_t);
978 /* this should not happen */
979 if (regcomp (patterns[i].preg, patterns[i].regex,
980 REG_ICASE|REG_EXTENDED|REG_NEWLINE) != 0) {
981 g_warning ("%s: error in regexp:\n%s\n", __FUNCTION__, patterns[i].regex);
/* release step: regfree() the compiled expression, then give the regex_t
 * itself back to the slice allocator */
992 const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
993 for (i = 0; i != pattern_num; ++i) {
994 regfree (patterns[i].preg);
995 g_slice_free (regex_t, patterns[i].preg);
996 } /* don't free patterns itself -- it's static */
/*
 * modest_text_utils_hyperlinkify_begin:
 * Open a hyperlink-detection section. The URL patterns are compiled by the
 * first call only (counter at 0); every call increments the counter.
 */
1000 modest_text_utils_hyperlinkify_begin (void)
1002 if (url_matches_block == 0)
1003 compile_patterns ();
1004 url_matches_block ++;
/*
 * modest_text_utils_hyperlinkify_end:
 * Close a hyperlink-detection section: the counter is decremented, and
 * once it has dropped to zero or below the clean-up branch runs.
 */
1008 modest_text_utils_hyperlinkify_end (void)
1010 url_matches_block--;
1011 if (url_matches_block <= 0)
/*
 * get_url_matches:
 * Collect every URL-like match in @txt, starting at byte @offset.
 *
 * @txt: text to scan
 * @offset: byte offset at which scanning starts for every pattern
 *
 * Each pattern of the patterns[] table is applied repeatedly with
 * regexec(), advancing past the end of the previous hit. A hit whose start
 * lies strictly inside an earlier match is flagged as a sub-match. Matches
 * are stored as url_match_t records (absolute offset, length, prefix of
 * the pattern) allocated with g_slice_new, and the list is finally sorted
 * by descending offset.
 *
 * NOTE(review): a regexec() result other than 0 or REG_NOMATCH leaves
 * through g_return_val_if_fail (returning NULL), which skips the matching
 * modest_text_utils_hyperlinkify_end() call and drops the matches
 * collected so far -- confirm whether that path needs clean-up.
 */
1017 get_url_matches (GString *txt, gint offset)
1020 guint rv, i, tmp_offset = 0;
1021 GSList *match_list = NULL;
1023 const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
1025 /* initialize the regexps */
1026 modest_text_utils_hyperlinkify_begin ();
1028 /* find all the matches */
1029 for (i = 0; i != pattern_num; ++i) {
1030 tmp_offset = offset;
1033 gboolean is_submatch;
1036 if ((rv = regexec (patterns[i].preg, txt->str + tmp_offset, 1, &rm, 0)) != 0) {
1037 g_return_val_if_fail (rv == REG_NOMATCH, NULL); /* this should not happen */
1038 break; /* try next regexp */
1043 is_submatch = FALSE;
1044 /* check old matches to see if this has already been matched */
1045 cursor = match_list;
1046 while (cursor && !is_submatch) {
1047 const url_match_t *old_match =
1048 (const url_match_t *) cursor->data;
1049 guint new_offset = tmp_offset + rm.rm_so;
1050 is_submatch = (new_offset > old_match->offset &&
1051 new_offset < old_match->offset + old_match->len);
1052 cursor = g_slist_next (cursor);
1056 /* make a list of our matches (<offset, len, prefix> tuples)*/
1057 match = g_slice_new (url_match_t);
1058 match->offset = tmp_offset + rm.rm_so;
1059 match->len = rm.rm_eo - rm.rm_so;
1060 match->prefix = patterns[i].prefix;
1061 match_list = g_slist_prepend (match_list, match);
1063 tmp_offset += rm.rm_eo;
1067 modest_text_utils_hyperlinkify_end ();
1069 /* now sort the list, so the matches are in reverse order of occurence.
1070 * that way, we can do the replacements starting from the end, so we don't need
1071 * to recalculate the offsets
1073 match_list = g_slist_sort (match_list,
1074 (GCompareFunc)cmp_offsets_reverse);
1080 /* replace all occurrences of needle in haystack with the single character repl.
 *
 * Returns a newly allocated string. A NULL haystack gives NULL; a NULL or
 * empty needle gives a plain copy of haystack. For every position where
 * needle matches, the text following the match (terminator included) is
 * moved up to sit directly behind that position, so the match shrinks to
 * one byte.
 */
1082 replace_string (const gchar *haystack, const gchar *needle, gchar repl)
1084 gchar *str, *cursor;
1086 if (!haystack || !needle || strlen(needle) == 0)
1087 return haystack ? g_strdup(haystack) : NULL;
1089 str = g_strdup (haystack);
1091 for (cursor = str; cursor && *cursor; ++cursor) {
1092 if (g_str_has_prefix (cursor, needle)) {
1094 memmove (cursor + 1,
1095 cursor + strlen (needle),
1096 strlen (cursor + strlen (needle)) + 1);
/*
 * hyperlinkify_plain_text:
 * Wrap every URL-like match in @txt in an HTML anchor, in place.
 *
 * @txt: text to modify
 * @offset: byte offset from which matches are searched
 *
 * The matches come from get_url_matches() sorted by descending offset, so
 * editing from the back keeps the remaining offsets valid. For each match
 * the href target is the matched text with the MARK_AMP_URI "amp;"
 * placeholder turned back into '&' and the pattern prefix (if any)
 * prepended, while the link text is the matched text as it stands. Each
 * url_match_t is released after use and the list is freed at the end.
 */
1104 hyperlinkify_plain_text (GString *txt, gint offset)
1107 GSList *match_list = get_url_matches (txt, offset);
1109 /* we will work backwards, so the offsets stay valid */
1110 for (cursor = match_list; cursor; cursor = cursor->next) {
1112 url_match_t *match = (url_match_t*) cursor->data;
1113 gchar *url = g_strndup (txt->str + match->offset, match->len);
1114 gchar *repl = NULL; /* replacement */
1116 /* the string still contains $(MARK_AMP_URI_STR)"amp;" for each
1117 * '&' in the original, because of the text->html conversion.
1118 * in the href-URL (and only there), we must convert that back to
1121 gchar *href_url = replace_string (url, MARK_AMP_URI_STR "amp;", '&');
1123 /* the prefix is NULL: use the one that is already there */
1124 repl = g_strdup_printf ("<a href=\"%s%s\">%s</a>",
1125 match->prefix ? match->prefix : EMPTY_STRING,
1128 /* replace the old thing with our hyperlink
1129 * replacement thing */
1130 g_string_erase (txt, match->offset, match->len);
1131 g_string_insert (txt, match->offset, repl);
1137 g_slice_free (url_match_t, match);
1140 g_slist_free (match_list);
/*
 * modest_text_utils_hyperlinkify:
 * Turn URL-like text in @string_buffer into HTML anchors, in place.
 *
 * When the buffer contains the literal text "<body>", scanning starts at
 * the offset of that tag so that whatever precedes it is left alone. The
 * search is an exact, case-sensitive strstr(): a tag written in another
 * case or carrying attributes is not recognised.
 */
1144 modest_text_utils_hyperlinkify (GString *string_buffer)
1149 after_body = strstr (string_buffer->str, "<body>");
1150 if (after_body != NULL)
1151 offset = after_body - string_buffer->str;
1152 hyperlinkify_plain_text (string_buffer, offset);
1156 /* Reduce an address to the part shown to the user. For optimization
 * reasons the string is changed in place: leading whitespace is removed
 * and the string is cut at the first '<', unless that '<' is the very
 * first character, in which case the address is left as it is.
 * A NULL address is rejected by g_return_if_fail. */
1158 modest_text_utils_get_display_address (gchar *address)
1162 g_return_if_fail (address);
1167 /* should not be needed, and otherwise, we probably won't screw up the address
1168 * more than it already is :)
1169 * g_return_val_if_fail (g_utf8_validate (address, -1, NULL), NULL);
1172 /* remove leading whitespace */
1173 if (address[0] == ' ')
1174 g_strchug (address);
1176 for (i = 0; address[i]; ++i) {
1177 if (address[i] == '<') {
1178 if (G_UNLIKELY(i == 0))
1179 return; /* there's nothing else, leave it */
1181 address[i] = '\0'; /* terminate the string here */
/*
 * modest_text_utils_get_email_address:
 * Extract the bare e-mail address from @full_address.
 *
 * @full_address: address text; must be non-NULL, valid UTF-8 (NULL is
 *   returned otherwise, via g_return_val_if_fail)
 *
 * The address is taken from between the last '<' and the first '>' that
 * follows it. On the two fallback paths a copy of the whole input is
 * returned instead. The result is always newly allocated.
 */
1193 modest_text_utils_get_email_address (const gchar *full_address)
1195 const gchar *left, *right;
1197 g_return_val_if_fail (full_address, NULL);
1202 g_return_val_if_fail (g_utf8_validate (full_address, -1, NULL), NULL);
1204 left = g_strrstr_len (full_address, strlen(full_address), "<");
1206 return g_strdup (full_address);
1208 right = g_strstr_len (left, strlen(left), ">");
1210 return g_strdup (full_address);
1212 return g_strndup (left + 1, right - left - 1);
/*
 * modest_text_utils_get_subject_prefix_len:
 * Measure the reply/forward prefix at the start of the subject @sub.
 *
 * @sub: subject text; must not be NULL (0 is returned otherwise, via
 *   g_return_val_if_fail)
 *
 * A quick test on the first two characters rejects subjects that cannot
 * start with one of the prefixes listed in the comment below. An optional
 * [...] block after the prefix is skipped; a '[' without a closing ']'
 * gives 0. When a ':' follows, a single space after it is also counted,
 * and the function calls itself on the remainder so that stacked prefixes
 * are measured as one.
 */
1216 modest_text_utils_get_subject_prefix_len (const gchar *sub)
1218 gint prefix_len = 0;
1220 g_return_val_if_fail (sub, 0);
1225 /* optimization: "Re", "RE", "re","Fwd", "FWD", "fwd","FW","Fw", "fw" */
1226 if (sub[0] != 'R' && sub[0] != 'F' && sub[0] != 'r' && sub[0] != 'f')
1228 else if (sub[0] && sub[1] != 'e' && sub[1] != 'E' && sub[1] != 'w' && sub[1] != 'W')
1235 /* skip over a [...] block */
1236 if (sub[prefix_len] == '[') {
1237 int c = prefix_len + 1;
1238 while (sub[c] && sub[c] != ']')
1241 return 0; /* no end to the ']' found */
1246 /* did we find the ':' ? */
1247 if (sub[prefix_len] == ':') {
1249 if (sub[prefix_len] == ' ')
1251 prefix_len += modest_text_utils_get_subject_prefix_len (sub + prefix_len);
1252 /* g_warning ("['%s','%s']", sub, (char*) sub + prefix_len); */
/*
 * modest_text_utils_utf8_strcmp:
 * Compare two UTF-8 strings with g_utf8_collate().
 *
 * @s1, @s2: strings to compare; either may be NULL. Identical pointers, a
 *   NULL @s1 and a NULL @s2 are each handled before anything is
 *   dereferenced.
 * @insensitive: when set, both strings are lower-cased with
 *   g_utf8_strdown() before collation
 *
 * NOTE(review): the shortcut tests mask the first byte with 0xf0, which is
 * zero only for byte values 0x00-0x0f, not for ASCII in general; 0x80
 * looks like the intended mask. As written the shortcut is taken only for
 * empty strings and control characters, where it compares the first bytes
 * alone -- confirm before changing, since a wider mask would make the
 * first-byte-only comparison apply to ordinary text.
 */
1260 modest_text_utils_utf8_strcmp (const gchar* s1, const gchar *s2, gboolean insensitive)
1263 /* work even when s1 and/or s2 == NULL */
1264 if (G_UNLIKELY(s1 == s2))
1266 if (G_UNLIKELY(!s1))
1268 if (G_UNLIKELY(!s2))
1271 /* if it's not case sensitive */
1274 /* optimization: shortcut if first char is ascii */
1275 if (((s1[0] & 0xf0)== 0) && ((s2[0] & 0xf0) == 0))
1276 return s1[0] - s2[0];
1278 return g_utf8_collate (s1, s2);
1284 /* optimization: shortcut if first char is ascii */
1285 if (((s1[0] & 0xf0) == 0) && ((s2[0] & 0xf0) == 0))
1286 return tolower(s1[0]) - tolower(s2[0]);
1288 n1 = g_utf8_strdown (s1, -1);
1289 n2 = g_utf8_strdown (s2, -1);
1291 result = g_utf8_collate (n1, n2);
/*
 * modest_text_utils_get_display_date:
 * Format @date for display: the locale's time ("%X") when the date falls
 * on the current day, the locale's date ("%x") otherwise.
 *
 * Returns a pointer to a static buffer of DATE_BUF_SIZE bytes. The caller
 * must not free it, and the contents are overwritten by the next call, so
 * the function is not reentrant.
 *
 * NOTE(review): the day numbers are obtained by dividing seconds since the
 * epoch by 86400, i.e. they change at midnight UTC, while the text is
 * formatted in local time by modest_text_utils_strftime() -- confirm
 * whether "today" is meant to follow the local calendar day.
 */
1302 modest_text_utils_get_display_date (time_t date)
1304 #define DATE_BUF_SIZE 64
1305 static gchar date_buf[DATE_BUF_SIZE];
1307 /* calculate the # of days since epoch
1308 * for today and for the date provided
1309 * based on idea from pvanhoof */
1310 int day = time(NULL) / (24 * 60 * 60);
1311 int date_day = date / (24 * 60 * 60);
1313 /* if it's today, show the time, if it's not today, show the date instead */
1315 if (day == date_day) /* is the date today? */
1316 modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, "%X", date);
1318 modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, "%x", date);
1320 return date_buf; /* this is a static buffer, don't free! */
/*
 * modest_text_utils_validate_folder_name:
 * Check whether @folder_name is acceptable as a folder name.
 *
 * The checks run in this order: not NULL, not empty, no leading '.', no
 * leading or trailing ASCII whitespace, no character from the folder-name
 * forbidden set, and not equal (ignoring ASCII case) to one of the
 * reserved names listed below. TRUE is returned when every check passes.
 */
1326 modest_text_utils_validate_folder_name (const gchar *folder_name)
1328 /* based on http://msdn2.microsoft.com/en-us/library/aa365247.aspx,
1329 * with some extras */
1333 const gchar **cursor = NULL;
1334 const gchar *forbidden_names[] = { /* windows does not like these */
1335 "CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6",
1336 "COM7", "COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
1337 ".", "..", "cur", "tmp", "new", NULL /* cur, tmp new are reserved for Maildir */
1340 /* cannot be NULL */
1344 /* cannot be empty */
1345 len = strlen(folder_name);
1349 /* cannot start with a dot, vfat does not seem to like that */
1350 if (folder_name[0] == '.')
1353 /* cannot start or end with a space */
1354 if (g_ascii_isspace(folder_name[0]) || g_ascii_isspace(folder_name[len - 1]))
1357 /* cannot contain a forbidden char */
1358 for (i = 0; i < len; i++)
1359 if (modest_text_utils_is_forbidden_char (folder_name[i], FOLDER_NAME_FORBIDDEN_CHARS))
1362 /* cannot be one of the forbidden names (whole-name, case-insensitive match) */
1364 for (cursor = forbidden_names; cursor && *cursor; ++cursor) {
1365 if (g_ascii_strcasecmp (folder_name, *cursor) == 0)
1370 return TRUE; /* it's valid! */
/*
 * modest_text_utils_validate_domain_name:
 * Check @domain against a regular expression for dotted host names.
 *
 * @domain: name to check; must not be NULL (FALSE is returned otherwise,
 *   via g_return_val_if_fail)
 *
 * The expression is compiled on every call, case-insensitively and with
 * REG_NOSUB. It requires at least two labels separated by '.', each made
 * of letters, digits and '-' and ending in a letter or digit.
 *
 * NOTE(review): because the pattern needs at least one '.', single-label
 * names are rejected; and since only the last character of a label is
 * forced to be alphanumeric, a label may start with '-' even though the
 * comment below says "beginning/ending in alphanum". Confirm both.
 */
1376 modest_text_utils_validate_domain_name (const gchar *domain)
1378 gboolean valid = FALSE;
1380 const gchar* domain_regex = "^([a-z0-9-]*[a-z0-9]\\.)+[a-z0-9-]*[a-z0-9]$";
1382 g_return_val_if_fail (domain, FALSE);
1387 memset (&rx, 0, sizeof(regex_t)); /* coverity wants this... */
1389 /* domain name: all alphanum or '-' or '.',
1390 * but beginning/ending in alphanum */
1391 if (regcomp (&rx, domain_regex, REG_ICASE|REG_EXTENDED|REG_NOSUB)) {
1392 g_warning ("BUG: error in regexp");
1396 valid = (regexec (&rx, domain, 1, NULL, 0) == 0);
/*
 * modest_text_utils_validate_email_address:
 * Check whether @email_address looks like a valid name@domain address.
 *
 * @email_address: address to check; must not be NULL (FALSE is returned
 *   otherwise, via g_return_val_if_fail)
 * @invalid_char_position: optional out parameter. It is reset to NULL on
 *   entry and, when one of the characters listed in rfc822_specials is
 *   found where it is not allowed, set to point at that character inside
 *   @email_address.
 *
 * The address must contain exactly one '@' (first and last occurrence
 * coincide). The name part and then the domain part are scanned byte by
 * byte; bytes at or below ' ' and bytes at or above 127 are rejected in
 * both. The final answer is TRUE only when count is at least 1.
 */
1405 modest_text_utils_validate_email_address (const gchar *email_address,
1406 const gchar **invalid_char_position)
1409 const gchar *c = NULL, *domain = NULL;
1410 static gchar *rfc822_specials = "()<>@,;:\\\"[]&";
1412 if (invalid_char_position)
1413 *invalid_char_position = NULL;
1415 g_return_val_if_fail (email_address, FALSE);
1417 /* check that the email address contains exactly one @ */
1418 if (!strstr(email_address, "@") ||
1419 (strstr(email_address, "@") != g_strrstr(email_address, "@")))
1422 /* first we validate the name portion (name@domain) */
1423 for (c = email_address; *c; c++) {
1425 (c == email_address ||
1427 *(c - 1) == '\"')) {
1431 if (*c == '\\' && (*++c == ' '))
1433 if (*c <= ' ' || *c >= 127)
1446 if (*c <= ' ' || *c >= 127)
1448 if (strchr(rfc822_specials, *c)) {
1449 if (invalid_char_position)
1450 *invalid_char_position = c;
1454 if (c == email_address || *(c - 1) == '.')
1457 /* next we validate the domain portion (name@domain) */
1458 if (!*(domain = ++c))
1462 if (c == domain || *(c - 1) == '.' || *(c + 1) == '\0')
1466 if (*c <= ' ' || *c >= 127)
1468 if (strchr(rfc822_specials, *c)) {
1469 if (invalid_char_position)
1470 *invalid_char_position = c;
1475 return (count >= 1) ? TRUE : FALSE;
/*
 * Validates one recipient string.
 *
 * recipient:             string to check; NULL is rejected by
 *                        g_return_val_if_fail() with FALSE.
 * invalid_char_position: optional out parameter, reset to NULL on entry and
 *                        handed on to
 *                        modest_text_utils_validate_email_address().
 *
 * The whole string is first tried as a plain e-mail address. After that the
 * function works on a whitespace-stripped private copy: a leading
 * double-quoted part is walked character by character (with a special case
 * for backslash), then the copy is scanned for '<'. The text from that point
 * is copied and stripped again, and when it starts with "<" and ends with
 * ">" the text between the brackets is validated as an e-mail address.
 */
1479 modest_text_utils_validate_recipient (const gchar *recipient, const gchar **invalid_char_position)
1481 gchar *stripped, *current;
1483 gboolean has_error = FALSE;
/* clear the out parameter before any check can bail out */
1485 if (invalid_char_position)
1486 *invalid_char_position = NULL;
1488 g_return_val_if_fail (recipient, FALSE);
/* first attempt: the recipient is nothing but a bare address */
1490 if (modest_text_utils_validate_email_address (recipient, invalid_char_position))
/* g_strstrip() trims in place and returns its argument, so `stripped`
 * still refers to the copy made by g_strdup() */
1493 stripped = g_strdup (recipient);
1494 stripped = g_strstrip (stripped);
/* tests whether nothing is left at `current` */
1497 if (*current == '\0') {
/* a leading double quote opens a quoted part; step over the quote itself */
1503 if (*current == '\"') {
1504 current = g_utf8_next_char (current);
/* walk the quoted part one UTF-8 character at a time */
1506 for (; *current != '\0'; current = g_utf8_next_char (current)) {
1507 if (*current == '\\') {
1508 /* TODO: This causes a warning, which breaks the build,
1509 * because a gchar cannot be < 0.
1511 if (current[1] <0) {
1516 } else if (*current == '\"') {
1518 current = g_utf8_next_char (current);
1524 for (current = stripped ; *current != '\0'; current = g_utf8_next_char (current)) {
1525 if (*current == '<') {
1537 right_part = g_strdup (current);
1539 right_part = g_strstrip (right_part);
1541 if (g_str_has_prefix (right_part, "<") &&
1542 g_str_has_suffix (right_part, ">")) {
1546 address = g_strndup (right_part+1, strlen (right_part) - 2);
1547 g_free (right_part);
1548 valid = modest_text_utils_validate_email_address (address, invalid_char_position);
1552 g_free (right_part);
/*
 * Formats `size` as a newly allocated, human-readable string. The value is
 * bucketed against 1024-based KB / MB / GB thresholds and each bucket uses
 * its own format string, fetched through _FM(). Every branch returns the
 * result of g_strdup_printf(), so the caller releases it with g_free().
 */
1559 modest_text_utils_get_display_size (guint64 size)
1561 const guint KB=1024;
1562 const guint MB=1024 * KB;
1563 const guint GB=1024 * MB;
/* NOTE(review): presumably the size == 0 case, since the next test starts
 * at 0 < size -- confirm */
1566 return g_strdup_printf(_FM("sfil_li_size_kb"), 0);
/* anything above zero but below 1 KB is shown with the value 1 */
1567 if (0 < size && size < KB)
1568 return g_strdup_printf (_FM("sfil_li_size_kb"), 1);
1569 else if (KB <= size && size < 100 * KB)
/* NOTE(review): `size / KB` is a guint64 passed through varargs; verify
 * that the format behind this key uses a 64-bit integer conversion */
1570 return g_strdup_printf (_FM("sfil_li_size_1kb_99kb"), size / KB);
1571 else if (100*KB <= size && size < MB)
/* `(float) size / MB` converts first and then divides, so this is a
 * fractional number of MB (passed as double through varargs).
 * NOTE(review): the value is below 1.0 in this bucket -- confirm that
 * dividing by MB rather than KB is what the format expects */
1572 return g_strdup_printf (_FM("sfil_li_size_100kb_1mb"), (float) size / MB);
1573 else if (MB <= size && size < 10*MB)
1574 return g_strdup_printf (_FM("sfil_li_size_1mb_10mb"), (float) size / MB);
1575 else if (10*MB <= size && size < GB)
/* integer division again: a guint64 count of whole MB (same varargs
 * caveat as the KB branch above) */
1576 return g_strdup_printf (_FM("sfil_li_size_10mb_1gb"), size / MB);
/* everything else is shown as a fractional number of GB */
1578 return g_strdup_printf (_FM("sfil_li_size_1gb_or_greater"), (float) size / GB);
/*
 * Extracts the part between angle brackets from `address`, e.g. the
 * "a@b.c" of "Name <a@b.c>". The result is always a newly allocated string
 * (g_strdup / g_strndup) that the caller frees with g_free().
 *
 * NOTE(review): `address` is passed straight to strstr() with no NULL
 * check visible here -- callers must not pass NULL.
 */
1582 get_email_from_address (const gchar * address)
1584 gchar *left_limit, *right_limit;
/* first '<' and last '>' in the string */
1586 left_limit = strstr (address, "<");
1587 right_limit = g_strrstr (address, ">");
/* no usable bracket pair (one is missing, or '<' comes after the last
 * '>'): hand back a copy of the whole input */
1589 if ((left_limit == NULL)||(right_limit == NULL)|| (left_limit > right_limit))
1590 return g_strdup (address);
/* copy only the bytes strictly between '<' and '>' */
1592 return g_strndup (left_limit + 1, (right_limit - left_limit) - 1);
/*
 * Renders `color` as a newly allocated string made of '#' followed by
 * twelve lowercase hexadecimal digits: four for red, four for green and
 * four for blue, in that order. A NULL `color` makes
 * g_return_val_if_fail() return NULL.
 */
1596 modest_text_utils_get_color_string (GdkColor *color)
1598 g_return_val_if_fail (color, NULL);
/* each "%x" prints exactly one nibble; the low 16 bits of every channel
 * are emitted most-significant nibble first, which keeps leading zeros */
1600 return g_strdup_printf ("#%x%x%x%x%x%x%x%x%x%x%x%x",
1601 (color->red >> 12) & 0xf, (color->red >> 8) & 0xf,
1602 (color->red >> 4) & 0xf, (color->red) & 0xf,
1603 (color->green >> 12) & 0xf, (color->green >> 8) & 0xf,
1604 (color->green >> 4) & 0xf, (color->green) & 0xf,
1605 (color->blue >> 12) & 0xf, (color->blue >> 8) & 0xf,
1606 (color->blue >> 4) & 0xf, (color->blue) & 0xf);
1610 modest_text_utils_text_buffer_get_text (GtkTextBuffer *buffer)
1612 GtkTextIter start, end;
1613 gchar *slice, *current;
1614 GString *result = g_string_new ("");
1616 g_return_val_if_fail (buffer && GTK_IS_TEXT_BUFFER (buffer), NULL);
1618 gtk_text_buffer_get_start_iter (buffer, &start);
1619 gtk_text_buffer_get_end_iter (buffer, &end);
1621 slice = gtk_text_buffer_get_slice (buffer, &start, &end, FALSE);
1624 while (current && current != '\0') {
1625 if (g_utf8_get_char (current) == 0xFFFC) {
1626 result = g_string_append_c (result, ' ');
1627 current = g_utf8_next_char (current);
1629 gchar *next = g_utf8_strchr (current, -1, 0xFFFC);
1631 result = g_string_append (result, current);
1633 result = g_string_append_len (result, current, next - current);
1640 return g_string_free (result, FALSE);
/*
 * Tells whether `character` appears in the forbidden-character table that
 * belongs to `type`. The function falls through to FALSE ("valid") when
 * the scan finds no match. An unrecognised `type` goes through
 * g_return_val_if_reached (TRUE), i.e. the character is treated as
 * forbidden.
 */
1645 modest_text_utils_is_forbidden_char (const gchar character,
1646 ModestTextUtilsForbiddenCharType type)
1649 const gchar *forbidden_chars = NULL;
1651 /* We need to get the length in the switch because the
1652 compiler needs to know the size at compile time */
/* G_N_ELEMENTS is applied to the table names themselves (not to the
 * `forbidden_chars` pointer), which is why each case computes `len` */
1654 case ACCOUNT_TITLE_FORBIDDEN_CHARS:
1655 forbidden_chars = account_title_forbidden_chars;
1656 len = G_N_ELEMENTS (account_title_forbidden_chars);
1658 case FOLDER_NAME_FORBIDDEN_CHARS:
1659 forbidden_chars = folder_name_forbidden_chars;
1660 len = G_N_ELEMENTS (folder_name_forbidden_chars);
/* NOTE(review): this enumerator ends in _NAMES while its siblings end in
 * _CHARS, yet it selects a *_chars table -- confirm the naming is
 * intentional */
1662 case USER_NAME_FORBIDDEN_NAMES:
1663 forbidden_chars = user_name_forbidden_chars;
1664 len = G_N_ELEMENTS (user_name_forbidden_chars);
/* reaching this is treated as a programming error */
1667 g_return_val_if_reached (TRUE);
/* linear scan over the selected table.
 * NOTE(review): whether `len` includes a terminating NUL depends on how
 * the tables are defined, which is not visible here */
1670 for (i = 0; i < len ; i++)
1671 if (forbidden_chars[i] == character)
1674 return FALSE; /* it's valid! */
/*
 * Copies the text currently selected in `label` into a newly allocated
 * string (`selection`). An empty, newly allocated string is returned by
 * the g_strdup ("") path; NOTE(review): presumably that is the
 * no-selection case -- confirm against the branch structure.
 */
1678 modest_text_utils_label_get_selection (GtkLabel *label)
/* `start` and `end` receive the selection bounds; they are used below as
 * character offsets, not byte offsets */
1683 if (gtk_label_get_selection_bounds (GTK_LABEL (label), &start, &end)) {
1684 const gchar *start_offset;
1685 const gchar *end_offset;
/* turn each character offset into a pointer into the label's text */
1686 start_offset = gtk_label_get_text (GTK_LABEL (label));
1687 start_offset = g_utf8_offset_to_pointer (start_offset, start);
1688 end_offset = gtk_label_get_text (GTK_LABEL (label));
1689 end_offset = g_utf8_offset_to_pointer (end_offset, end);
/* the pointer difference is the byte length of the selected run */
1690 selection = g_strndup (start_offset, end_offset - start_offset);
1693 return g_strdup ("");
/*
 * Character predicate handed to gtk_text_iter_forward_find_char(): true
 * exactly when `ch` is U+FFFC, the character this file treats as the
 * stand-in for an embedded image.
 */
1698 _forward_search_image_char (gunichar ch,
1701 return (ch == 0xFFFC);
/*
 * Examines the current selection of `buffer`. `result` starts out as
 * "the buffer has a selection"; the selection is then inspected for image
 * placeholder characters (U+FFFC). A `buffer` that is not a GtkTextBuffer
 * makes g_return_val_if_fail() return FALSE.
 */
1705 modest_text_utils_buffer_selection_is_valid (GtkTextBuffer *buffer)
1708 GtkTextIter start, end;
1710 g_return_val_if_fail (GTK_IS_TEXT_BUFFER (buffer), FALSE);
1712 result = gtk_text_buffer_get_has_selection (GTK_TEXT_BUFFER (buffer));
1714 /* check there are no images in selection */
1716 gtk_text_buffer_get_selection_bounds (buffer, &start, &end);
/* the first selected character is tested on its own, before the forward
 * search further down */
1717 if (gtk_text_iter_get_char (&start)== 0xFFFC)
/* `end` is moved one character back before it is used again */
1720 gtk_text_iter_backward_char (&end);
/* look ahead from `start` for a U+FFFC character */
1721 if (gtk_text_iter_forward_find_char (&start, _forward_search_image_char,
/*
 * Builds a newly allocated copy of `text`, walking it one byte at a time.
 * A byte is either copied as it is or contributes the two-character
 * sequence "__" to the output. NOTE(review): presumably "__" is emitted
 * for each '_' so that it is not taken as a mnemonic marker -- confirm
 * against the condition that selects that branch.
 * The caller frees the returned string with g_free().
 */
1732 modest_text_utils_escape_mnemonics (const gchar *text)
1735 GString *result = NULL;
1740 result = g_string_new ("");
1741 for (p = text; *p != '\0'; p++) {
1743 result = g_string_append (result, "__");
1745 result = g_string_append_c (result, *p);
/* FALSE: keep the character data and hand it to the caller */
1748 return g_string_free (result, FALSE);