1 /* Copyright (c) 2006, Nokia Corporation
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of the Nokia Corporation nor the names of its
14 * contributors may be used to endorse or promote products derived from
15 * this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
18 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
20 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
21 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #endif /*_GNU_SOURCE*/
35 #include <string.h> /* for strcasestr */
40 #include <glib/gi18n.h>
42 #include <modest-tny-platform-factory.h>
43 #include <modest-text-utils.h>
44 #include <modest-runtime.h>
49 #endif /*HAVE_CONFIG_H */
52 #define FORWARD_STRING _("mcen_ia_editor_original_message")
53 #define FROM_STRING _("mail_va_from")
54 #define SENT_STRING _("mcen_fi_message_properties_sent")
55 #define TO_STRING _("mail_va_to")
56 #define SUBJECT_STRING _("mail_va_subject")
57 #define EMPTY_STRING ""
60 * do the hyperlinkification only for texts < 50 Kb,
61 * as it's quite slow. Without this, e.g. mail with
62 * an uuencoded part (which is not recognized as attachment,
65 #define HYPERLINKIFY_MAX_LENGTH (1024*50)
70 * we need these regexps to find URLs in plain text e-mails
72 typedef struct _url_match_pattern_t url_match_pattern_t;
73 struct _url_match_pattern_t {
79 typedef struct _url_match_t url_match_t;
88 * we mark the ampersand with \007 when converting text->html
89 * because after text->html we do hyperlink detecting, which
90 * could be screwed up by the ampersand.
91 * ie. 1<3 ==> 1\007lt;3
93 #define MARK_AMP '\007'
94 #define MARK_AMP_STR "\007"
96 /* mark '&' separately, because it can be part of URLs.
97 * ie. a&b => a\006amp;b, but a>b => a\007gt;b
99 * we need to handle '&' separately, because it can be part of URIs
100 * (as in href="http://foo.bar?a=1&b=1"), so inside those URIs
101 * we need to re-replace \006amp; with '&' again, while outside uri's
104 * yes, it's messy, but a consequence of doing text->html first, then hyperlinkify
106 #define MARK_AMP_URI '\006'
107 #define MARK_AMP_URI_STR "\006"
110 /* note: match MARK_AMP_URI_STR as well, because after txt->html, a '&' will look like $(MARK_AMP_URI_STR)"amp;" */
111 #define MAIL_VIEWER_URL_MATCH_PATTERNS { \
112 { "(file|rtsp|http|ftp|https|mms|mmsh|rtsp|rdp|lastfm)://[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR \
113 "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]", \
115 { "www\\.[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]",\
117 { "ftp\\.[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]",\
119 { "(jabberto|voipto|sipto|sip|chatto|xmpp):[-_a-z@0-9.+]+", \
121 { "mailto:[-_a-z0-9.\\+]+@[-_a-z0-9.]+", \
123 { "[-_a-z0-9.\\+]+@[-_a-z0-9.]+",\
127 const gchar account_title_forbidden_chars[] = {
128 '\\', '/', ':', '*', '?', '\'', '<', '>', '|', '^'
130 const gchar folder_name_forbidden_chars[] = {
131 '<', '>', ':', '\'', '/', '\\', '|', '?', '*', '^', '%', '$', '#', '&'
133 const gchar user_name_forbidden_chars[] = {
136 const guint ACCOUNT_TITLE_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (account_title_forbidden_chars);
137 const guint FOLDER_NAME_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (folder_name_forbidden_chars);
138 const guint USER_NAME_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (user_name_forbidden_chars);
141 static gchar* cite (const time_t sent_date, const gchar *from);
142 static void hyperlinkify_plain_text (GString *txt);
143 static gint cmp_offsets_reverse (const url_match_t *match1, const url_match_t *match2);
144 static GSList* get_url_matches (GString *txt);
146 static GString* get_next_line (const char *b, const gsize blen, const gchar * iter);
147 static int get_indent_level (const char *l);
148 static void unquote_line (GString * l);
149 static void append_quoted (GString * buf, const int indent, const GString * str,
151 static int get_breakpoint_utf8 (const gchar * s, const gint indent, const gint limit);
152 static int get_breakpoint_ascii (const gchar * s, const gint indent, const gint limit);
153 static int get_breakpoint (const gchar * s, const gint indent, const gint limit);
155 static gchar* modest_text_utils_quote_plain_text (const gchar *text,
157 const gchar *signature,
161 static gchar* modest_text_utils_quote_html (const gchar *text,
163 const gchar *signature,
166 static gchar* get_email_from_address (const gchar *address);
169 /* ******************************************************************* */
170 /* ************************* PUBLIC FUNCTIONS ************************ */
171 /* ******************************************************************* */
174 modest_text_utils_quote (const gchar *text,
175 const gchar *content_type,
176 const gchar *signature,
178 const time_t sent_date,
182 gchar *retval, *cited;
184 g_return_val_if_fail (text, NULL);
185 g_return_val_if_fail (content_type, NULL);
187 cited = cite (sent_date, from);
189 if (content_type && strcmp (content_type, "text/html") == 0)
190 /* TODO: extract the <body> of the HTML and pass it to
192 retval = modest_text_utils_quote_html (text, cited, signature, attachments, limit);
194 retval = modest_text_utils_quote_plain_text (text, cited, signature, attachments, limit);
203 modest_text_utils_cite (const gchar *text,
204 const gchar *content_type,
205 const gchar *signature,
212 g_return_val_if_fail (text, NULL);
213 g_return_val_if_fail (content_type, NULL);
216 retval = g_strdup ("");
217 else if (strcmp(content_type, "text/html") == 0) {
218 tmp_sig = g_strconcat ("\n", signature, NULL);
219 retval = modest_text_utils_convert_to_html_body(tmp_sig, -1, TRUE);
222 retval = g_strconcat (text, "\n", signature, NULL);
229 forward_cite (const gchar *from,
232 const gchar *subject)
234 g_return_val_if_fail (sent, NULL);
236 return g_strdup_printf ("%s\n%s %s\n%s %s\n%s %s\n%s %s\n",
238 FROM_STRING, (from)?from:"",
240 TO_STRING, (to)?to:"",
241 SUBJECT_STRING, (subject)?subject:"");
245 modest_text_utils_inline (const gchar *text,
246 const gchar *content_type,
247 const gchar *signature,
251 const gchar *subject)
257 g_return_val_if_fail (text, NULL);
258 g_return_val_if_fail (content_type, NULL);
260 modest_text_utils_strftime (sent_str, 100, "%c", sent_date);
262 cited = forward_cite (from, sent_str, to, subject);
264 if (content_type && strcmp (content_type, "text/html") == 0)
265 retval = modest_text_utils_quote_html (text, cited, signature, NULL, 80);
267 retval = modest_text_utils_quote_plain_text (text, cited, signature, NULL, 80);
273 /* just to prevent warnings:
274 * warning: `%x' yields only last 2 digits of year in some locales
277 modest_text_utils_strftime(char *s, gsize max, const char *fmt, time_t timet)
281 /* does not work on old maemo glib:
282 * g_date_set_time_t (&date, timet);
284 localtime_r (&timet, &tm);
285 return strftime(s, max, fmt, &tm);
289 modest_text_utils_derived_subject (const gchar *subject, const gchar *prefix)
293 g_return_val_if_fail (prefix, NULL);
295 if (!subject || subject[0] == '\0')
296 subject = _("mail_va_no_subject");
298 tmp = g_strchug (g_strdup (subject));
300 if (!strncmp (tmp, prefix, strlen (prefix))) {
304 return g_strdup_printf ("%s %s", prefix, subject);
309 modest_text_utils_remove_address (const gchar *address_list, const gchar *address)
311 gchar *dup, *token, *ptr = NULL, *result;
312 GString *filtered_emails;
313 gchar *email_address;
315 g_return_val_if_fail (address_list, NULL);
318 return g_strdup (address_list);
320 email_address = get_email_from_address (address);
322 /* search for substring */
323 if (!strstr ((const char *) address_list, (const char *) email_address)) {
324 g_free (email_address);
325 return g_strdup (address_list);
328 dup = g_strdup (address_list);
329 filtered_emails = g_string_new (NULL);
331 token = strtok_r (dup, ",", &ptr);
333 while (token != NULL) {
334 /* Add to list if not found */
335 if (!strstr ((const char *) token, (const char *) email_address)) {
336 if (filtered_emails->len == 0)
337 g_string_append_printf (filtered_emails, "%s", g_strstrip (token));
339 g_string_append_printf (filtered_emails, ",%s", g_strstrip (token));
341 token = strtok_r (NULL, ",", &ptr);
343 result = filtered_emails->str;
346 g_free (email_address);
348 g_string_free (filtered_emails, FALSE);
355 modest_text_utils_remove_duplicate_addresses (const gchar *address_list)
357 GSList *addresses, *cursor;
361 g_return_val_if_fail (address_list, NULL);
363 table = g_hash_table_new (g_str_hash, g_str_equal);
364 addresses = modest_text_utils_split_addresses_list (address_list);
366 new_list = g_strdup("");
369 const gchar* address = (const gchar*)cursor->data;
371 /* ignore the address if already seen */
372 if (g_hash_table_lookup (table, address) == 0) {
374 gchar *tmp = g_strjoin (",", new_list, address, NULL);
378 g_hash_table_insert (table, (gchar*)address, GINT_TO_POINTER(1));
380 cursor = g_slist_next (cursor);
383 g_hash_table_destroy (table);
384 g_slist_foreach (addresses, (GFunc)g_free, NULL);
385 g_slist_free (addresses);
392 modest_text_utils_convert_buffer_to_html_start (GString *html, const gchar *data, gssize n)
395 gboolean space_seen = FALSE;
396 guint break_dist = 0; /* distance since last break point */
401 /* replace with special html chars where needed*/
402 for (i = 0; i != n; ++i) {
403 guchar kar = data[i];
405 if (space_seen && kar != ' ') {
406 g_string_append_c (html, ' ');
410 /* we artificially insert a breakpoint (newline)
411 * after 256, to make sure our lines are not so long
412 * they will DOS the regexping later
413 * Also, check that kar is ASCII to make sure that we
414 * don't break a UTF8 char in two
416 if (++break_dist >= 256 && kar < 127) {
417 g_string_append_c (html, '\n');
425 /* this is a temp place holder for '&'; we can only
426 * set the real '&' after hyperlink translation, otherwise
427 * we might screw that up */
428 break; /* ignore embedded \0s and MARK_AMP */
429 case '<' : g_string_append (html, MARK_AMP_STR "lt;"); break;
430 case '>' : g_string_append (html, MARK_AMP_STR "gt;"); break;
431 case '&' : g_string_append (html, MARK_AMP_URI_STR "amp;"); break; /* special case */
432 case '"' : g_string_append (html, MARK_AMP_STR "quot;"); break;
434 /* don't convert ' --> wpeditor will try to re-convert it... */
435 //case '\'' : g_string_append (html, "'"); break;
436 case '\n' : g_string_append (html, "<br>\n");break_dist= 0; break;
437 case '\t' : g_string_append (html, MARK_AMP_STR "nbsp;" MARK_AMP_STR "nbsp;" MARK_AMP_STR "nbsp; ");
438 break_dist=0; break; /* note the space at the end*/
441 if (space_seen) { /* second space in a row */
442 g_string_append (html, " ");
448 g_string_append_c (html, kar);
455 modest_text_utils_convert_buffer_to_html_finish (GString *html)
458 /* replace all our MARK_AMPs with real ones */
459 for (i = 0; i != html->len; ++i)
460 if ((html->str)[i] == MARK_AMP || (html->str)[i] == MARK_AMP_URI)
461 (html->str)[i] = '&';
466 modest_text_utils_convert_to_html (const gchar *data)
471 g_return_val_if_fail (data, NULL);
477 html = g_string_sized_new (1.5 * len); /* just a guess... */
479 g_string_append_printf (html,
481 "<meta http-equiv=\"content-type\" content=\"text/html; charset=utf8\">"
485 modest_text_utils_convert_buffer_to_html_start (html, data, -1);
487 g_string_append (html, "</body></html>");
489 if (len <= HYPERLINKIFY_MAX_LENGTH)
490 hyperlinkify_plain_text (html);
492 modest_text_utils_convert_buffer_to_html_finish (html);
494 return g_string_free (html, FALSE);
498 modest_text_utils_convert_to_html_body (const gchar *data, gssize n, gboolean hyperlinkify)
502 g_return_val_if_fail (data, NULL);
509 html = g_string_sized_new (1.5 * n); /* just a guess... */
511 modest_text_utils_convert_buffer_to_html_start (html, data, n);
513 if (hyperlinkify && (n < HYPERLINKIFY_MAX_LENGTH))
514 hyperlinkify_plain_text (html);
516 modest_text_utils_convert_buffer_to_html_finish (html);
518 return g_string_free (html, FALSE);
522 modest_text_utils_get_addresses_indexes (const gchar *addresses, GSList **start_indexes, GSList **end_indexes)
524 gchar *current, *start, *last_blank;
525 gint start_offset = 0, current_offset = 0;
527 g_return_if_fail (start_indexes != NULL);
528 g_return_if_fail (end_indexes != NULL);
530 start = (gchar *) addresses;
534 while (*current != '\0') {
535 if ((start == current)&&((*current == ' ')||(*current == ',')||(*current == ';'))) {
536 start = g_utf8_next_char (start);
538 last_blank = current;
539 } else if ((*current == ',')||(*current == ';')) {
540 gint *start_index, *end_index;
541 start_index = g_new0(gint, 1);
542 end_index = g_new0(gint, 1);
543 *start_index = start_offset;
544 *end_index = current_offset;
545 *start_indexes = g_slist_prepend (*start_indexes, start_index);
546 *end_indexes = g_slist_prepend (*end_indexes, end_index);
547 start = g_utf8_next_char (current);
548 start_offset = current_offset + 1;
550 } else if (*current == '"') {
551 current = g_utf8_next_char (current);
553 while ((*current != '"')&&(*current != '\0')) {
554 current = g_utf8_next_char (current);
559 current = g_utf8_next_char (current);
563 if (start != current) {
564 gint *start_index, *end_index;
565 start_index = g_new0(gint, 1);
566 end_index = g_new0(gint, 1);
567 *start_index = start_offset;
568 *end_index = current_offset;
569 *start_indexes = g_slist_prepend (*start_indexes, start_index);
570 *end_indexes = g_slist_prepend (*end_indexes, end_index);
573 *start_indexes = g_slist_reverse (*start_indexes);
574 *end_indexes = g_slist_reverse (*end_indexes);
581 modest_text_utils_split_addresses_list (const gchar *addresses)
584 const gchar *my_addrs = addresses;
588 g_return_val_if_fail (addresses, NULL);
590 /* skip any space, ',', ';' at the start */
591 while (my_addrs && (my_addrs[0] == ' ' || my_addrs[0] == ',' || my_addrs[0] == ';'))
594 /* are we at the end of addresses list? */
598 /* nope, we are at the start of some address
599 * now, let's find the end of the address */
601 while (end[0] && end[0] != ',' && end[0] != ';')
604 /* we got the address; copy it and remove trailing whitespace */
605 addr = g_strndup (my_addrs, end - my_addrs);
608 head = g_slist_append (NULL, addr);
609 head->next = modest_text_utils_split_addresses_list (end); /* recurse */
616 modest_text_utils_address_range_at_position (const gchar *recipients_list,
621 gchar *current = NULL;
622 gint range_start = 0;
625 gboolean is_quoted = FALSE;
627 g_return_if_fail (recipients_list);
628 g_return_if_fail (position < g_utf8_strlen(recipients_list, -1));
631 for (current = (gchar *) recipients_list; *current != '\0';
632 current = g_utf8_find_next_char (current, NULL)) {
633 gunichar c = g_utf8_get_char (current);
635 if ((c == ',') && (!is_quoted)) {
636 if (index < position) {
637 range_start = index + 1;
641 } else if (c == '\"') {
642 is_quoted = !is_quoted;
643 } else if ((c == ' ') &&(range_start == index)) {
651 *start = range_start;
657 modest_text_utils_address_with_standard_length (const gchar *recipients_list)
661 GString *buffer = g_string_new ("");
663 splitted = g_strsplit (recipients_list, "\n", 0);
667 if (current != splitted)
668 buffer = g_string_append_c (buffer, '\n');
669 line = g_strndup (*splitted, 1000);
670 buffer = g_string_append (buffer, line);
675 g_strfreev (splitted);
677 return g_string_free (buffer, FALSE);
681 /* ******************************************************************* */
682 /* ************************* UTILITY FUNCTIONS *********************** */
683 /* ******************************************************************* */
686 get_next_line (const gchar * b, const gsize blen, const gchar * iter)
692 return g_string_new("");
700 gs = g_string_new_len (i0, iter - i0);
704 get_indent_level (const char *l)
721 /* if we hit the signature marker "-- ", we return -(indent + 1). This
722 * stops reformatting.
724 if (strcmp (l, "-- ") == 0) {
732 unquote_line (GString * l)
747 g_string_erase (l, 0, p - l->str);
751 append_quoted (GString * buf, int indent, const GString * str,
756 indent = indent < 0 ? abs (indent) - 1 : indent;
757 for (i = 0; i <= indent; i++) {
758 g_string_append (buf, "> ");
761 g_string_append_len (buf, str->str, cutpoint);
763 g_string_append (buf, str->str);
765 g_string_append (buf, "\n");
769 get_breakpoint_utf8 (const gchar * s, gint indent, const gint limit)
772 const gchar *pos, *last;
775 indent = indent < 0 ? abs (indent) - 1 : indent;
779 uni = g_utf8_to_ucs4_fast (s, -1, NULL);
781 if ((index + 2 * indent > limit) && last) {
785 if (g_unichar_isspace (uni[index])) {
788 pos = g_utf8_next_char (pos);
796 get_breakpoint_ascii (const gchar * s, const gint indent, const gint limit)
801 if (last + 2 * indent < limit)
804 for (i = strlen (s); i > 0; i--) {
806 if (i + 2 * indent <= limit) {
817 get_breakpoint (const gchar * s, const gint indent, const gint limit)
820 if (g_utf8_validate (s, -1, NULL)) {
821 return get_breakpoint_utf8 (s, indent, limit);
822 } else { /* assume ASCII */
823 //g_warning("invalid UTF-8 in msg");
824 return get_breakpoint_ascii (s, indent, limit);
829 cite (const time_t sent_date, const gchar *from)
831 return g_strdup (_("mcen_ia_editor_original_message"));
835 quoted_attachments (GList *attachments)
838 GString *result = g_string_new ("");
839 for (node = attachments; node != NULL; node = g_list_next (node)) {
840 gchar *filename = (gchar *) node->data;
841 g_string_append_printf ( result, "%s %s\n", _("mcen_ia_editor_attach_filename"), filename);
844 return g_string_free (result, FALSE);
849 modest_text_utils_quote_plain_text (const gchar *text,
851 const gchar *signature,
856 gint indent, breakpoint, rem_indent = 0;
857 GString *q, *l, *remaining;
859 gchar *attachments_string = NULL;
861 q = g_string_new ("\n");
862 if (signature != NULL) {
863 q = g_string_append (q, signature);
864 q = g_string_append_c (q, '\n');
866 q = g_string_append (q, cite);
867 q = g_string_append_c (q, '\n');
869 /* remaining will store the rest of the line if we have to break it */
870 remaining = g_string_new ("");
875 l = get_next_line (text, len, iter);
876 iter = iter + l->len + 1;
877 indent = get_indent_level (l->str);
880 if (remaining->len) {
881 if (l->len && indent == rem_indent) {
882 g_string_prepend (l, " ");
883 g_string_prepend (l, remaining->str);
887 get_breakpoint (remaining->str,
890 append_quoted (q, rem_indent,
891 remaining, breakpoint);
892 g_string_erase (remaining, 0,
894 if (remaining->str[0] == ' ') {
895 g_string_erase (remaining, 0,
898 } while (remaining->len);
901 g_string_free (remaining, TRUE);
902 breakpoint = get_breakpoint (l->str, indent, limit);
903 remaining = g_string_new (l->str + breakpoint);
904 if (remaining->str[0] == ' ') {
905 g_string_erase (remaining, 0, 1);
908 append_quoted (q, indent, l, breakpoint);
909 g_string_free (l, TRUE);
910 } while ((iter < text + len) || (remaining->str[0]));
912 attachments_string = quoted_attachments (attachments);
913 q = g_string_append (q, attachments_string);
914 g_free (attachments_string);
916 return g_string_free (q, FALSE);
920 modest_text_utils_quote_html (const gchar *text,
922 const gchar *signature,
926 gchar *result = NULL;
927 gchar *signature_result = NULL;
928 const gchar *format = \
929 "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n" \
933 "<pre>%s<br/>%s<br/>%s</pre>\n" \
936 gchar *attachments_string = NULL;
937 gchar *q_attachments_string = NULL;
938 gchar *q_cite = NULL;
939 gchar *html_text = NULL;
941 if (signature == NULL)
942 signature_result = g_strdup ("");
944 signature_result = modest_text_utils_convert_to_html_body (signature, -1, TRUE);
946 attachments_string = quoted_attachments (attachments);
947 q_attachments_string = modest_text_utils_convert_to_html_body (attachments_string, -1, TRUE);
948 q_cite = modest_text_utils_convert_to_html_body (cite, -1, TRUE);
949 html_text = modest_text_utils_convert_to_html_body (text, -1, TRUE);
950 result = g_strdup_printf (format, signature_result, q_cite, html_text, q_attachments_string);
953 g_free (attachments_string);
954 g_free (q_attachments_string);
955 g_free (signature_result);
961 cmp_offsets_reverse (const url_match_t *match1, const url_match_t *match2)
963 return match2->offset - match1->offset;
966 static gboolean url_matches_block = 0;
967 static url_match_pattern_t patterns[] = MAIL_VIEWER_URL_MATCH_PATTERNS;
974 const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
975 for (i = 0; i != pattern_num; ++i) {
976 patterns[i].preg = g_slice_new0 (regex_t);
978 /* this should not happen */
979 if (regcomp (patterns[i].preg, patterns[i].regex,
980 REG_ICASE|REG_EXTENDED|REG_NEWLINE) != 0) {
981 g_warning ("%s: error in regexp:\n%s\n", __FUNCTION__, patterns[i].regex);
992 const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
993 for (i = 0; i != pattern_num; ++i) {
994 regfree (patterns[i].preg);
995 g_slice_free (regex_t, patterns[i].preg);
996 } /* don't free patterns itself -- it's static */
1000 modest_text_utils_hyperlinkify_begin (void)
1002 if (url_matches_block == 0)
1003 compile_patterns ();
1004 url_matches_block ++;
1008 modest_text_utils_hyperlinkify_end (void)
1010 url_matches_block--;
1011 if (url_matches_block <= 0)
1017 get_url_matches (GString *txt)
1020 guint rv, i, offset = 0;
1021 GSList *match_list = NULL;
1023 const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
1025 /* initialize the regexps */
1026 modest_text_utils_hyperlinkify_begin ();
1028 /* find all the matches */
1029 for (i = 0; i != pattern_num; ++i) {
1033 gboolean is_submatch;
1036 if ((rv = regexec (patterns[i].preg, txt->str + offset, 1, &rm, 0)) != 0) {
1037 g_return_val_if_fail (rv == REG_NOMATCH, NULL); /* this should not happen */
1038 break; /* try next regexp */
1043 is_submatch = FALSE;
1044 /* check old matches to see if this has already been matched */
1045 cursor = match_list;
1046 while (cursor && !is_submatch) {
1047 const url_match_t *old_match =
1048 (const url_match_t *) cursor->data;
1049 guint new_offset = offset + rm.rm_so;
1050 is_submatch = (new_offset > old_match->offset &&
1051 new_offset < old_match->offset + old_match->len);
1052 cursor = g_slist_next (cursor);
1056 /* make a list of our matches (<offset, len, prefix> tuples) */
1057 match = g_slice_new (url_match_t);
1058 match->offset = offset + rm.rm_so;
1059 match->len = rm.rm_eo - rm.rm_so;
1060 match->prefix = patterns[i].prefix;
1061 match_list = g_slist_prepend (match_list, match);
1067 modest_text_utils_hyperlinkify_end ();
1069 /* now sort the list, so the matches are in reverse order of occurrence.
1070 * that way, we can do the replacements starting from the end, so we don't need
1071 * to recalculate the offsets
1073 match_list = g_slist_sort (match_list,
1074 (GCompareFunc)cmp_offsets_reverse);
1080 /* replace all occurrences of needle in haystack with repl */
1082 replace_string (const gchar *haystack, const gchar *needle, gchar repl)
1084 gchar *str, *cursor;
1086 if (!haystack || !needle || strlen(needle) == 0)
1087 return haystack ? g_strdup(haystack) : NULL;
1089 str = g_strdup (haystack);
1091 for (cursor = str; cursor && *cursor; ++cursor) {
1092 if (g_str_has_prefix (cursor, needle)) {
1094 memmove (cursor + 1,
1095 cursor + strlen (needle),
1096 strlen (cursor + strlen (needle)) + 1);
1104 hyperlinkify_plain_text (GString *txt)
1107 GSList *match_list = get_url_matches (txt);
1109 /* we will work backwards, so the offsets stay valid */
1110 for (cursor = match_list; cursor; cursor = cursor->next) {
1112 url_match_t *match = (url_match_t*) cursor->data;
1113 gchar *url = g_strndup (txt->str + match->offset, match->len);
1114 gchar *repl = NULL; /* replacement */
1116 /* the string still contains $(MARK_AMP_URI_STR)"amp;" for each
1117 * '&' in the original, because of the text->html conversion.
1118 * in the href-URL (and only there), we must convert that back to
1121 gchar *href_url = replace_string (url, MARK_AMP_URI_STR "amp;", '&');
1123 /* the prefix is NULL: use the one that is already there */
1124 repl = g_strdup_printf ("<a href=\"%s%s\">%s</a>",
1125 match->prefix ? match->prefix : EMPTY_STRING,
1128 /* replace the old thing with our hyperlink
1129 * replacement thing */
1130 g_string_erase (txt, match->offset, match->len);
1131 g_string_insert (txt, match->offset, repl);
1137 g_slice_free (url_match_t, match);
1140 g_slist_free (match_list);
1144 /* for optimization reasons, we change the string in-place */
1146 modest_text_utils_get_display_address (gchar *address)
1150 g_return_if_fail (address);
1155 /* should not be needed, and otherwise, we probably won't screw up the address
1156 * more than it already is :)
1157 * g_return_val_if_fail (g_utf8_validate (address, -1, NULL), NULL);
1160 /* remove leading whitespace */
1161 if (address[0] == ' ')
1162 g_strchug (address);
1164 for (i = 0; address[i]; ++i) {
1165 if (address[i] == '<') {
1166 if (G_UNLIKELY(i == 0))
1167 return; /* there's nothing else, leave it */
1169 address[i] = '\0'; /* terminate the string here */
1181 modest_text_utils_get_email_address (const gchar *full_address)
1183 const gchar *left, *right;
1185 g_return_val_if_fail (full_address, NULL);
1190 g_return_val_if_fail (g_utf8_validate (full_address, -1, NULL), NULL);
1192 left = g_strrstr_len (full_address, strlen(full_address), "<");
1194 return g_strdup (full_address);
1196 right = g_strstr_len (left, strlen(left), ">");
1198 return g_strdup (full_address);
1200 return g_strndup (left + 1, right - left - 1);
1204 modest_text_utils_get_subject_prefix_len (const gchar *sub)
1206 gint prefix_len = 0;
1208 g_return_val_if_fail (sub, 0);
1213 /* optimization: "Re", "RE", "re","Fwd", "FWD", "fwd","FW","Fw", "fw" */
1214 if (sub[0] != 'R' && sub[0] != 'F' && sub[0] != 'r' && sub[0] != 'f')
1216 else if (sub[0] && sub[1] != 'e' && sub[1] != 'E' && sub[1] != 'w' && sub[1] != 'W')
1223 /* skip over a [...] block */
1224 if (sub[prefix_len] == '[') {
1225 int c = prefix_len + 1;
1226 while (sub[c] && sub[c] != ']')
1229 return 0; /* no end to the ']' found */
1234 /* did we find the ':' ? */
1235 if (sub[prefix_len] == ':') {
1237 if (sub[prefix_len] == ' ')
1239 prefix_len += modest_text_utils_get_subject_prefix_len (sub + prefix_len);
1240 /* g_warning ("['%s','%s']", sub, (char*) sub + prefix_len); */
1248 modest_text_utils_utf8_strcmp (const gchar* s1, const gchar *s2, gboolean insensitive)
1251 /* work even when s1 and/or s2 == NULL */
1252 if (G_UNLIKELY(s1 == s2))
1254 if (G_UNLIKELY(!s1))
1256 if (G_UNLIKELY(!s2))
1259 /* if it's not case sensitive */
1262 /* optimization: shortcut if first char is ascii */
1263 if (((s1[0] & 0xf0)== 0) && ((s2[0] & 0xf0) == 0))
1264 return s1[0] - s2[0];
1266 return g_utf8_collate (s1, s2);
1272 /* optimization: shortcut if first char is ascii */
1273 if (((s1[0] & 0xf0) == 0) && ((s2[0] & 0xf0) == 0))
1274 return tolower(s1[0]) - tolower(s2[0]);
1276 n1 = g_utf8_strdown (s1, -1);
1277 n2 = g_utf8_strdown (s2, -1);
1279 result = g_utf8_collate (n1, n2);
1290 modest_text_utils_get_display_date (time_t date)
1292 #define DATE_BUF_SIZE 64
1293 static gchar date_buf[DATE_BUF_SIZE];
1295 /* calculate the # of days since epoch
1296 * for today and for the date provided
1297 * based on idea from pvanhoof */
1298 int day = time(NULL) / (24 * 60 * 60);
1299 int date_day = date / (24 * 60 * 60);
1301 /* if it's today, show the time, if it's not today, show the date instead */
1303 if (day == date_day) /* is the date today? */
1304 modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, "%X", date);
1306 modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, "%x", date);
1308 return date_buf; /* this is a static buffer, don't free! */
1314 modest_text_utils_validate_folder_name (const gchar *folder_name)
1316 /* based on http://msdn2.microsoft.com/en-us/library/aa365247.aspx,
1317 * with some extras */
1321 const gchar **cursor = NULL;
1322 const gchar *forbidden_names[] = { /* windows does not like these */
1323 "CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6",
1324 "COM7", "COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
1325 ".", "..", "cur", "tmp", "new", NULL /* cur, tmp new are reserved for Maildir */
1328 /* cannot be NULL */
1332 /* cannot be empty */
1333 len = strlen(folder_name);
1337 /* cannot start with a dot, vfat does not seem to like that */
1338 if (folder_name[0] == '.')
1341 /* cannot start or end with a space */
1342 if (g_ascii_isspace(folder_name[0]) || g_ascii_isspace(folder_name[len - 1]))
1345 /* cannot contain a forbidden char */
1346 for (i = 0; i < len; i++)
1347 if (modest_text_utils_is_forbidden_char (folder_name[i], FOLDER_NAME_FORBIDDEN_CHARS))
1350 /* cannot contain a forbidden word */
1352 for (cursor = forbidden_names; cursor && *cursor; ++cursor) {
1353 if (g_ascii_strcasecmp (folder_name, *cursor) == 0)
1358 return TRUE; /* it's valid! */
1364 modest_text_utils_validate_domain_name (const gchar *domain)
1366 gboolean valid = FALSE;
1368 const gchar* domain_regex = "^([a-z0-9-]*[a-z0-9]\\.)+[a-z0-9-]*[a-z0-9]$";
1370 g_return_val_if_fail (domain, FALSE);
1375 memset (&rx, 0, sizeof(regex_t)); /* coverity wants this... */
1377 /* domain name: all alphanum or '-' or '.',
1378 * but beginning/ending in alphanum */
1379 if (regcomp (&rx, domain_regex, REG_ICASE|REG_EXTENDED|REG_NOSUB)) {
1380 g_warning ("BUG: error in regexp");
1384 valid = (regexec (&rx, domain, 1, NULL, 0) == 0);
1393 modest_text_utils_validate_email_address (const gchar *email_address,
1394 const gchar **invalid_char_position)
1397 const gchar *c = NULL, *domain = NULL;
1398 static gchar *rfc822_specials = "()<>@,;:\\\"[]&";
1400 if (invalid_char_position)
1401 *invalid_char_position = NULL;
1403 g_return_val_if_fail (email_address, FALSE);
1405 /* check that the email address contains exactly one @ */
1406 if (!strstr(email_address, "@") ||
1407 (strstr(email_address, "@") != g_strrstr(email_address, "@")))
1410 /* first we validate the name portion (name@domain) */
1411 for (c = email_address; *c; c++) {
1413 (c == email_address ||
1415 *(c - 1) == '\"')) {
1419 if (*c == '\\' && (*++c == ' '))
1421 if (*c <= ' ' || *c >= 127)
1434 if (*c <= ' ' || *c >= 127)
1436 if (strchr(rfc822_specials, *c)) {
1437 if (invalid_char_position)
1438 *invalid_char_position = c;
1442 if (c == email_address || *(c - 1) == '.')
1445 /* next we validate the domain portion (name@domain) */
1446 if (!*(domain = ++c))
1450 if (c == domain || *(c - 1) == '.' || *(c + 1) == '\0')
1454 if (*c <= ' ' || *c >= 127)
1456 if (strchr(rfc822_specials, *c)) {
1457 if (invalid_char_position)
1458 *invalid_char_position = c;
1463 return (count >= 1) ? TRUE : FALSE;
/* Checks a recipient string.
 *
 * recipient: string to check; NULL fails the g_return_val_if_fail() guard,
 *            which yields FALSE.
 * invalid_char_position: optional out-parameter. When non-NULL it is reset
 *            to NULL first and is then handed on to
 *            modest_text_utils_validate_email_address(), which may set it.
 *
 * The string is first tried as a plain e-mail address. After that a
 * whitespace-stripped copy is scanned: an optional part in double quotes,
 * then a search for '<', and finally the text enclosed by a leading '<' and
 * a trailing '>' is validated as an e-mail address.
 */
1467 modest_text_utils_validate_recipient (const gchar *recipient, const gchar **invalid_char_position)
1469 gchar *stripped, *current;
1471 gboolean has_error = FALSE;
1473 if (invalid_char_position)
1474 *invalid_char_position = NULL;
1476 g_return_val_if_fail (recipient, FALSE);
/* Simple case: the whole string is already a bare address. */
1478 if (modest_text_utils_validate_email_address (recipient, invalid_char_position))
/* Work on a private copy; g_strstrip() trims it in place and returns the
 * same pointer, so the reassignment does not lose the allocation. */
1481 stripped = g_strdup (recipient);
1482 stripped = g_strstrip (stripped);
1485 if (*current == '\0') {
/* A leading double quote opens a quoted part; the loop below walks it one
 * UTF-8 character at a time. */
1491 if (*current == '\"') {
1492 current = g_utf8_next_char (current);
1494 for (; *current != '\0'; current = g_utf8_next_char (current)) {
1495 if (*current == '\\') {
1496 /* TODO: This causes a warning, which breaks the build,
1497 * because a gchar cannot be < 0.
1499 if (current[1] <0) {
1504 } else if (*current == '\"') {
1506 current = g_utf8_next_char (current);
/* Scan the stripped copy from its beginning for the '<' character. */
1512 for (current = stripped ; *current != '\0'; current = g_utf8_next_char (current)) {
1513 if (*current == '<') {
1525 right_part = g_strdup (current);
1527 right_part = g_strstrip (right_part);
/* Both tests can only pass together when right_part holds at least the two
 * characters '<' and '>', so strlen (right_part) - 2 below cannot wrap. */
1529 if (g_str_has_prefix (right_part, "<") &&
1530 g_str_has_suffix (right_part, ">")) {
/* Copy what lies between the angle brackets and validate it on its own. */
1534 address = g_strndup (right_part+1, strlen (right_part) - 2);
1535 g_free (right_part);
1536 valid = modest_text_utils_validate_email_address (address, invalid_char_position);
1540 g_free (right_part);
/* Builds a human-readable size string for SIZE (a byte count).
 *
 * Returns a newly allocated string produced by g_strdup_printf(); the
 * format is looked up through _FM() with a key chosen by the range SIZE
 * falls in:
 *   below 1 KB (and non-zero)  "sfil_li_size_kb" with the value 1
 *   1 KB up to 100 KB          whole kilobytes
 *   100 KB up to 1 MB          fractional megabytes
 *   1 MB up to 10 MB           fractional megabytes
 *   10 MB up to 1 GB           whole megabytes
 *   remaining case             fractional gigabytes
 *
 * NOTE(review): size / KB and size / MB are guint64 values passed through
 * varargs; the translated format strings behind _FM() must use a matching
 * 64-bit conversion -- confirm against the message catalogue.
 */
1547 modest_text_utils_get_display_size (guint64 size)
1549 const guint KB=1024;
1550 const guint MB=1024 * KB;
1551 const guint GB=1024 * MB;
/* NOTE(review): presumably reached only when size is 0 -- confirm. */
1554 return g_strdup_printf(_FM("sfil_li_size_kb"), 0);
/* Anything smaller than one kilobyte is shown as 1. */
1555 if (0 < size && size < KB)
1556 return g_strdup_printf (_FM("sfil_li_size_kb"), 1);
1557 else if (KB <= size && size < 100 * KB)
1558 return g_strdup_printf (_FM("sfil_li_size_1kb_99kb"), size / KB);
1559 else if (100*KB <= size && size < MB)
/* The cast binds to size before the division, so the quotient keeps its
 * fractional part; as a variadic argument the float is promoted to double. */
1560 return g_strdup_printf (_FM("sfil_li_size_100kb_1mb"), (float) size / MB);
1561 else if (MB <= size && size < 10*MB)
1562 return g_strdup_printf (_FM("sfil_li_size_1mb_10mb"), (float) size / MB);
1563 else if (10*MB <= size && size < GB)
1564 return g_strdup_printf (_FM("sfil_li_size_10mb_1gb"), size / MB);
1566 return g_strdup_printf (_FM("sfil_li_size_1gb_or_greater"), (float) size / GB);
/* Extracts the part of ADDRESS enclosed in angle brackets.
 *
 * address: string to inspect. There is no NULL guard here, so it must be a
 *          valid string.
 *
 * Returns a newly allocated string: the text between the first '<' and the
 * last '>' of ADDRESS, or a copy of the whole of ADDRESS when either
 * bracket is missing or the last '>' comes before the first '<'.
 */
1570 get_email_from_address (const gchar * address)
1572 gchar *left_limit, *right_limit;
/* First '<' from the left, last '>' from the right. */
1574 left_limit = strstr (address, "<");
1575 right_limit = g_strrstr (address, ">");
1577 if ((left_limit == NULL)||(right_limit == NULL)|| (left_limit > right_limit))
1578 return g_strdup (address);
/* Skip the '<' itself; the length excludes both brackets. */
1580 return g_strndup (left_limit + 1, (right_limit - left_limit) - 1);
/* Formats COLOR as '#' followed by twelve hexadecimal digits.
 *
 * color: colour to format; NULL fails the g_return_val_if_fail() guard,
 *        which yields NULL.
 *
 * Returns a newly allocated string. The low 16 bits of the red, green and
 * blue fields are each written as four digits, most significant nibble
 * first.
 */
1584 modest_text_utils_get_color_string (GdkColor *color)
1586 g_return_val_if_fail (color, NULL);
/* Every argument is masked to four bits, so each "%x" prints exactly one
 * digit and leading zeros of a channel are preserved. */
1588 return g_strdup_printf ("#%x%x%x%x%x%x%x%x%x%x%x%x",
1589 (color->red >> 12) & 0xf, (color->red >> 8) & 0xf,
1590 (color->red >> 4) & 0xf, (color->red) & 0xf,
1591 (color->green >> 12) & 0xf, (color->green >> 8) & 0xf,
1592 (color->green >> 4) & 0xf, (color->green) & 0xf,
1593 (color->blue >> 12) & 0xf, (color->blue >> 8) & 0xf,
1594 (color->blue >> 4) & 0xf, (color->blue) & 0xf);
1598 modest_text_utils_text_buffer_get_text (GtkTextBuffer *buffer)
1600 GtkTextIter start, end;
1601 gchar *slice, *current;
1602 GString *result = g_string_new ("");
1604 g_return_val_if_fail (buffer && GTK_IS_TEXT_BUFFER (buffer), NULL);
1606 gtk_text_buffer_get_start_iter (buffer, &start);
1607 gtk_text_buffer_get_end_iter (buffer, &end);
1609 slice = gtk_text_buffer_get_slice (buffer, &start, &end, FALSE);
1612 while (current && current != '\0') {
1613 if (g_utf8_get_char (current) == 0xFFFC) {
1614 result = g_string_append_c (result, ' ');
1615 current = g_utf8_next_char (current);
1617 gchar *next = g_utf8_strchr (current, -1, 0xFFFC);
1619 result = g_string_append (result, current);
1621 result = g_string_append_len (result, current, next - current);
1628 return g_string_free (result, FALSE);
/* Tells whether CHARACTER appears in the forbidden-character table selected
 * by TYPE.
 *
 * character: the character to look up.
 * type: which table to search: account_title_forbidden_chars,
 *       folder_name_forbidden_chars or user_name_forbidden_chars.
 *
 * Returns FALSE when the character is not in the selected table. An
 * unhandled TYPE goes through g_return_val_if_reached(), which yields TRUE.
 */
1633 modest_text_utils_is_forbidden_char (const gchar character,
1634 ModestTextUtilsForbiddenCharType type)
1637 const gchar *forbidden_chars = NULL;
1639 /* We need to get the length in the switch because the
1640 compiler needs to know the size at compile time */
/* G_N_ELEMENTS is applied to the table names themselves, not to the
 * forbidden_chars pointer, for the reason given above. */
1642 case ACCOUNT_TITLE_FORBIDDEN_CHARS:
1643 forbidden_chars = account_title_forbidden_chars;
1644 len = G_N_ELEMENTS (account_title_forbidden_chars);
1646 case FOLDER_NAME_FORBIDDEN_CHARS:
1647 forbidden_chars = folder_name_forbidden_chars;
1648 len = G_N_ELEMENTS (folder_name_forbidden_chars);
1650 case USER_NAME_FORBIDDEN_NAMES:
1651 forbidden_chars = user_name_forbidden_chars;
1652 len = G_N_ELEMENTS (user_name_forbidden_chars);
/* Unknown table type: treated as a programming error and reported as TRUE. */
1655 g_return_val_if_reached (TRUE);
/* Linear search through the selected table. */
1658 for (i = 0; i < len ; i++)
1659 if (forbidden_chars[i] == character)
1662 return FALSE; /* it's valid! */
/* Copies the currently selected text of LABEL.
 *
 * label: the label to read the selection from.
 *
 * When the label reports selection bounds, the selected text is duplicated
 * into `selection`; the fallback at the end is a newly allocated empty
 * string.
 */
1666 modest_text_utils_label_get_selection (GtkLabel *label)
1671 if (gtk_label_get_selection_bounds (GTK_LABEL (label), &start, &end)) {
1672 const gchar *start_offset;
1673 const gchar *end_offset;
/* start and end are character offsets; g_utf8_offset_to_pointer() converts
 * each one into a pointer into the label's UTF-8 text. */
1674 start_offset = gtk_label_get_text (GTK_LABEL (label));
1675 start_offset = g_utf8_offset_to_pointer (start_offset, start);
1676 end_offset = gtk_label_get_text (GTK_LABEL (label));
1677 end_offset = g_utf8_offset_to_pointer (end_offset, end);
/* The pointer difference is the byte length of the selection. */
1678 selection = g_strndup (start_offset, end_offset - start_offset);
1681 return g_strdup ("");
/* Character predicate: true when CH is U+FFFC. It is handed to
 * gtk_text_iter_forward_find_char() by
 * modest_text_utils_buffer_selection_is_valid(). */
1686 _forward_search_image_char (gunichar ch,
1689 return (ch == 0xFFFC);
/* Checks the current selection of BUFFER.
 *
 * buffer: the text buffer to inspect; anything that is not a GtkTextBuffer
 *         fails the g_return_val_if_fail() guard, which yields FALSE.
 *
 * The result starts out as "the buffer has a selection"; the selection is
 * then examined for U+FFFC characters, which the comment below calls
 * images.
 */
1693 modest_text_utils_buffer_selection_is_valid (GtkTextBuffer *buffer)
1696 GtkTextIter start, end;
1698 g_return_val_if_fail (GTK_IS_TEXT_BUFFER (buffer), FALSE);
1700 result = gtk_text_buffer_get_has_selection (GTK_TEXT_BUFFER (buffer));
1702 /* check there are no images in selection */
1704 gtk_text_buffer_get_selection_bounds (buffer, &start, &end);
/* The character at the start of the selection is tested directly. */
1705 if (gtk_text_iter_get_char (&start)== 0xFFFC)
/* Move the end iterator back by one character before the forward search. */
1708 gtk_text_iter_backward_char (&end);
1709 if (gtk_text_iter_forward_find_char (&start, _forward_search_image_char,
/* Builds a copy of TEXT for use where mnemonics are interpreted.
 *
 * text: the string to copy, walked one byte at a time.
 *
 * Returns the accumulated string as newly allocated memory: characters are
 * appended one by one, and "__" is appended for the character that needs
 * escaping.
 * NOTE(review): the escaped character is presumably '_' -- confirm.
 */
1720 modest_text_utils_escape_mnemonics (const gchar *text)
1723 GString *result = NULL;
1728 result = g_string_new ("");
1729 for (p = text; *p != '\0'; p++) {
1731 result = g_string_append (result, "__");
1733 result = g_string_append_c (result, *p);
/* FALSE keeps the character data alive and hands it to the caller. */
1736 return g_string_free (result, FALSE);