1 /* Copyright (c) 2006, Nokia Corporation
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of the Nokia Corporation nor the names of its
14 * contributors may be used to endorse or promote products derived from
15 * this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
18 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
20 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
21 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #endif /*_GNU_SOURCE*/
35 #include <string.h> /* for strcasestr */
40 #include <glib/gi18n.h>
42 #include <modest-tny-platform-factory.h>
43 #include <modest-text-utils.h>
44 #include <modest-runtime.h>
49 #endif /*HAVE_CONFIG_H */
52 #define FORWARD_STRING _("mcen_ia_editor_original_message")
53 #define FROM_STRING _("mail_va_from")
54 #define SENT_STRING _("mcen_fi_message_properties_sent")
55 #define TO_STRING _("mail_va_to")
56 #define SUBJECT_STRING _("mail_va_subject")
57 #define EMPTY_STRING ""
60 * do the hyperlinkification only for texts < 50 Kb,
61 * as it's quite slow. Without this, e.g. mail with
62 * a uuencoded part (which is not recognized as an attachment,
65 #define HYPERLINKIFY_MAX_LENGTH (1024*50)
70 * we need these regexps to find URLs in plain text e-mails
72 typedef struct _url_match_pattern_t url_match_pattern_t;
73 struct _url_match_pattern_t {
79 typedef struct _url_match_t url_match_t;
88 * we mark the ampersand with \007 when converting text->html
89 * because after text->html we do hyperlink detecting, which
90 * could be screwed up by the ampersand.
91 * i.e. 1<3 ==> 1\007lt;3
93 #define MARK_AMP '\007'
94 #define MARK_AMP_STR "\007"
96 /* mark & separately, because it can be part of URLs.
97 * i.e. a&b => a\006amp;b, but a>b => a\007gt;b
99 * we need to handle '&' separately, because it can be part of URIs
100 * (as in href="http://foo.bar?a=1&b=1"), so inside those URIs
101 * we need to re-replace \006amp; with '&' again, while outside URIs
104 * yes, it's messy, but a consequence of doing text->html first, then hyperlinkify
106 #define MARK_AMP_URI '\006'
107 #define MARK_AMP_URI_STR "\006"
110 /* note: match MARK_AMP_URI_STR as well, because after txt->html, a '&' will look like $(MARK_AMP_URI_STR)"amp;" */
111 #define MAIL_VIEWER_URL_MATCH_PATTERNS { \
112 { "(file|rtsp|http|ftp|https|mms|mmsh|rtsp|rdp|lastfm)://[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR \
113 "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]", \
115 { "www\\.[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]",\
117 { "ftp\\.[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]",\
119 { "(jabberto|voipto|sipto|sip|chatto|xmpp):[-_a-z@0-9.+]+", \
121 { "mailto:[-_a-z0-9.\\+]+@[-_a-z0-9.]+", \
123 { "[-_a-z0-9.\\+]+@[-_a-z0-9.]+",\
127 const gchar account_title_forbidden_chars[] = {
128 '\\', '/', ':', '*', '?', '\'', '<', '>', '|', '^'
130 const gchar folder_name_forbidden_chars[] = {
131 '<', '>', ':', '\'', '/', '\\', '|', '?', '*', '^', '%', '$', '#', '&'
133 const gchar user_name_forbidden_chars[] = {
136 const guint ACCOUNT_TITLE_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (account_title_forbidden_chars);
137 const guint FOLDER_NAME_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (folder_name_forbidden_chars);
138 const guint USER_NAME_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (user_name_forbidden_chars);
141 static gchar* cite (const time_t sent_date, const gchar *from);
142 static void hyperlinkify_plain_text (GString *txt, gint offset);
143 static gint cmp_offsets_reverse (const url_match_t *match1, const url_match_t *match2);
144 static GSList* get_url_matches (GString *txt, gint offset);
146 static GString* get_next_line (const char *b, const gsize blen, const gchar * iter);
147 static int get_indent_level (const char *l);
148 static void unquote_line (GString * l);
149 static void append_quoted (GString * buf, const int indent, const GString * str,
151 static int get_breakpoint_utf8 (const gchar * s, const gint indent, const gint limit);
152 static int get_breakpoint_ascii (const gchar * s, const gint indent, const gint limit);
153 static int get_breakpoint (const gchar * s, const gint indent, const gint limit);
155 static gchar* modest_text_utils_quote_plain_text (const gchar *text,
157 const gchar *signature,
161 static gchar* modest_text_utils_quote_html (const gchar *text,
163 const gchar *signature,
166 static gchar* get_email_from_address (const gchar *address);
169 /* ******************************************************************* */
170 /* ************************* PUBLIC FUNCTIONS ************************ */
171 /* ******************************************************************* */
174 modest_text_utils_quote (const gchar *text,
175 const gchar *content_type,
176 const gchar *signature,
178 const time_t sent_date,
182 gchar *retval, *cited;
184 g_return_val_if_fail (text, NULL);
185 g_return_val_if_fail (content_type, NULL);
187 cited = cite (sent_date, from);
189 if (content_type && strcmp (content_type, "text/html") == 0)
190 /* TODO: extract the <body> of the HTML and pass it to
192 retval = modest_text_utils_quote_html (text, cited, signature, attachments, limit);
194 retval = modest_text_utils_quote_plain_text (text, cited, signature, attachments, limit);
203 modest_text_utils_cite (const gchar *text,
204 const gchar *content_type,
205 const gchar *signature,
212 g_return_val_if_fail (text, NULL);
213 g_return_val_if_fail (content_type, NULL);
216 retval = g_strdup ("");
217 else if (strcmp(content_type, "text/html") == 0) {
218 tmp_sig = g_strconcat ("\n", signature, NULL);
219 retval = modest_text_utils_convert_to_html_body(tmp_sig, -1, TRUE);
222 retval = g_strconcat (text, "\n", signature, NULL);
229 forward_cite (const gchar *from,
232 const gchar *subject)
234 g_return_val_if_fail (sent, NULL);
236 return g_strdup_printf ("%s\n%s %s\n%s %s\n%s %s\n%s %s\n",
238 FROM_STRING, (from)?from:"",
240 TO_STRING, (to)?to:"",
241 SUBJECT_STRING, (subject)?subject:"");
245 modest_text_utils_inline (const gchar *text,
246 const gchar *content_type,
247 const gchar *signature,
251 const gchar *subject)
257 g_return_val_if_fail (text, NULL);
258 g_return_val_if_fail (content_type, NULL);
260 modest_text_utils_strftime (sent_str, 100, "%c", sent_date);
262 cited = forward_cite (from, sent_str, to, subject);
264 if (content_type && strcmp (content_type, "text/html") == 0)
265 retval = modest_text_utils_quote_html (text, cited, signature, NULL, 80);
267 retval = modest_text_utils_quote_plain_text (text, cited, signature, NULL, 80);
273 /* just to prevent warnings:
274 * warning: `%x' yields only last 2 digits of year in some locales
277 modest_text_utils_strftime(char *s, gsize max, const char *fmt, time_t timet)
281 /* does not work on old maemo glib:
282 * g_date_set_time_t (&date, timet);
284 localtime_r (&timet, &tm);
285 return strftime(s, max, fmt, &tm);
289 modest_text_utils_derived_subject (const gchar *subject, const gchar *prefix)
293 g_return_val_if_fail (prefix, NULL);
295 if (!subject || subject[0] == '\0')
296 subject = _("mail_va_no_subject");
298 tmp = g_strchug (g_strdup (subject));
300 if (!strncmp (tmp, prefix, strlen (prefix))) {
304 return g_strdup_printf ("%s %s", prefix, subject);
309 modest_text_utils_remove_address (const gchar *address_list, const gchar *address)
311 gchar *dup, *token, *ptr = NULL, *result;
312 GString *filtered_emails;
313 gchar *email_address;
315 g_return_val_if_fail (address_list, NULL);
318 return g_strdup (address_list);
320 email_address = get_email_from_address (address);
322 /* search for substring */
323 if (!strstr ((const char *) address_list, (const char *) email_address)) {
324 g_free (email_address);
325 return g_strdup (address_list);
328 dup = g_strdup (address_list);
329 filtered_emails = g_string_new (NULL);
331 token = strtok_r (dup, ",", &ptr);
333 while (token != NULL) {
334 /* Add to list if not found */
335 if (!strstr ((const char *) token, (const char *) email_address)) {
336 if (filtered_emails->len == 0)
337 g_string_append_printf (filtered_emails, "%s", g_strstrip (token));
339 g_string_append_printf (filtered_emails, ",%s", g_strstrip (token));
341 token = strtok_r (NULL, ",", &ptr);
343 result = filtered_emails->str;
346 g_free (email_address);
348 g_string_free (filtered_emails, FALSE);
355 modest_text_utils_remove_duplicate_addresses (const gchar *address_list)
357 GSList *addresses, *cursor;
359 gchar *new_list = NULL;
361 g_return_val_if_fail (address_list, NULL);
363 table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
364 addresses = modest_text_utils_split_addresses_list (address_list);
368 const gchar* address = (const gchar*)cursor->data;
370 /* We need only the email to just compare it and not
371 the full address which would make "a <a@a.com>"
372 different from "a@a.com" */
373 const gchar *email = get_email_from_address (address);
375 /* ignore the address if already seen */
376 if (g_hash_table_lookup (table, email) == 0) {
379 /* Include the full address and not only the
380 email in the returned list */
382 tmp = g_strdup (address);
384 tmp = g_strjoin (",", new_list, address, NULL);
389 g_hash_table_insert (table, (gchar*)email, GINT_TO_POINTER(1));
391 cursor = g_slist_next (cursor);
394 g_hash_table_unref (table);
395 g_slist_foreach (addresses, (GFunc)g_free, NULL);
396 g_slist_free (addresses);
403 modest_text_utils_convert_buffer_to_html_start (GString *html, const gchar *data, gssize n)
406 gboolean space_seen = FALSE;
407 guint break_dist = 0; /* distance since last break point */
412 /* replace with special html chars where needed*/
413 for (i = 0; i != n; ++i) {
414 guchar kar = data[i];
416 if (space_seen && kar != ' ') {
417 g_string_append_c (html, ' ');
421 /* we artificially insert a breakpoint (newline)
422 * after 256, to make sure our lines are not so long
423 * they will DOS the regexping later
424 * Also, check that kar is ASCII to make sure that we
425 * don't break a UTF8 char in two
427 if (++break_dist >= 256 && kar < 127) {
428 g_string_append_c (html, '\n');
436 /* this is a temp place holder for '&'; we can only
437 * set the real '&' after hyperlink translation, otherwise
438 * we might screw that up */
439 break; /* ignore embedded \0s and MARK_AMP */
440 case '<' : g_string_append (html, MARK_AMP_STR "lt;"); break;
441 case '>' : g_string_append (html, MARK_AMP_STR "gt;"); break;
442 case '&' : g_string_append (html, MARK_AMP_URI_STR "amp;"); break; /* special case */
443 case '"' : g_string_append (html, MARK_AMP_STR "quot;"); break;
445 /* don't convert ' --> wpeditor will try to re-convert it... */
446 //case '\'' : g_string_append (html, "'"); break;
447 case '\n' : g_string_append (html, "<br>\n");break_dist= 0; break;
448 case '\t' : g_string_append (html, MARK_AMP_STR "nbsp;" MARK_AMP_STR "nbsp;" MARK_AMP_STR "nbsp; ");
449 break_dist=0; break; /* note the space at the end*/
452 if (space_seen) { /* second space in a row */
453 g_string_append (html, " ");
459 g_string_append_c (html, kar);
466 modest_text_utils_convert_buffer_to_html_finish (GString *html)
469 /* replace all our MARK_AMPs with real ones */
470 for (i = 0; i != html->len; ++i)
471 if ((html->str)[i] == MARK_AMP || (html->str)[i] == MARK_AMP_URI)
472 (html->str)[i] = '&';
477 modest_text_utils_convert_to_html (const gchar *data)
482 g_return_val_if_fail (data, NULL);
488 html = g_string_sized_new (1.5 * len); /* just a guess... */
490 g_string_append_printf (html,
492 "<meta http-equiv=\"content-type\" content=\"text/html; charset=utf8\">"
496 modest_text_utils_convert_buffer_to_html_start (html, data, -1);
498 g_string_append (html, "</body></html>");
500 if (len <= HYPERLINKIFY_MAX_LENGTH)
501 hyperlinkify_plain_text (html, 0);
503 modest_text_utils_convert_buffer_to_html_finish (html);
505 return g_string_free (html, FALSE);
509 modest_text_utils_convert_to_html_body (const gchar *data, gssize n, gboolean hyperlinkify)
513 g_return_val_if_fail (data, NULL);
520 html = g_string_sized_new (1.5 * n); /* just a guess... */
522 modest_text_utils_convert_buffer_to_html_start (html, data, n);
524 if (hyperlinkify && (n < HYPERLINKIFY_MAX_LENGTH))
525 hyperlinkify_plain_text (html, 0);
527 modest_text_utils_convert_buffer_to_html_finish (html);
529 return g_string_free (html, FALSE);
533 modest_text_utils_get_addresses_indexes (const gchar *addresses, GSList **start_indexes, GSList **end_indexes)
535 gchar *current, *start, *last_blank;
536 gint start_offset = 0, current_offset = 0;
538 g_return_if_fail (start_indexes != NULL);
539 g_return_if_fail (end_indexes != NULL);
541 start = (gchar *) addresses;
545 while (*current != '\0') {
546 if ((start == current)&&((*current == ' ')||(*current == ',')||(*current == ';'))) {
547 start = g_utf8_next_char (start);
549 last_blank = current;
550 } else if ((*current == ',')||(*current == ';')) {
551 gint *start_index, *end_index;
552 start_index = g_new0(gint, 1);
553 end_index = g_new0(gint, 1);
554 *start_index = start_offset;
555 *end_index = current_offset;
556 *start_indexes = g_slist_prepend (*start_indexes, start_index);
557 *end_indexes = g_slist_prepend (*end_indexes, end_index);
558 start = g_utf8_next_char (current);
559 start_offset = current_offset + 1;
561 } else if (*current == '"') {
562 current = g_utf8_next_char (current);
564 while ((*current != '"')&&(*current != '\0')) {
565 current = g_utf8_next_char (current);
570 current = g_utf8_next_char (current);
574 if (start != current) {
575 gint *start_index, *end_index;
576 start_index = g_new0(gint, 1);
577 end_index = g_new0(gint, 1);
578 *start_index = start_offset;
579 *end_index = current_offset;
580 *start_indexes = g_slist_prepend (*start_indexes, start_index);
581 *end_indexes = g_slist_prepend (*end_indexes, end_index);
584 *start_indexes = g_slist_reverse (*start_indexes);
585 *end_indexes = g_slist_reverse (*end_indexes);
592 modest_text_utils_split_addresses_list (const gchar *addresses)
595 const gchar *my_addrs = addresses;
599 g_return_val_if_fail (addresses, NULL);
601 /* skip any space, ',', ';' at the start */
602 while (my_addrs && (my_addrs[0] == ' ' || my_addrs[0] == ',' || my_addrs[0] == ';'))
605 /* are we at the end of addresses list? */
609 /* nope, we are at the start of some address
610 * now, let's find the end of the address */
612 while (end[0] && end[0] != ',' && end[0] != ';')
615 /* we got the address; copy it and remove trailing whitespace */
616 addr = g_strndup (my_addrs, end - my_addrs);
619 head = g_slist_append (NULL, addr);
620 head->next = modest_text_utils_split_addresses_list (end); /* recurse */
627 modest_text_utils_address_range_at_position (const gchar *recipients_list,
632 gchar *current = NULL;
633 gint range_start = 0;
636 gboolean is_quoted = FALSE;
638 g_return_if_fail (recipients_list);
639 g_return_if_fail (position < g_utf8_strlen(recipients_list, -1));
642 for (current = (gchar *) recipients_list; *current != '\0';
643 current = g_utf8_find_next_char (current, NULL)) {
644 gunichar c = g_utf8_get_char (current);
646 if ((c == ',') && (!is_quoted)) {
647 if (index < position) {
648 range_start = index + 1;
652 } else if (c == '\"') {
653 is_quoted = !is_quoted;
654 } else if ((c == ' ') &&(range_start == index)) {
662 *start = range_start;
668 modest_text_utils_address_with_standard_length (const gchar *recipients_list)
672 GString *buffer = g_string_new ("");
674 splitted = g_strsplit (recipients_list, "\n", 0);
678 if (current != splitted)
679 buffer = g_string_append_c (buffer, '\n');
680 line = g_strndup (*splitted, 1000);
681 buffer = g_string_append (buffer, line);
686 g_strfreev (splitted);
688 return g_string_free (buffer, FALSE);
692 /* ******************************************************************* */
693 /* ************************* UTILITY FUNCTIONS *********************** */
694 /* ******************************************************************* */
697 get_next_line (const gchar * b, const gsize blen, const gchar * iter)
703 return g_string_new("");
711 gs = g_string_new_len (i0, iter - i0);
715 get_indent_level (const char *l)
732 /* if we hit the signature marker "-- ", we return -(indent + 1). This
733 * stops reformatting.
735 if (strcmp (l, "-- ") == 0) {
743 unquote_line (GString * l)
758 g_string_erase (l, 0, p - l->str);
762 append_quoted (GString * buf, int indent, const GString * str,
767 indent = indent < 0 ? abs (indent) - 1 : indent;
768 for (i = 0; i <= indent; i++) {
769 g_string_append (buf, "> ");
772 g_string_append_len (buf, str->str, cutpoint);
774 g_string_append (buf, str->str);
776 g_string_append (buf, "\n");
780 get_breakpoint_utf8 (const gchar * s, gint indent, const gint limit)
783 const gchar *pos, *last;
786 indent = indent < 0 ? abs (indent) - 1 : indent;
790 uni = g_utf8_to_ucs4_fast (s, -1, NULL);
792 if ((index + 2 * indent > limit) && last) {
796 if (g_unichar_isspace (uni[index])) {
799 pos = g_utf8_next_char (pos);
807 get_breakpoint_ascii (const gchar * s, const gint indent, const gint limit)
812 if (last + 2 * indent < limit)
815 for (i = strlen (s); i > 0; i--) {
817 if (i + 2 * indent <= limit) {
828 get_breakpoint (const gchar * s, const gint indent, const gint limit)
831 if (g_utf8_validate (s, -1, NULL)) {
832 return get_breakpoint_utf8 (s, indent, limit);
833 } else { /* assume ASCII */
834 //g_warning("invalid UTF-8 in msg");
835 return get_breakpoint_ascii (s, indent, limit);
840 cite (const time_t sent_date, const gchar *from)
842 return g_strdup (_("mcen_ia_editor_original_message"));
846 quoted_attachments (GList *attachments)
849 GString *result = g_string_new ("");
850 for (node = attachments; node != NULL; node = g_list_next (node)) {
851 gchar *filename = (gchar *) node->data;
852 g_string_append_printf ( result, "%s %s\n", _("mcen_ia_editor_attach_filename"), filename);
855 return g_string_free (result, FALSE);
860 modest_text_utils_quote_plain_text (const gchar *text,
862 const gchar *signature,
867 gint indent, breakpoint, rem_indent = 0;
868 GString *q, *l, *remaining;
870 gchar *attachments_string = NULL;
872 q = g_string_new ("\n");
873 if (signature != NULL) {
874 q = g_string_append (q, signature);
875 q = g_string_append_c (q, '\n');
877 q = g_string_append (q, cite);
878 q = g_string_append_c (q, '\n');
880 /* remaining will store the rest of the line if we have to break it */
881 remaining = g_string_new ("");
886 l = get_next_line (text, len, iter);
887 iter = iter + l->len + 1;
888 indent = get_indent_level (l->str);
891 if (remaining->len) {
892 if (l->len && indent == rem_indent) {
893 g_string_prepend (l, " ");
894 g_string_prepend (l, remaining->str);
898 get_breakpoint (remaining->str,
901 append_quoted (q, rem_indent,
902 remaining, breakpoint);
903 g_string_erase (remaining, 0,
905 if (remaining->str[0] == ' ') {
906 g_string_erase (remaining, 0,
909 } while (remaining->len);
912 g_string_free (remaining, TRUE);
913 breakpoint = get_breakpoint (l->str, indent, limit);
914 remaining = g_string_new (l->str + breakpoint);
915 if (remaining->str[0] == ' ') {
916 g_string_erase (remaining, 0, 1);
919 append_quoted (q, indent, l, breakpoint);
920 g_string_free (l, TRUE);
921 } while ((iter < text + len) || (remaining->str[0]));
923 attachments_string = quoted_attachments (attachments);
924 q = g_string_append (q, attachments_string);
925 g_free (attachments_string);
927 return g_string_free (q, FALSE);
931 modest_text_utils_quote_html (const gchar *text,
933 const gchar *signature,
937 gchar *result = NULL;
938 gchar *signature_result = NULL;
939 const gchar *format = \
940 "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n" \
944 "<pre>%s<br/>%s<br/>%s</pre>\n" \
947 gchar *attachments_string = NULL;
948 gchar *q_attachments_string = NULL;
949 gchar *q_cite = NULL;
950 gchar *html_text = NULL;
952 if (signature == NULL)
953 signature_result = g_strdup ("");
955 signature_result = modest_text_utils_convert_to_html_body (signature, -1, TRUE);
957 attachments_string = quoted_attachments (attachments);
958 q_attachments_string = modest_text_utils_convert_to_html_body (attachments_string, -1, TRUE);
959 q_cite = modest_text_utils_convert_to_html_body (cite, -1, TRUE);
960 html_text = modest_text_utils_convert_to_html_body (text, -1, TRUE);
961 result = g_strdup_printf (format, signature_result, q_cite, html_text, q_attachments_string);
964 g_free (attachments_string);
965 g_free (q_attachments_string);
966 g_free (signature_result);
972 cmp_offsets_reverse (const url_match_t *match1, const url_match_t *match2)
974 return match2->offset - match1->offset;
977 static gboolean url_matches_block = 0;
978 static url_match_pattern_t patterns[] = MAIL_VIEWER_URL_MATCH_PATTERNS;
985 const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
986 for (i = 0; i != pattern_num; ++i) {
987 patterns[i].preg = g_slice_new0 (regex_t);
989 /* this should not happen */
990 if (regcomp (patterns[i].preg, patterns[i].regex,
991 REG_ICASE|REG_EXTENDED|REG_NEWLINE) != 0) {
992 g_warning ("%s: error in regexp:\n%s\n", __FUNCTION__, patterns[i].regex);
1003 const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
1004 for (i = 0; i != pattern_num; ++i) {
1005 regfree (patterns[i].preg);
1006 g_slice_free (regex_t, patterns[i].preg);
1007 } /* don't free patterns itself -- it's static */
1011 modest_text_utils_hyperlinkify_begin (void)
1013 if (url_matches_block == 0)
1014 compile_patterns ();
1015 url_matches_block ++;
1019 modest_text_utils_hyperlinkify_end (void)
1021 url_matches_block--;
1022 if (url_matches_block <= 0)
1028 get_url_matches (GString *txt, gint offset)
1031 guint rv, i, tmp_offset = 0;
1032 GSList *match_list = NULL;
1034 const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
1036 /* initialize the regexps */
1037 modest_text_utils_hyperlinkify_begin ();
1039 /* find all the matches */
1040 for (i = 0; i != pattern_num; ++i) {
1041 tmp_offset = offset;
1044 gboolean is_submatch;
1047 if ((rv = regexec (patterns[i].preg, txt->str + tmp_offset, 1, &rm, 0)) != 0) {
1048 g_return_val_if_fail (rv == REG_NOMATCH, NULL); /* this should not happen */
1049 break; /* try next regexp */
1054 is_submatch = FALSE;
1055 /* check old matches to see if this has already been matched */
1056 cursor = match_list;
1057 while (cursor && !is_submatch) {
1058 const url_match_t *old_match =
1059 (const url_match_t *) cursor->data;
1060 guint new_offset = tmp_offset + rm.rm_so;
1061 is_submatch = (new_offset > old_match->offset &&
1062 new_offset < old_match->offset + old_match->len);
1063 cursor = g_slist_next (cursor);
1067 /* make a list of our matches (<offset, len, prefix> tuples)*/
1068 match = g_slice_new (url_match_t);
1069 match->offset = tmp_offset + rm.rm_so;
1070 match->len = rm.rm_eo - rm.rm_so;
1071 match->prefix = patterns[i].prefix;
1072 match_list = g_slist_prepend (match_list, match);
1074 tmp_offset += rm.rm_eo;
1078 modest_text_utils_hyperlinkify_end ();
1080 /* now sort the list, so the matches are in reverse order of occurrence.
1081 * that way, we can do the replacements starting from the end, so we don't need
1082 * to recalculate the offsets
1084 match_list = g_slist_sort (match_list,
1085 (GCompareFunc)cmp_offsets_reverse);
1091 /* replace all occurrences of needle in haystack with repl */
1093 replace_string (const gchar *haystack, const gchar *needle, gchar repl)
1095 gchar *str, *cursor;
1097 if (!haystack || !needle || strlen(needle) == 0)
1098 return haystack ? g_strdup(haystack) : NULL;
1100 str = g_strdup (haystack);
1102 for (cursor = str; cursor && *cursor; ++cursor) {
1103 if (g_str_has_prefix (cursor, needle)) {
1105 memmove (cursor + 1,
1106 cursor + strlen (needle),
1107 strlen (cursor + strlen (needle)) + 1);
1115 hyperlinkify_plain_text (GString *txt, gint offset)
1118 GSList *match_list = get_url_matches (txt, offset);
1120 /* we will work backwards, so the offsets stay valid */
1121 for (cursor = match_list; cursor; cursor = cursor->next) {
1123 url_match_t *match = (url_match_t*) cursor->data;
1124 gchar *url = g_strndup (txt->str + match->offset, match->len);
1125 gchar *repl = NULL; /* replacement */
1127 /* the string still contains $(MARK_AMP_URI_STR)"amp;" for each
1128 * '&' in the original, because of the text->html conversion.
1129 * in the href-URL (and only there), we must convert that back to
1132 gchar *href_url = replace_string (url, MARK_AMP_URI_STR "amp;", '&');
1134 /* the prefix is NULL: use the one that is already there */
1135 repl = g_strdup_printf ("<a href=\"%s%s\">%s</a>",
1136 match->prefix ? match->prefix : EMPTY_STRING,
1139 /* replace the old thing with our hyperlink
1140 * replacement thing */
1141 g_string_erase (txt, match->offset, match->len);
1142 g_string_insert (txt, match->offset, repl);
1148 g_slice_free (url_match_t, match);
1151 g_slist_free (match_list);
1155 modest_text_utils_hyperlinkify (GString *string_buffer)
1160 after_body = strstr (string_buffer->str, "<body>");
1161 if (after_body != NULL)
1162 offset = after_body - string_buffer->str;
1163 hyperlinkify_plain_text (string_buffer, offset);
1167 /* for optimization reasons, we change the string in-place */
1169 modest_text_utils_get_display_address (gchar *address)
1173 g_return_if_fail (address);
1178 /* should not be needed, and otherwise, we probably won't screw up the address
1179 * more than it already is :)
1180 * g_return_val_if_fail (g_utf8_validate (address, -1, NULL), NULL);
1183 /* remove leading whitespace */
1184 if (address[0] == ' ')
1185 g_strchug (address);
1187 for (i = 0; address[i]; ++i) {
1188 if (address[i] == '<') {
1189 if (G_UNLIKELY(i == 0))
1190 return; /* there's nothing else, leave it */
1192 address[i] = '\0'; /* terminate the string here */
1204 modest_text_utils_get_email_address (const gchar *full_address)
1206 const gchar *left, *right;
1208 g_return_val_if_fail (full_address, NULL);
1213 g_return_val_if_fail (g_utf8_validate (full_address, -1, NULL), NULL);
1215 left = g_strrstr_len (full_address, strlen(full_address), "<");
1217 return g_strdup (full_address);
1219 right = g_strstr_len (left, strlen(left), ">");
1221 return g_strdup (full_address);
1223 return g_strndup (left + 1, right - left - 1);
1227 modest_text_utils_get_subject_prefix_len (const gchar *sub)
1229 gint prefix_len = 0;
1231 g_return_val_if_fail (sub, 0);
1236 /* optimization: "Re", "RE", "re","Fwd", "FWD", "fwd","FW","Fw", "fw" */
1237 if (sub[0] != 'R' && sub[0] != 'F' && sub[0] != 'r' && sub[0] != 'f')
1239 else if (sub[0] && sub[1] != 'e' && sub[1] != 'E' && sub[1] != 'w' && sub[1] != 'W')
1246 /* skip over a [...] block */
1247 if (sub[prefix_len] == '[') {
1248 int c = prefix_len + 1;
1249 while (sub[c] && sub[c] != ']')
1252 return 0; /* no end to the ']' found */
1257 /* did we find the ':' ? */
1258 if (sub[prefix_len] == ':') {
1260 if (sub[prefix_len] == ' ')
1262 prefix_len += modest_text_utils_get_subject_prefix_len (sub + prefix_len);
1263 /* g_warning ("['%s','%s']", sub, (char*) sub + prefix_len); */
1271 modest_text_utils_utf8_strcmp (const gchar* s1, const gchar *s2, gboolean insensitive)
1274 /* work even when s1 and/or s2 == NULL */
1275 if (G_UNLIKELY(s1 == s2))
1277 if (G_UNLIKELY(!s1))
1279 if (G_UNLIKELY(!s2))
1282 /* if it's not case sensitive */
1285 /* optimization: shortcut if first char is ascii */
1286 if (((s1[0] & 0xf0)== 0) && ((s2[0] & 0xf0) == 0))
1287 return s1[0] - s2[0];
1289 return g_utf8_collate (s1, s2);
1295 /* optimization: shortcut if first char is ascii */
1296 if (((s1[0] & 0xf0) == 0) && ((s2[0] & 0xf0) == 0))
1297 return tolower(s1[0]) - tolower(s2[0]);
1299 n1 = g_utf8_strdown (s1, -1);
1300 n2 = g_utf8_strdown (s2, -1);
1302 result = g_utf8_collate (n1, n2);
1313 modest_text_utils_get_display_date (time_t date)
1315 #define DATE_BUF_SIZE 64
1316 static gchar date_buf[DATE_BUF_SIZE];
1318 /* calculate the # of days since epoch for
1319 * today and for the date provided
1320 * based on idea from pvanhoof */
1321 int day = time(NULL) / (24 * 60 * 60);
1322 int date_day = date / (24 * 60 * 60);
1324 /* if it's today, show the time, if it's not today, show the date instead */
1326 if (day == date_day) /* is the date today? */
1327 modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, "%X", date);
1329 modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, "%x", date);
1331 return date_buf; /* this is a static buffer, don't free! */
1337 modest_text_utils_validate_folder_name (const gchar *folder_name)
1339 /* based on http://msdn2.microsoft.com/en-us/library/aa365247.aspx,
1340 * with some extras */
1344 const gchar **cursor = NULL;
1345 const gchar *forbidden_names[] = { /* windows does not like these */
1346 "CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6",
1347 "COM7", "COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
1348 ".", "..", "cur", "tmp", "new", NULL /* cur, tmp new are reserved for Maildir */
1351 /* cannot be NULL */
1355 /* cannot be empty */
1356 len = strlen(folder_name);
1360 /* cannot start with a dot, vfat does not seem to like that */
1361 if (folder_name[0] == '.')
1364 /* cannot start or end with a space */
1365 if (g_ascii_isspace(folder_name[0]) || g_ascii_isspace(folder_name[len - 1]))
1368 /* cannot contain a forbidden char */
1369 for (i = 0; i < len; i++)
1370 if (modest_text_utils_is_forbidden_char (folder_name[i], FOLDER_NAME_FORBIDDEN_CHARS))
1373 /* cannot contain a forbidden word */
1375 for (cursor = forbidden_names; cursor && *cursor; ++cursor) {
1376 if (g_ascii_strcasecmp (folder_name, *cursor) == 0)
1381 return TRUE; /* it's valid! */
1387 modest_text_utils_validate_domain_name (const gchar *domain)
1389 gboolean valid = FALSE;
1391 const gchar* domain_regex = "^([a-z0-9-]*[a-z0-9]\\.)+[a-z0-9-]*[a-z0-9]$";
1393 g_return_val_if_fail (domain, FALSE);
1398 memset (&rx, 0, sizeof(regex_t)); /* coverity wants this... */
1400 /* domain name: all alphanum or '-' or '.',
1401 * but beginning/ending in alphanum */
1402 if (regcomp (&rx, domain_regex, REG_ICASE|REG_EXTENDED|REG_NOSUB)) {
1403 g_warning ("BUG: error in regexp");
1407 valid = (regexec (&rx, domain, 1, NULL, 0) == 0);
1416 modest_text_utils_validate_email_address (const gchar *email_address,
1417 const gchar **invalid_char_position)
1420 const gchar *c = NULL, *domain = NULL;
1421 static gchar *rfc822_specials = "()<>@,;:\\\"[]&";
1423 if (invalid_char_position)
1424 *invalid_char_position = NULL;
1426 g_return_val_if_fail (email_address, FALSE);
1428 /* check that the email address contains exactly one @ */
1429 if (!strstr(email_address, "@") ||
1430 (strstr(email_address, "@") != g_strrstr(email_address, "@")))
1433 /* first we validate the name portion (name@domain) */
1434 for (c = email_address; *c; c++) {
1436 (c == email_address ||
1438 *(c - 1) == '\"')) {
1442 if (*c == '\\' && (*++c == ' '))
1444 if (*c <= ' ' || *c >= 127)
1457 if (*c <= ' ' || *c >= 127)
1459 if (strchr(rfc822_specials, *c)) {
1460 if (invalid_char_position)
1461 *invalid_char_position = c;
1465 if (c == email_address || *(c - 1) == '.')
1468 /* next we validate the domain portion (name@domain) */
1469 if (!*(domain = ++c))
1473 if (c == domain || *(c - 1) == '.' || *(c + 1) == '\0')
1477 if (*c <= ' ' || *c >= 127)
1479 if (strchr(rfc822_specials, *c)) {
1480 if (invalid_char_position)
1481 *invalid_char_position = c;
1486 return (count >= 1) ? TRUE : FALSE;
/* Validates a single recipient string.
 *
 * recipient: the string to check; NULL fails the guard below and returns
 *   FALSE.
 * invalid_char_position: optional out-parameter. It is cleared on entry and
 *   is then handed on to modest_text_utils_validate_email_address.
 *
 * The recipient is first tried as a bare e-mail address. Otherwise the
 * function works on a private copy (g_strdup followed by g_strstrip): a
 * leading double-quoted part is scanned when present, the text is searched
 * for an opening angle bracket, and the stripped remainder must begin with
 * an opening and end with a closing angle bracket. The text between the
 * brackets is then validated as an e-mail address.
 *
 * NOTE(review): several statements of this function (the returns and the
 * release of stripped and address) are on lines not visible in this
 * excerpt; ownership of those strings should be confirmed there. */
1490 modest_text_utils_validate_recipient (const gchar *recipient, const gchar **invalid_char_position)
1492 gchar *stripped, *current;
1494 gboolean has_error = FALSE;
/* Clear the out-parameter before any validation takes place. */
1496 if (invalid_char_position)
1497 *invalid_char_position = NULL;
1499 g_return_val_if_fail (recipient, FALSE);
/* First attempt: the whole string as a bare address. The statement run on
 * success is not visible here - presumably an early return of TRUE. */
1501 if (modest_text_utils_validate_email_address (recipient, invalid_char_position))
/* g_strstrip trims in place and returns its argument, so stripped still
 * refers to the copy made by g_strdup. */
1504 stripped = g_strdup (recipient);
1505 stripped = g_strstrip (stripped);
/* current is assigned on a line not visible here - presumably it starts at
 * stripped. This branch handles a string that is empty after stripping. */
1508 if (*current == '\0') {
/* A leading double quote opens a quoted part: step over the quote and scan
 * the quoted text one UTF-8 character at a time. */
1514 if (*current == '\"') {
1515 current = g_utf8_next_char (current);
1517 for (; *current != '\0'; current = g_utf8_next_char (current)) {
/* NOTE(review): the test on current[1] a few lines below can only be true
 * where gchar is a signed type, and the TODO there reports that it raises a
 * build-breaking warning. It looks intended to catch a backslash followed
 * by a non-ASCII byte - confirm the intent before changing it. */
1518 if (*current == '\\') {
1519 /* TODO: This causes a warning, which breaks the build,
1520 * because a gchar cannot be < 0.
1522 if (current[1] <0) {
1527 } else if (*current == '\"') {
1529 current = g_utf8_next_char (current);
1535 for (current = stripped ; *current != '\0'; current = g_utf8_next_char (current)) {
1536 if (*current == '<') {
1548 right_part = g_strdup (current);
1550 right_part = g_strstrip (right_part);
1552 if (g_str_has_prefix (right_part, "<") &&
1553 g_str_has_suffix (right_part, ">")) {
1557 address = g_strndup (right_part+1, strlen (right_part) - 2);
1558 g_free (right_part);
1559 valid = modest_text_utils_validate_email_address (address, invalid_char_position);
1563 g_free (right_part);
/* Builds a newly allocated, human-readable description of size.
 * The thresholds are powers of 1024, so size is presumably a byte count -
 * confirm against the callers. For each magnitude range a format string is
 * looked up through the _FM macro by its logical id and filled in with
 * g_strdup_printf; the string returned by g_strdup_printf is newly
 * allocated. */
1570 modest_text_utils_get_display_size (guint64 size)
1572 const guint KB=1024;
1573 const guint MB=1024 * KB;
1574 const guint GB=1024 * MB;
/* This return is guarded by a condition on a line not visible in this
 * excerpt - presumably size == 0; confirm. */
1577 return g_strdup_printf(_FM("sfil_li_size_kb"), 0);
/* Anything above zero but below 1 KB is reported as 1. */
1578 if (0 < size && size < KB)
1579 return g_strdup_printf (_FM("sfil_li_size_kb"), 1);
1580 else if (KB <= size && size < 100 * KB)
/* NOTE(review): size / KB has type guint64, whereas the literals 0 and 1
 * passed above are int. If the looked-up format uses a plain int
 * conversion, this argument (and size / MB further down) needs a cast.
 * The format strings are not visible here - confirm. */
1581 return g_strdup_printf (_FM("sfil_li_size_1kb_99kb"), size / KB);
1582 else if (100*KB <= size && size < MB)
/* The value is divided by MB, not KB, so this range produces a fraction
 * below 1.0. The cast applies to size before the division, so the division
 * is carried out in floating point. */
1583 return g_strdup_printf (_FM("sfil_li_size_100kb_1mb"), (float) size / MB);
1584 else if (MB <= size && size < 10*MB)
1585 return g_strdup_printf (_FM("sfil_li_size_1mb_10mb"), (float) size / MB);
1586 else if (10*MB <= size && size < GB)
1587 return g_strdup_printf (_FM("sfil_li_size_10mb_1gb"), size / MB);
/* Remaining case: every range below 1 GB has been handled above. The
 * keyword introducing this branch is on a line not visible here. */
1589 return g_strdup_printf (_FM("sfil_li_size_1gb_or_greater"), (float) size / GB);
/* Extracts the part of address that lies between the first opening angle
 * bracket and the last closing angle bracket.
 * Returns a newly allocated string in every case (g_strdup or g_strndup).
 * NOTE(review): address is passed to strstr without a NULL check in the
 * lines visible here - confirm that callers never pass NULL. */
1593 get_email_from_address (const gchar * address)
1595 gchar *left_limit, *right_limit;
/* strstr finds the first opening bracket, g_strrstr the last closing one. */
1597 left_limit = strstr (address, "<");
1598 right_limit = g_strrstr (address, ">");
/* Without both brackets, or with the closing bracket ahead of the opening
 * one, the whole input is copied unchanged. */
1600 if ((left_limit == NULL)||(right_limit == NULL)|| (left_limit > right_limit))
1601 return g_strdup (address);
/* Copy only the bytes strictly between the two brackets. */
1603 return g_strndup (left_limit + 1, (right_limit - left_limit) - 1);
/* Renders color as text: a hash sign followed by twelve lower-case
 * hexadecimal digits, four each for the red, green and blue fields in that
 * order. The string is produced by g_strdup_printf and is newly allocated.
 * A NULL color fails the guard below and yields NULL. */
1607 modest_text_utils_get_color_string (GdkColor *color)
1609 g_return_val_if_fail (color, NULL);
/* Each %x prints a single 4-bit nibble, most significant nibble first, so
 * every channel always contributes exactly four digits, leading zeros
 * included. */
1611 return g_strdup_printf ("#%x%x%x%x%x%x%x%x%x%x%x%x",
1612 (color->red >> 12) & 0xf, (color->red >> 8) & 0xf,
1613 (color->red >> 4) & 0xf, (color->red) & 0xf,
1614 (color->green >> 12) & 0xf, (color->green >> 8) & 0xf,
1615 (color->green >> 4) & 0xf, (color->green) & 0xf,
1616 (color->blue >> 12) & 0xf, (color->blue >> 8) & 0xf,
1617 (color->blue >> 4) & 0xf, (color->blue) & 0xf);
1621 modest_text_utils_text_buffer_get_text (GtkTextBuffer *buffer)
1623 GtkTextIter start, end;
1624 gchar *slice, *current;
1625 GString *result = g_string_new ("");
1627 g_return_val_if_fail (buffer && GTK_IS_TEXT_BUFFER (buffer), NULL);
1629 gtk_text_buffer_get_start_iter (buffer, &start);
1630 gtk_text_buffer_get_end_iter (buffer, &end);
1632 slice = gtk_text_buffer_get_slice (buffer, &start, &end, FALSE);
1635 while (current && current != '\0') {
1636 if (g_utf8_get_char (current) == 0xFFFC) {
1637 result = g_string_append_c (result, ' ');
1638 current = g_utf8_next_char (current);
1640 gchar *next = g_utf8_strchr (current, -1, 0xFFFC);
1642 result = g_string_append (result, current);
1644 result = g_string_append_len (result, current, next - current);
1651 return g_string_free (result, FALSE);
/* Looks character up in the table of forbidden characters selected by type.
 * Returns FALSE when the character is not in the selected table. For a type
 * that matches none of the cases, g_return_val_if_reached returns TRUE, so
 * the character is treated as forbidden.
 * NOTE(review): the third enumerator ends in _NAMES while the other two end
 * in _CHARS, although all three select a character table. */
1656 modest_text_utils_is_forbidden_char (const gchar character,
1657 ModestTextUtilsForbiddenCharType type)
1660 const gchar *forbidden_chars = NULL;
1662 /* We need to get the length in the switch because the
1663 compiler needs to know the size at compile time */
/* G_N_ELEMENTS has to be applied to the array itself in every case; the
 * forbidden_chars pointer no longer carries the array size. The three
 * tables are defined outside the lines visible here. */
1665 case ACCOUNT_TITLE_FORBIDDEN_CHARS:
1666 forbidden_chars = account_title_forbidden_chars;
1667 len = G_N_ELEMENTS (account_title_forbidden_chars);
1669 case FOLDER_NAME_FORBIDDEN_CHARS:
1670 forbidden_chars = folder_name_forbidden_chars;
1671 len = G_N_ELEMENTS (folder_name_forbidden_chars);
1673 case USER_NAME_FORBIDDEN_NAMES:
1674 forbidden_chars = user_name_forbidden_chars;
1675 len = G_N_ELEMENTS (user_name_forbidden_chars);
/* Reached for an unexpected type - presumably from a default label that is
 * not visible in this excerpt. */
1678 g_return_val_if_reached (TRUE);
/* Linear scan of the selected table. The statement executed on a match is
 * on a line not visible here - presumably a return of TRUE. */
1681 for (i = 0; i < len ; i++)
1682 if (forbidden_chars[i] == character)
1685 return FALSE; /* it's valid! */
/* Returns a newly allocated copy of the part of the label text that is
 * currently selected. The other visible exit returns a newly allocated
 * empty string - presumably the branch taken when there is no selection. */
1689 modest_text_utils_label_get_selection (GtkLabel *label)
/* start and end receive the selection bounds; their declarations are on
 * lines not visible in this excerpt. */
1694 if (gtk_label_get_selection_bounds (GTK_LABEL (label), &start, &end)) {
1695 const gchar *start_offset;
1696 const gchar *end_offset;
/* The bounds are treated as character offsets: each one is turned into a
 * byte pointer into the label text with g_utf8_offset_to_pointer, so that
 * multi-byte UTF-8 characters are counted correctly. */
1697 start_offset = gtk_label_get_text (GTK_LABEL (label));
1698 start_offset = g_utf8_offset_to_pointer (start_offset, start);
1699 end_offset = gtk_label_get_text (GTK_LABEL (label));
1700 end_offset = g_utf8_offset_to_pointer (end_offset, end);
/* Copy exactly the bytes between the two pointers. */
1701 selection = g_strndup (start_offset, end_offset - start_offset);
1704 return g_strdup ("");
/* Character predicate: true when ch is U+FFFC. It is passed as the callback
 * to gtk_text_iter_forward_find_char by
 * modest_text_utils_buffer_selection_is_valid. The remaining parameter is
 * on a line not visible in this excerpt. */
1709 _forward_search_image_char (gunichar ch,
1712 return (ch == 0xFFFC);
/* Checks the current selection of buffer. The visible lines start from
 * whether the buffer has a selection at all and then look for U+FFFC
 * characters inside it, which the existing comment describes as images.
 * A buffer that is not a GtkTextBuffer fails the guard and returns FALSE.
 * NOTE(review): the statements that turn these checks into the final result
 * are on lines not visible in this excerpt. */
1716 modest_text_utils_buffer_selection_is_valid (GtkTextBuffer *buffer)
1719 GtkTextIter start, end;
1721 g_return_val_if_fail (GTK_IS_TEXT_BUFFER (buffer), FALSE);
1723 result = gtk_text_buffer_get_has_selection (GTK_TEXT_BUFFER (buffer));
1725 /* check there are no images in selection */
1727 gtk_text_buffer_get_selection_bounds (buffer, &start, &end);
/* The first selected character is tested directly here, before the forward
 * search below is started from start. */
1728 if (gtk_text_iter_get_char (&start)== 0xFFFC)
/* Move end back by one character before it is used in the search below. */
1731 gtk_text_iter_backward_char (&end);
1732 if (gtk_text_iter_forward_find_char (&start, _forward_search_image_char,
/* Builds a newly allocated copy of text in which selected characters are
 * replaced by two underscores. The test that selects them is on a line not
 * visible in this excerpt - presumably it matches a single underscore, so
 * that it is not taken as a mnemonic marker; confirm. */
1743 modest_text_utils_escape_mnemonics (const gchar *text)
1746 GString *result = NULL;
1751 result = g_string_new ("");
/* Walk the input one byte at a time up to its terminating NUL. */
1752 for (p = text; *p != '\0'; p++) {
1754 result = g_string_append (result, "__");
1756 result = g_string_append_c (result, *p);
/* FALSE keeps the character data, which is handed to the caller. */
1759 return g_string_free (result, FALSE);