1 /* Copyright (c) 2006, Nokia Corporation
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of the Nokia Corporation nor the names of its
14 * contributors may be used to endorse or promote products derived from
15 * this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
18 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
20 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
21 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #endif /*_GNU_SOURCE*/
35 #include <string.h> /* for strcasestr */
40 #include <glib/gi18n.h>
42 #include <modest-tny-platform-factory.h>
43 #include <modest-text-utils.h>
44 #include <modest-runtime.h>
49 #endif /*HAVE_CONFIG_H */
52 #define FORWARD_STRING _("mcen_ia_editor_original_message")
53 #define FROM_STRING _("mail_va_from")
54 #define SENT_STRING _("mcen_fi_message_properties_sent")
55 #define TO_STRING _("mail_va_to")
56 #define SUBJECT_STRING _("mail_va_subject")
57 #define EMPTY_STRING ""
60 * do the hyperlinkification only for texts < 50 Kb,
61 * as it's quite slow. Without this, e.g. mail with
62 * an uuencoded part (which is not recognized as attachment,
65 #define HYPERLINKIFY_MAX_LENGTH (1024*50)
70 * we need these regexps to find URLs in plain text e-mails
72 typedef struct _url_match_pattern_t url_match_pattern_t;
73 struct _url_match_pattern_t {
79 typedef struct _url_match_t url_match_t;
88 * we mark the ampersand with \007 when converting text->html
89 * because after text->html we do hyperlink detecting, which
90 * could be screwed up by the ampersand.
91 * ie. 1<3 ==> 1\007lt;3
93 #define MARK_AMP '\007'
94 #define MARK_AMP_STR "\007"
96 /* mark & separately, because they are parts of urls.
97 * ie. a&b => a\006amp;b, but a>b => a\007gt;b
99 * we need to handle '&' separately, because it can be part of URIs
100 * (as in href="http://foo.bar?a=1&b=1"), so inside those URIs
101 * we need to re-replace \006amp; with '&' again, while outside uri's
104 * yes, it's messy, but a consequence of doing text->html first, then hyperlinkify
106 #define MARK_AMP_URI '\006'
107 #define MARK_AMP_URI_STR "\006"
110 /* note: match MARK_AMP_URI_STR as well, because after txt->html, a '&' will look like $(MARK_AMP_URI_STR)"amp;" */
111 #define MAIL_VIEWER_URL_MATCH_PATTERNS { \
112 { "(file|rtsp|http|ftp|https|mms|mmsh|rtsp|rdp|lastfm)://[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR \
113 "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]", \
115 { "www\\.[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]",\
117 { "ftp\\.[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]",\
119 { "(jabberto|voipto|sipto|sip|chatto|xmpp):[-_a-z@0-9.+]+", \
121 { "mailto:[-_a-z0-9.\\+]+@[-_a-z0-9.]+", \
123 { "[-_a-z0-9.\\+]+@[-_a-z0-9.]+",\
// Tables of characters that are rejected in account titles, folder names and
// user names, followed by their element counts (computed with G_N_ELEMENTS so
// the counts always track the initializers).
127 const gchar account_title_forbidden_chars[] = {
128 '\\', '/', ':', '*', '?', '\'', '<', '>', '|', '^'
130 const gchar folder_name_forbidden_chars[] = {
131 '<', '>', ':', '\'', '/', '\\', '|', '?', '*', '^', '%', '$', '#', '&'
133 const gchar user_name_forbidden_chars[] = {
136 const guint ACCOUNT_TITLE_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (account_title_forbidden_chars);
137 const guint FOLDER_NAME_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (folder_name_forbidden_chars);
138 const guint USER_NAME_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (user_name_forbidden_chars);
// Forward declarations of the file-local (static) helpers.
// Citation header and hyperlink detection helpers:
141 static gchar* cite (const time_t sent_date, const gchar *from);
142 static void hyperlinkify_plain_text (GString *txt, gint offset);
143 static gint cmp_offsets_reverse (const url_match_t *match1, const url_match_t *match2);
144 static GSList* get_url_matches (GString *txt, gint offset);
// Line splitting, indentation and wrapping helpers used when quoting text:
146 static GString* get_next_line (const char *b, const gsize blen, const gchar * iter);
147 static int get_indent_level (const char *l);
148 static void unquote_line (GString * l);
149 static void append_quoted (GString * buf, const int indent, const GString * str,
151 static int get_breakpoint_utf8 (const gchar * s, const gint indent, const gint limit);
152 static int get_breakpoint_ascii (const gchar * s, const gint indent, const gint limit);
153 static int get_breakpoint (const gchar * s, const gint indent, const gint limit);
// Quoting back ends for plain text and HTML, and e-mail extraction:
155 static gchar* modest_text_utils_quote_plain_text (const gchar *text,
157 const gchar *signature,
161 static gchar* modest_text_utils_quote_html (const gchar *text,
163 const gchar *signature,
166 static gchar* get_email_from_address (const gchar *address);
169 /* ******************************************************************* */
170 /* ************************* PUBLIC FUNCTIONS ************************ */
171 /* ******************************************************************* */
// Builds the quoted body of a message: obtains the cite header from cite()
// and hands text, that header, signature, attachments and limit to either the
// HTML or the plain-text quoting helper.
// The two guards make the function return NULL when text or content_type is
// NULL.
174 modest_text_utils_quote (const gchar *text,
175 const gchar *content_type,
176 const gchar *signature,
178 const time_t sent_date,
182 gchar *retval, *cited;
184 g_return_val_if_fail (text, NULL);
185 g_return_val_if_fail (content_type, NULL);
187 cited = cite (sent_date, from);
// Only an exact, case-sensitive text/html selects the HTML helper. The
// content_type test in this condition is redundant after the guard above.
189 if (content_type && strcmp (content_type, "text/html") == 0)
190 /* TODO: extract the <body> of the HTML and pass it to
192 retval = modest_text_utils_quote_html (text, cited, signature, attachments, limit);
194 retval = modest_text_utils_quote_plain_text (text, cited, signature, attachments, limit);
// Produces a newly allocated body with the signature attached.
// Both guards return NULL on a NULL text or content_type.
203 modest_text_utils_cite (const gchar *text,
204 const gchar *content_type,
205 const gchar *signature,
212 g_return_val_if_fail (text, NULL);
213 g_return_val_if_fail (content_type, NULL);
// NOTE(review): the condition that selects this empty-string result is not
// visible in this view.
216 retval = g_strdup ("");
217 else if (strcmp(content_type, "text/html") == 0) {
// HTML case: a newline followed by the signature is converted to an HTML
// fragment (whole string, hyperlinks enabled). text is not part of the value
// assigned here.
218 tmp_sig = g_strconcat ("\n", signature, NULL);
219 retval = modest_text_utils_convert_to_html_body(tmp_sig, -1, TRUE);
// Remaining case shown here: text, a newline and the signature joined as-is.
222 retval = g_strconcat (text, "\n", signature, NULL);
// Formats the header block for a forwarded message as a newly allocated
// string: one heading line followed by four label/value lines.
// A NULL from, to or subject is printed as an empty string; a NULL sent fails
// the guard and yields NULL.
// NOTE(review): the heading and the sent-date arguments sit on lines not
// visible here.
229 forward_cite (const gchar *from,
232 const gchar *subject)
234 g_return_val_if_fail (sent, NULL);
236 return g_strdup_printf ("%s\n%s %s\n%s %s\n%s %s\n%s %s\n",
238 FROM_STRING, (from)?from:"",
240 TO_STRING, (to)?to:"",
241 SUBJECT_STRING, (subject)?subject:"");
// Builds the body used when a message is inlined (forwarded): the original
// headers are rendered by forward_cite() and placed in front of the quoted
// text. Returns NULL (through the guards) when text or content_type is NULL.
245 modest_text_utils_inline (const gchar *text,
246 const gchar *content_type,
247 const gchar *signature,
251 const gchar *subject)
257 g_return_val_if_fail (text, NULL);
258 g_return_val_if_fail (content_type, NULL);
// Render the sent date with the locale-dependent %c format; at most 100 bytes
// are written into sent_str.
260 modest_text_utils_strftime (sent_str, 100, "%c", sent_date);
262 cited = forward_cite (from, sent_str, to, subject);
// Both helpers are called without an attachment list and with a fixed limit
// of 80. The content_type test is redundant after the guard above.
264 if (content_type && strcmp (content_type, "text/html") == 0)
265 retval = modest_text_utils_quote_html (text, cited, signature, NULL, 80);
267 retval = modest_text_utils_quote_plain_text (text, cited, signature, NULL, 80);
// Wrapper around strftime(): converts timet to broken-down local time with
// the reentrant localtime_r() and formats it into s (at most max bytes) using
// fmt. The return value is whatever strftime() returns.
273 /* just to prevent warnings:
274 * warning: `%x' yields only last 2 digits of year in some locales
277 modest_text_utils_strftime(char *s, gsize max, const char *fmt, time_t timet)
281 /* does not work on old maemo glib:
282 * g_date_set_time_t (&date, timet);
284 localtime_r (&timet, &tm);
285 return strftime(s, max, fmt, &tm);
// Derives a subject line that carries prefix. A NULL prefix fails the guard
// and returns NULL.
289 modest_text_utils_derived_subject (const gchar *subject, const gchar *prefix)
293 g_return_val_if_fail (prefix, NULL);
// A NULL or empty subject is replaced by the translated no-subject string.
295 if (!subject || subject[0] == '\0')
296 subject = _("mail_va_no_subject");
// Work on a private copy with leading whitespace removed.
298 tmp = g_strchug (g_strdup (subject));
// Case-sensitive test whether the trimmed copy already starts with prefix.
300 if (!strncmp (tmp, prefix, strlen (prefix))) {
// Result shown here: prefix, one space, then subject as passed in (not the
// trimmed copy).
304 return g_strdup_printf ("%s %s", prefix, subject);
// Returns a newly allocated copy of address_list without the entries that
// contain the e-mail part of address. A NULL address_list fails the guard and
// returns NULL.
309 modest_text_utils_remove_address (const gchar *address_list, const gchar *address)
311 gchar *dup, *token, *ptr = NULL, *result;
312 GString *filtered_emails;
313 gchar *email_address;
315 g_return_val_if_fail (address_list, NULL);
// NOTE(review): the condition leading to this early copy is not visible here.
318 return g_strdup (address_list);
320 email_address = get_email_from_address (address);
322 /* search for substring */
// Fast path: the e-mail does not occur anywhere in the list, so the list is
// returned unchanged (as a copy).
323 if (!strstr ((const char *) address_list, (const char *) email_address)) {
324 g_free (email_address);
325 return g_strdup (address_list);
328 dup = g_strdup (address_list);
329 filtered_emails = g_string_new (NULL);
// Walk the comma-separated entries of the private copy; strtok_r writes
// terminators into dup.
331 token = strtok_r (dup, ",", &ptr);
333 while (token != NULL) {
334 /* Add to list if not found */
// NOTE(review): this is a substring test, so an entry whose text merely
// contains the e-mail (a longer address with the same suffix) is dropped as
// well - confirm this is intended.
335 if (!strstr ((const char *) token, (const char *) email_address)) {
// Kept entries are stripped of surrounding whitespace and joined with a
// single comma and no space.
336 if (filtered_emails->len == 0)
337 g_string_append_printf (filtered_emails, "%s", g_strstrip (token));
339 g_string_append_printf (filtered_emails, ",%s", g_strstrip (token));
341 token = strtok_r (NULL, ",", &ptr);
// Keep the character buffer as the result; only the GString wrapper is
// released below (second argument FALSE).
343 result = filtered_emails->str;
346 g_free (email_address);
348 g_string_free (filtered_emails, FALSE);
// Builds a comma-separated list from address_list in which an address is
// kept only the first time its e-mail part is seen. A NULL address_list fails
// the guard and returns NULL.
355 modest_text_utils_remove_duplicate_addresses (const gchar *address_list)
357 GSList *addresses, *cursor;
359 gchar *new_list = NULL;
361 g_return_val_if_fail (address_list, NULL);
// Set of e-mails already emitted. Keys are released by the table with g_free;
// values are dummy markers and have no destroy function.
363 table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
364 addresses = modest_text_utils_split_addresses_list (address_list);
368 const gchar* address = (const gchar*)cursor->data;
370 /* We need only the email to just compare it and not
371 the full address which would make "a <a@a.com>"
372 different from "a@a.com" */
373 const gchar *email = get_email_from_address (address);
375 /* ignore the address if already seen */
376 if (g_hash_table_lookup (table, email) == 0) {
379 /* Include the full address and not only the
380 email in the returned list */
382 tmp = g_strdup (address);
384 tmp = g_strjoin (",", new_list, address, NULL);
// The table takes ownership of email as its key.
// NOTE(review): when the e-mail was already present, the freshly obtained
// email is not inserted; whether it is freed on that path is not visible
// here - check for a leak.
389 g_hash_table_insert (table, (gchar*)email, GINT_TO_POINTER(1));
391 cursor = g_slist_next (cursor);
394 g_hash_table_unref (table);
// Free every split address string, then the list nodes themselves.
395 g_slist_foreach (addresses, (GFunc)g_free, NULL);
396 g_slist_free (addresses);
// Appends an HTML-escaped rendering of data to html, walking data byte by
// byte until index n is reached.
// Markup-significant characters are written as entity names introduced by a
// placeholder byte (MARK_AMP_STR, or MARK_AMP_URI_STR for the ampersand)
// instead of a real ampersand, so that hyperlink detection can run on the
// intermediate text before the placeholders are turned into real ampersands.
// NOTE(review): how a negative n is handled is not visible in this view.
403 modest_text_utils_convert_buffer_to_html_start (GString *html, const gchar *data, gssize n)
406 gboolean space_seen = FALSE;
407 guint break_dist = 0; /* distance since last break point */
412 /* replace with special html chars where needed*/
413 for (i = 0; i != n; ++i) {
// Read as unsigned so that bytes of multi-byte UTF-8 sequences compare above
// the ASCII range in the test further down.
414 guchar kar = data[i];
// A single pending space is emitted verbatim once a non-space follows.
416 if (space_seen && kar != ' ') {
417 g_string_append_c (html, ' ');
421 /* we artificially insert a breakpoint (newline)
422 * after 256, to make sure our lines are not so long
423 * they will DOS the regexping later
424 * Also, check that kar is ASCII to make sure that we
425 * don't break a UTF8 char in two
427 if (++break_dist >= 256 && kar < 127) {
428 g_string_append_c (html, '\n');
436 /* this is a temp place holder for '&'; we can only
437 * set the real '&' after hyperlink translation, otherwise
438 * we might screw that up */
439 break; /* ignore embedded \0s and MARK_AMP */
440 case '<' : g_string_append (html, MARK_AMP_STR "lt;"); break;
441 case '>' : g_string_append (html, MARK_AMP_STR "gt;"); break;
442 case '&' : g_string_append (html, MARK_AMP_URI_STR "amp;"); break; /* special case */
443 case '"' : g_string_append (html, MARK_AMP_STR "quot;"); break;
445 /* don't convert ' --> wpeditor will try to re-convert it... */
446 //case '\'' : g_string_append (html, "'"); break;
// Newline and tab both reset the distance counter used for the artificial
// line breaks.
447 case '\n' : g_string_append (html, "<br>\n");break_dist= 0; break;
448 case '\t' : g_string_append (html, MARK_AMP_STR "nbsp;" MARK_AMP_STR "nbsp;" MARK_AMP_STR "nbsp; ");
449 break_dist=0; break; /* note the space at the end*/
452 if (space_seen) { /* second space in a row */
453 g_string_append (html, " ");
// Every other byte is copied through unchanged.
459 g_string_append_c (html, kar);
// Final pass over html: every placeholder byte (MARK_AMP or MARK_AMP_URI) is
// overwritten in place with a real ampersand. The length of html does not
// change.
466 modest_text_utils_convert_buffer_to_html_finish (GString *html)
469 /* replace all our MARK_AMPs with real ones */
470 for (i = 0; i != html->len; ++i)
471 if ((html->str)[i] == MARK_AMP || (html->str)[i] == MARK_AMP_URI)
472 (html->str)[i] = '&';
// Converts plain text into a complete HTML document and returns it as a newly
// allocated string. A NULL data fails the guard and returns NULL.
477 modest_text_utils_convert_to_html (const gchar *data)
482 g_return_val_if_fail (data, NULL);
// Pre-size the buffer to one and a half times the input length.
488 html = g_string_sized_new (1.5 * len); /* just a guess... */
490 g_string_append_printf (html,
492 "<meta http-equiv=\"content-type\" content=\"text/html; charset=utf8\">"
496 modest_text_utils_convert_buffer_to_html_start (html, data, -1);
498 g_string_append (html, "</body></html>");
// Hyperlink detection is skipped for inputs longer than
// HYPERLINKIFY_MAX_LENGTH; it runs on the text that still contains the
// placeholder bytes.
500 if (len <= HYPERLINKIFY_MAX_LENGTH)
501 hyperlinkify_plain_text (html, 0);
// Turn the placeholder bytes into real ampersands.
503 modest_text_utils_convert_buffer_to_html_finish (html);
// Release the GString wrapper only; the caller owns the returned buffer.
505 return g_string_free (html, FALSE);
// Converts plain text into an HTML fragment (no document wrapper is appended
// in the visible lines) and returns it as a newly allocated string. A NULL
// data fails the guard and returns NULL.
// NOTE(review): callers in this file pass n == -1; the handling of that value
// is not visible in this view.
509 modest_text_utils_convert_to_html_body (const gchar *data, gssize n, gboolean hyperlinkify)
513 g_return_val_if_fail (data, NULL);
520 html = g_string_sized_new (1.5 * n); /* just a guess... */
522 modest_text_utils_convert_buffer_to_html_start (html, data, n);
// Hyperlinks are only added on request and only below the size limit (strict
// comparison here, unlike the <= used by the full-document variant).
524 if (hyperlinkify && (n < HYPERLINKIFY_MAX_LENGTH))
525 hyperlinkify_plain_text (html, 0);
527 modest_text_utils_convert_buffer_to_html_finish (html);
529 return g_string_free (html, FALSE);
// Scans addresses and records where each address starts and ends. For every
// address a newly allocated gint is added to *start_indexes and another to
// *end_indexes; both lists end up in order of appearance. Returns silently
// (through the guards) when either output pointer is NULL.
533 modest_text_utils_get_addresses_indexes (const gchar *addresses, GSList **start_indexes, GSList **end_indexes)
535 gchar *current, *start, *last_blank;
536 gint start_offset = 0, current_offset = 0;
538 g_return_if_fail (start_indexes != NULL);
539 g_return_if_fail (end_indexes != NULL);
541 start = (gchar *) addresses;
545 while (*current != '\0') {
// Separators and blanks directly at the start of an address are skipped by
// moving the start forward.
546 if ((start == current)&&((*current == ' ')||(*current == ',')||(*current == ';'))) {
547 start = g_utf8_next_char (start);
549 last_blank = current;
// A comma or semicolon closes the current address: store its offsets.
550 } else if ((*current == ',')||(*current == ';')) {
551 gint *start_index, *end_index;
552 start_index = g_new0(gint, 1);
553 end_index = g_new0(gint, 1);
554 *start_index = start_offset;
555 *end_index = current_offset;
// Prepend now, reverse once at the end.
556 *start_indexes = g_slist_prepend (*start_indexes, start_index);
557 *end_indexes = g_slist_prepend (*end_indexes, end_index);
558 start = g_utf8_next_char (current);
559 start_offset = current_offset + 1;
// Inside double quotes separators do not split: jump to the closing quote (or
// to the end of the string).
561 } else if (*current == '"') {
562 current = g_utf8_next_char (current);
564 while ((*current != '"')&&(*current != '\0')) {
565 current = g_utf8_next_char (current);
570 current = g_utf8_next_char (current);
// Trailing address that is not followed by a separator.
574 if (start != current) {
575 gint *start_index, *end_index;
576 start_index = g_new0(gint, 1);
577 end_index = g_new0(gint, 1);
578 *start_index = start_offset;
579 *end_index = current_offset;
580 *start_indexes = g_slist_prepend (*start_indexes, start_index);
581 *end_indexes = g_slist_prepend (*end_indexes, end_index);
584 *start_indexes = g_slist_reverse (*start_indexes);
585 *end_indexes = g_slist_reverse (*end_indexes);
// Splits addresses at commas and semicolons into a singly linked list of
// newly allocated strings. The function handles the first address itself and
// recurses on the remainder, so the recursion depth grows with the number of
// addresses. A NULL addresses fails the guard and returns NULL.
592 modest_text_utils_split_addresses_list (const gchar *addresses)
595 const gchar *my_addrs = addresses;
599 g_return_val_if_fail (addresses, NULL);
601 /* skip any space, ',', ';' at the start */
602 while (my_addrs && (my_addrs[0] == ' ' || my_addrs[0] == ',' || my_addrs[0] == ';'))
605 /* are we at the end of addresses list? */
609 /* nope, we are at the start of some address
610 * now, let's find the end of the address */
612 while (end[0] && end[0] != ',' && end[0] != ';')
615 /* we got the address; copy it and remove trailing whitespace */
616 addr = g_strndup (my_addrs, end - my_addrs);
// One-element list for this address; its tail is the list built from the rest
// of the input.
619 head = g_slist_append (NULL, addr);
620 head->next = modest_text_utils_split_addresses_list (end); /* recurse */
// Determines the range of the address in recipients_list that contains the
// character at position, and stores its start in *start. Returns silently
// (through the guards) when recipients_list is NULL or position is not
// smaller than the number of UTF-8 characters in the list.
627 modest_text_utils_address_range_at_position (const gchar *recipients_list,
632 gchar *current = NULL;
633 gint range_start = 0;
636 gboolean is_quoted = FALSE;
638 g_return_if_fail (recipients_list);
639 g_return_if_fail (position < g_utf8_strlen(recipients_list, -1));
// Walk the list one UTF-8 character at a time.
642 for (current = (gchar *) recipients_list; *current != '\0';
643 current = g_utf8_find_next_char (current, NULL)) {
644 gunichar c = g_utf8_get_char (current);
// A comma outside double quotes separates addresses; one seen before position
// moves the candidate start just behind it.
646 if ((c == ',') && (!is_quoted)) {
647 if (index < position) {
648 range_start = index + 1;
// Double quotes toggle the quoted state, so commas inside quotes do not
// split.
652 } else if (c == '\"') {
653 is_quoted = !is_quoted;
// A space sitting exactly at the candidate start gets special handling (body
// not visible here).
654 } else if ((c == ' ') &&(range_start == index)) {
662 *start = range_start;
// Rebuilds recipients_list line by line, copying at most 1000 bytes per line
// and joining the lines with newlines. Returns a newly allocated string.
668 modest_text_utils_address_with_standard_length (const gchar *recipients_list)
672 GString *buffer = g_string_new ("");
674 splitted = g_strsplit (recipients_list, "\n", 0);
// Every line but the first is preceded by a newline.
678 if (current != splitted)
679 buffer = g_string_append_c (buffer, '\n');
// NOTE(review): this copies from *splitted (the first line) while the
// separator test above uses current. If the surrounding loop (not visible
// here) advances current, every iteration re-appends the first line - confirm
// whether *current was intended.
// g_strndup cuts at a byte count, which could split a multi-byte character.
680 line = g_strndup (*splitted, 1000);
681 buffer = g_string_append (buffer, line);
686 g_strfreev (splitted);
688 return g_string_free (buffer, FALSE);
692 /* ******************************************************************* */
693 /* ************************* UTILITY FUNCTIONS ************************ */
694 /* ******************************************************************* */
// Returns a newly allocated GString for the next line of buffer b.
// NOTE(review): the condition for the empty-string early return and the way
// i0 and iter are positioned are not visible in this view.
697 get_next_line (const gchar * b, const gsize blen, const gchar * iter)
703 return g_string_new("");
// Copy the bytes between i0 and iter.
711 gs = g_string_new_len (i0, iter - i0);
// Computes the indent level of line l.
// NOTE(review): the counting logic is not visible in this view; only the
// signature-marker special case below is.
715 get_indent_level (const char *l)
732 /* if we hit the signature marker "-- ", we return -(indent + 1). This
733 * stops reformatting.
// Exact comparison of the remaining text with dash, dash, space.
735 if (strcmp (l, "-- ") == 0) {
// Modifies l in place by erasing everything in front of p.
// NOTE(review): how p is positioned is not visible in this view.
743 unquote_line (GString * l)
758 g_string_erase (l, 0, p - l->str);
// Appends one quoted line to buf: quote markers, then text from str, then a
// newline.
762 append_quoted (GString * buf, int indent, const GString * str,
// A negative indent encodes -(level + 1); map it back to the level.
767 indent = indent < 0 ? abs (indent) - 1 : indent;
// Inclusive upper bound: indent + 1 markers are written, so level 0 text
// still gets one marker.
768 for (i = 0; i <= indent; i++) {
769 g_string_append (buf, "> ");
// Either only the first cutpoint bytes or the whole string is appended (the
// selecting condition is not visible here).
772 g_string_append_len (buf, str->str, cutpoint);
774 g_string_append (buf, str->str);
776 g_string_append (buf, "\n");
// Finds the position at which the UTF-8 string s should be broken so that the
// line, including two columns per indent level, stays within limit.
// NOTE(review): the return statements are not visible in this view.
780 get_breakpoint_utf8 (const gchar * s, gint indent, const gint limit)
783 const gchar *pos, *last;
// A negative indent encodes -(level + 1); map it back to the level.
786 indent = indent < 0 ? abs (indent) - 1 : indent;
// Decode the whole string to UCS-4 so characters can be indexed directly.
790 uni = g_utf8_to_ucs4_fast (s, -1, NULL);
// Past the limit with a candidate position recorded in last.
792 if ((index + 2 * indent > limit) && last) {
// Whitespace characters are the candidate break positions.
796 if (g_unichar_isspace (uni[index])) {
799 pos = g_utf8_next_char (pos);
// Byte-oriented counterpart of the UTF-8 break search: works on s without
// decoding it.
// NOTE(review): the return statements are not visible in this view.
807 get_breakpoint_ascii (const gchar * s, const gint indent, const gint limit)
// Line plus two columns per indent level already fits below limit.
812 if (last + 2 * indent < limit)
// Search backwards from the end of the string.
815 for (i = strlen (s); i > 0; i--) {
817 if (i + 2 * indent <= limit) {
// Dispatcher for the break search: valid UTF-8 goes to the character-aware
// variant, anything else is handled byte by byte.
828 get_breakpoint (const gchar * s, const gint indent, const gint limit)
831 if (g_utf8_validate (s, -1, NULL)) {
832 return get_breakpoint_utf8 (s, indent, limit);
833 } else { /* assume ASCII */
834 //g_warning("invalid UTF-8 in msg");
835 return get_breakpoint_ascii (s, indent, limit);
// Returns a newly allocated copy of the translated original-message heading.
// Neither sent_date nor from is used by the visible statement.
840 cite (const time_t sent_date, const gchar *from)
842 return g_strdup (_("mcen_ia_editor_original_message"));
// Renders the attachment list as text: one line per list node, made of the
// translated attachment label, a space and the node data used as file name.
// Returns a newly allocated string, empty for an empty list.
846 quoted_attachments (GList *attachments)
849 GString *result = g_string_new ("");
850 for (node = attachments; node != NULL; node = g_list_next (node)) {
// The node data is used directly as a C string.
851 gchar *filename = (gchar *) node->data;
852 g_string_append_printf ( result, "%s %s\n", _("mcen_ia_editor_attach_filename"), filename);
855 return g_string_free (result, FALSE);
// Quotes plain text: the result starts with a newline and the cite heading,
// followed by the re-wrapped lines of text (each written through
// append_quoted), the attachment lines and, if given, the signature.
// Returns a newly allocated string.
860 modest_text_utils_quote_plain_text (const gchar *text,
862 const gchar *signature,
867 gint indent, breakpoint, rem_indent = 0;
868 GString *q, *l, *remaining;
870 gchar *attachments_string = NULL;
872 q = g_string_new ("\n");
873 q = g_string_append (q, cite);
874 q = g_string_append_c (q, '\n');
876 /* remaining will store the rest of the line if we have to break it */
877 remaining = g_string_new ("");
// Fetch the next input line and step past it and its terminator.
882 l = get_next_line (text, len, iter);
883 iter = iter + l->len + 1;
884 indent = get_indent_level (l->str);
// Leftover text from the previous line: merge it into this line when the
// indent matches and the line is not empty, otherwise flush it on its own.
887 if (remaining->len) {
888 if (l->len && indent == rem_indent) {
889 g_string_prepend (l, " ");
890 g_string_prepend (l, remaining->str);
894 get_breakpoint (remaining->str,
897 append_quoted (q, rem_indent,
898 remaining, breakpoint);
899 g_string_erase (remaining, 0,
901 if (remaining->str[0] == ' ') {
902 g_string_erase (remaining, 0,
905 } while (remaining->len);
// Split the current line at its break position; the tail becomes the new
// leftover, minus one leading space.
908 g_string_free (remaining, TRUE);
909 breakpoint = get_breakpoint (l->str, indent, limit);
910 remaining = g_string_new (l->str + breakpoint);
911 if (remaining->str[0] == ' ') {
912 g_string_erase (remaining, 0, 1);
915 append_quoted (q, indent, l, breakpoint);
916 g_string_free (l, TRUE);
// Continue while input is left or leftover text still has to be written.
917 } while ((iter < text + len) || (remaining->str[0]));
919 attachments_string = quoted_attachments (attachments);
920 q = g_string_append (q, attachments_string);
921 g_free (attachments_string);
// The separator written here is dash dash newline, without the trailing space
// that get_indent_level tests for.
923 if (signature != NULL) {
924 q = g_string_append (q, "\n--\n");
925 q = g_string_append (q, signature);
926 q = g_string_append_c (q, '\n');
929 return g_string_free (q, FALSE);
// Quotes a message as HTML: cite heading, text and attachment lines are each
// converted to HTML fragments and inserted into a fixed document template,
// followed by the converted signature. Returns a newly allocated string.
933 modest_text_utils_quote_html (const gchar *text,
935 const gchar *signature,
939 gchar *result = NULL;
940 gchar *signature_result = NULL;
941 const gchar *format = \
942 "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n" \
945 "<pre>%s<br/>%s<br/>%s</pre>\n" \
946 "<br/>--<br/>%s<br/>\n" \
949 gchar *attachments_string = NULL;
950 gchar *q_attachments_string = NULL;
951 gchar *q_cite = NULL;
952 gchar *html_text = NULL;
// A missing signature becomes an empty string, so the template always
// receives four arguments.
954 if (signature == NULL)
955 signature_result = g_strdup ("");
957 signature_result = modest_text_utils_convert_to_html_body (signature, -1, TRUE);
959 attachments_string = quoted_attachments (attachments);
960 q_attachments_string = modest_text_utils_convert_to_html_body (attachments_string, -1, TRUE);
961 q_cite = modest_text_utils_convert_to_html_body (cite, -1, TRUE);
// text is escaped as plain text here, whatever markup it contains.
962 html_text = modest_text_utils_convert_to_html_body (text, -1, TRUE);
963 result = g_strdup_printf (format, q_cite, html_text, q_attachments_string, signature_result);
// NOTE(review): the release of q_cite and html_text is not visible in this
// view - confirm both are freed.
966 g_free (attachments_string);
967 g_free (q_attachments_string);
968 g_free (signature_result);
// Comparison callback for sorting matches by descending offset: the result is
// positive when match2 starts after match1, so later matches sort first.
974 cmp_offsets_reverse (const url_match_t *match1, const url_match_t *match2)
976 return match2->offset - match1->offset;
// url_matches_block is used as a nesting counter by the hyperlinkify
// begin/end pair even though it is declared gboolean; patterns is the static
// table of URL regular expressions.
979 static gboolean url_matches_block = 0;
980 static url_match_pattern_t patterns[] = MAIL_VIEWER_URL_MATCH_PATTERNS;
// Compile step: every table entry gets a zero-initialised regex_t from the
// slice allocator, compiled as a case-insensitive extended regular expression
// with newline-sensitive matching.
987 const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
988 for (i = 0; i != pattern_num; ++i) {
989 patterns[i].preg = g_slice_new0 (regex_t);
991 /* this should not happen */
992 if (regcomp (patterns[i].preg, patterns[i].regex,
993 REG_ICASE|REG_EXTENDED|REG_NEWLINE) != 0) {
994 g_warning ("%s: error in regexp:\n%s\n", __FUNCTION__, patterns[i].regex);
// Release step (separate function whose header is not visible here): free the
// compiled state first, then the regex_t itself.
1005 const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
1006 for (i = 0; i != pattern_num; ++i) {
1007 regfree (patterns[i].preg);
1008 g_slice_free (regex_t, patterns[i].preg);
1009 } /* don't free patterns itself -- it's static */
// Opens a hyperlinkify section: the patterns are compiled only when the
// nesting counter is zero, then the counter is incremented. Nested sections
// therefore reuse the already compiled patterns.
1013 modest_text_utils_hyperlinkify_begin (void)
1015 if (url_matches_block == 0)
1016 compile_patterns ();
1017 url_matches_block ++;
// Closes a hyperlinkify section: decrements the nesting counter and acts once
// it has dropped to zero or below.
// NOTE(review): the action is not visible here; presumably it releases the
// compiled patterns - confirm.
1021 modest_text_utils_hyperlinkify_end (void)
1023 url_matches_block--;
1024 if (url_matches_block <= 0)
// Collects all URL-like matches in txt, starting at offset, for every pattern
// in the table. The result is a list of slice-allocated url_match_t entries
// sorted by descending offset.
1030 get_url_matches (GString *txt, gint offset)
1033 guint rv, i, tmp_offset = 0;
1034 GSList *match_list = NULL;
1036 const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
1038 /* initialize the regexps */
1039 modest_text_utils_hyperlinkify_begin ();
1041 /* find all the matches */
1042 for (i = 0; i != pattern_num; ++i) {
// Each pattern scans the text again from the requested offset.
1043 tmp_offset = offset;
1046 gboolean is_submatch;
1049 if ((rv = regexec (patterns[i].preg, txt->str + tmp_offset, 1, &rm, 0)) != 0) {
// NOTE(review): on an unexpected regexec error this returns NULL right away,
// which skips the hyperlinkify_end call below and drops the matches collected
// so far - confirm this is acceptable.
1050 g_return_val_if_fail (rv == REG_NOMATCH, NULL); /* this should not happen */
1051 break; /* try next regexp */
1056 is_submatch = FALSE;
1057 /* check old matches to see if this has already been matched */
1058 cursor = match_list;
1059 while (cursor && !is_submatch) {
1060 const url_match_t *old_match =
1061 (const url_match_t *) cursor->data;
1062 guint new_offset = tmp_offset + rm.rm_so;
// Strict comparisons: a new match counts as a submatch only when it starts
// strictly inside an earlier match, not when it starts at the same offset.
1063 is_submatch = (new_offset > old_match->offset &&
1064 new_offset < old_match->offset + old_match->len);
1065 cursor = g_slist_next (cursor);
1069 /* make a list of our matches (<offset, len, prefix> tuples)*/
1070 match = g_slice_new (url_match_t);
// rm offsets are relative to the scan start, so add tmp_offset to get the
// position within txt.
1071 match->offset = tmp_offset + rm.rm_so;
1072 match->len = rm.rm_eo - rm.rm_so;
1073 match->prefix = patterns[i].prefix;
1074 match_list = g_slist_prepend (match_list, match);
// Continue scanning right behind the match just found.
1076 tmp_offset += rm.rm_eo;
1080 modest_text_utils_hyperlinkify_end ();
1082 /* now sort the list, so the matches are in reverse order of occurrence.
1083 * that way, we can do the replacements starting from the end, so we don't need
1084 * to recalculate the offsets
1086 match_list = g_slist_sort (match_list,
1087 (GCompareFunc)cmp_offsets_reverse);
1093 /* replace all occurrences of needle in haystack with repl*/
// Works on a newly allocated copy of haystack. With a NULL or empty needle
// the copy is returned unchanged; a NULL haystack yields NULL.
1095 replace_string (const gchar *haystack, const gchar *needle, gchar repl)
1097 gchar *str, *cursor;
1099 if (!haystack || !needle || strlen(needle) == 0)
1100 return haystack ? g_strdup(haystack) : NULL;
1102 str = g_strdup (haystack);
1104 for (cursor = str; cursor && *cursor; ++cursor) {
1105 if (g_str_has_prefix (cursor, needle)) {
// Close the gap in place: the text behind the needle, including its
// terminator, is moved to directly behind the current position, which is
// where the single replacement character belongs.
1107 memmove (cursor + 1,
1108 cursor + strlen (needle),
1109 strlen (cursor + strlen (needle)) + 1);
// Rewrites txt in place: every URL-like match found from offset onwards is
// wrapped in an anchor element whose href is the match text, prefixed with
// the pattern prefix when there is one.
1117 hyperlinkify_plain_text (GString *txt, gint offset)
1120 GSList *match_list = get_url_matches (txt, offset);
1122 /* we will work backwards, so the offsets stay valid */
1123 for (cursor = match_list; cursor; cursor = cursor->next) {
1125 url_match_t *match = (url_match_t*) cursor->data;
// Private copy of the matched text; it still contains placeholder bytes.
1126 gchar *url = g_strndup (txt->str + match->offset, match->len);
1127 gchar *repl = NULL; /* replacement */
1129 /* the string still contains $(MARK_AMP_URI_STR)"amp;" for each
1130 * '&' in the original, because of the text->html conversion.
1131 * in the href-URL (and only there), we must convert that back to
1134 gchar *href_url = replace_string (url, MARK_AMP_URI_STR "amp;", '&');
1136 /* the prefix is NULL: use the one that is already there */
1137 repl = g_strdup_printf ("<a href=\"%s%s\">%s</a>",
1138 match->prefix ? match->prefix : EMPTY_STRING,
1141 /* replace the old thing with our hyperlink
1142 * replacement thing */
1143 g_string_erase (txt, match->offset, match->len);
1144 g_string_insert (txt, match->offset, repl);
// The match record is released here; the list nodes are freed after the loop.
1150 g_slice_free (url_match_t, match);
1153 g_slist_free (match_list);
// Public entry point: adds hyperlinks to string_buffer in place. When the
// buffer contains a body start tag, scanning starts at the position of that
// tag.
// NOTE(review): the value of offset when no tag is found is set on a line not
// visible here.
1157 modest_text_utils_hyperlinkify (GString *string_buffer)
1162 after_body = strstr (string_buffer->str, "<body>");
1163 if (after_body != NULL)
1164 offset = after_body - string_buffer->str;
1165 hyperlinkify_plain_text (string_buffer, offset);
1169 /* for optimization reasons, we change the string in-place */
// Reduces address to its display part by cutting the string at the first
// opening angle bracket. The caller's buffer is modified; nothing is
// allocated. A NULL address fails the guard and the function returns.
1171 modest_text_utils_get_display_address (gchar *address)
1175 g_return_if_fail (address);
1180 /* should not be needed, and otherwise, we probably won't screw up the address
1181 * more than it already is :)
1182 * g_return_val_if_fail (g_utf8_validate (address, -1, NULL), NULL);
1185 /* remove leading whitespace */
1186 if (address[0] == ' ')
1187 g_strchug (address);
1189 for (i = 0; address[i]; ++i) {
1190 if (address[i] == '<') {
// A bracket in first position means there is no display name in front of it.
1191 if (G_UNLIKELY(i == 0))
1192 return; /* there's nothing else, leave it */
1194 address[i] = '\0'; /* terminate the string here */
// Extracts the e-mail part of full_address: the text between the last opening
// angle bracket and the first closing bracket after it. Always returns a
// newly allocated string, or NULL when full_address is NULL or not valid
// UTF-8 (both guards).
1206 modest_text_utils_get_email_address (const gchar *full_address)
1208 const gchar *left, *right;
1210 g_return_val_if_fail (full_address, NULL);
1215 g_return_val_if_fail (g_utf8_validate (full_address, -1, NULL), NULL);
// Search from the right for the opening bracket.
1217 left = g_strrstr_len (full_address, strlen(full_address), "<");
// Fallback: the whole input is returned as a copy (condition not visible
// here).
1219 return g_strdup (full_address);
// Search forward from that bracket for the closing one.
1221 right = g_strstr_len (left, strlen(left), ">");
1223 return g_strdup (full_address);
// Copy what lies strictly between the two brackets.
1225 return g_strndup (left + 1, right - left - 1);
// Computes the length of the reply/forward prefix at the start of sub. The
// function calls itself on the text behind a recognised prefix, so stacked
// prefixes are added up. A NULL sub fails the guard and returns 0.
1229 modest_text_utils_get_subject_prefix_len (const gchar *sub)
1231 gint prefix_len = 0;
1233 g_return_val_if_fail (sub, 0);
1238 /* optimization: "Re", "RE", "re","Fwd", "FWD", "fwd","FW","Fw", "fw" */
// Quick rejection on the first two characters.
1239 if (sub[0] != 'R' && sub[0] != 'F' && sub[0] != 'r' && sub[0] != 'f')
// The sub[0] test is always true here because the branch above already
// established that sub[0] is one of four letters.
1241 else if (sub[0] && sub[1] != 'e' && sub[1] != 'E' && sub[1] != 'w' && sub[1] != 'W')
1248 /* skip over a [...] block */
1249 if (sub[prefix_len] == '[') {
1250 int c = prefix_len + 1;
1251 while (sub[c] && sub[c] != ']')
1254 return 0; /* no end to the ']' found */
1259 /* did we find the ':' ? */
1260 if (sub[prefix_len] == ':') {
// A single space behind the colon is tested for here.
1262 if (sub[prefix_len] == ' ')
// Add the length of any further prefix that follows.
1264 prefix_len += modest_text_utils_get_subject_prefix_len (sub + prefix_len);
1265 /* g_warning ("['%s','%s']", sub, (char*) sub + prefix_len); */
// Compares two UTF-8 strings with g_utf8_collate, optionally after lowering
// both with g_utf8_strdown. NULL arguments are accepted.
1273 modest_text_utils_utf8_strcmp (const gchar* s1, const gchar *s2, gboolean insensitive)
1276 /* work even when s1 and/or s2 == NULL */
// Same pointer (including both NULL) is handled first, then each single NULL.
1277 if (G_UNLIKELY(s1 == s2))
1279 if (G_UNLIKELY(!s1))
1281 if (G_UNLIKELY(!s2))
1284 /* if it's not case sensitive */
1287 /* optimization: shortcut if first char is ascii */
// NOTE(review): the mask 0xf0 is zero only for bytes 0x00 to 0x0f, so this
// shortcut does not fire for ordinary ASCII letters; an ASCII test would look
// at bit 0x80. When it does fire, only the first byte of each string is
// compared - confirm the intent.
1288 if (((s1[0] & 0xf0)== 0) && ((s2[0] & 0xf0) == 0))
1289 return s1[0] - s2[0];
1291 return g_utf8_collate (s1, s2);
1297 /* optimization: short cut if first char is ascii */
// Same mask caveat as in the case-sensitive branch above.
1298 if (((s1[0] & 0xf0) == 0) && ((s2[0] & 0xf0) == 0))
1299 return tolower(s1[0]) - tolower(s2[0]);
// Lower-case copies of both strings, compared with the locale collation.
1301 n1 = g_utf8_strdown (s1, -1);
1302 n2 = g_utf8_strdown (s2, -1);
1304 result = g_utf8_collate (n1, n2);
// Formats date for display: the locale time format when date falls on the
// same day number as now, the locale date format otherwise.
// The result lives in a static buffer: it must not be freed, it is
// overwritten by the next call, and the function is therefore not reentrant.
1315 modest_text_utils_get_display_date (time_t date)
1317 #define DATE_BUF_SIZE 64
1318 static gchar date_buf[DATE_BUF_SIZE];
1320 /* calculate the # of days since epoch for
1321 * for today and for the date provided
1322 * based on idea from pvanhoof */
// NOTE(review): both day numbers are derived from raw epoch seconds, so the
// day boundary is midnight UTC, while the text is formatted in local time by
// the strftime wrapper. Near local midnight the two can disagree - confirm.
1323 int day = time(NULL) / (24 * 60 * 60);
1324 int date_day = date / (24 * 60 * 60);
1326 /* if it's today, show the time, if it's not today, show the date instead */
1328 if (day == date_day) /* is the date today? */
1329 modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, "%X", date);
1331 modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, "%x", date);
1333 return date_buf; /* this is a static buffer, don't free! */
// Checks folder_name against a series of rules and returns TRUE only when
// none of them is violated.
1339 modest_text_utils_validate_folder_name (const gchar *folder_name)
1341 /* based on http://msdn2.microsoft.com/en-us/library/aa365247.aspx,
1342 * with some extras */
1346 const gchar **cursor = NULL;
// NULL-terminated list of reserved names.
1347 const gchar *forbidden_names[] = { /* windows does not like these */
1348 "CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6",
1349 "COM7", "COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
1350 ".", "..", "cur", "tmp", "new", NULL /* cur, tmp new are reserved for Maildir */
1353 /* cannot be NULL */
1357 /* cannot be empty */
1358 len = strlen(folder_name);
1362 /* cannot start with a dot, vfat does not seem to like that */
1363 if (folder_name[0] == '.')
1366 /* cannot start or end with a space */
// g_ascii_isspace also matches tab, newline and the other ASCII whitespace
// characters, not only the space character.
1367 if (g_ascii_isspace(folder_name[0]) || g_ascii_isspace(folder_name[len - 1]))
1370 /* cannot contain a forbidden char */
1371 for (i = 0; i < len; i++)
1372 if (modest_text_utils_is_forbidden_char (folder_name[i], FOLDER_NAME_FORBIDDEN_CHARS))
1375 /* cannot contain a forbidden word */
// The whole name is compared with each reserved name, ignoring ASCII case; a
// reserved word appearing inside a longer name does not match.
1377 for (cursor = forbidden_names; cursor && *cursor; ++cursor) {
1378 if (g_ascii_strcasecmp (folder_name, *cursor) == 0)
1383 return TRUE; /* it's valid! */
// Checks domain against a regular expression: one or more labels made of
// letters, digits and dashes, each ending in a letter or digit and followed
// by a dot, then a final label of the same shape. Matching ignores case.
// A NULL domain fails the guard and returns FALSE.
1389 modest_text_utils_validate_domain_name (const gchar *domain)
1391 gboolean valid = FALSE;
1393 const gchar* domain_regex = "^([a-z0-9-]*[a-z0-9]\\.)+[a-z0-9-]*[a-z0-9]$";
1395 g_return_val_if_fail (domain, FALSE);
1400 memset (&rx, 0, sizeof(regex_t)); /* coverity wants this... */
1402 /* domain name: all alphanum or '-' or '.',
1403 * but beginning/ending in alphanum */
// REG_NOSUB: only a match / no-match answer is requested.
1404 if (regcomp (&rx, domain_regex, REG_ICASE|REG_EXTENDED|REG_NOSUB)) {
1405 g_warning ("BUG: error in regexp");
// regexec returns 0 on a match.
1409 valid = (regexec (&rx, domain, 1, NULL, 0) == 0);
// Validates a bare e-mail address of the form name@domain. When
// invalid_char_position is given it is reset to NULL first and later set to
// the offending character if a forbidden special character is found.
// A NULL email_address fails the guard and returns FALSE.
1418 modest_text_utils_validate_email_address (const gchar *email_address,
1419 const gchar **invalid_char_position)
1422 const gchar *c = NULL, *domain = NULL;
// Characters that may not appear unquoted in either part.
1423 static gchar *rfc822_specials = "()<>@,;:\\\"[]&";
1425 if (invalid_char_position)
1426 *invalid_char_position = NULL;
1428 g_return_val_if_fail (email_address, FALSE);
1430 /* check that the email address contains exactly one @ */
// First and last occurrence must be the same character.
1431 if (!strstr(email_address, "@") ||
1432 (strstr(email_address, "@") != g_strrstr(email_address, "@")))
1435 /* first we validate the name portion (name@domain) */
1436 for (c = email_address; *c; c++) {
1438 (c == email_address ||
1440 *(c - 1) == '\"')) {
// A backslash followed by a space is accepted as an escaped space; note that
// the test itself advances c.
1444 if (*c == '\\' && (*++c == ' '))
// Only printable ASCII above the space character is accepted.
1446 if (*c <= ' ' || *c >= 127)
1459 if (*c <= ' ' || *c >= 127)
1461 if (strchr(rfc822_specials, *c)) {
1462 if (invalid_char_position)
1463 *invalid_char_position = c;
// The name part may not be empty and may not end in a dot.
1467 if (c == email_address || *(c - 1) == '.')
1470 /* next we validate the domain portion (name@domain) */
// Step over the at sign; an empty domain is rejected.
1471 if (!*(domain = ++c))
// A dot may not open the domain, follow another dot or end the address.
1475 if (c == domain || *(c - 1) == '.' || *(c + 1) == '\0')
1479 if (*c <= ' ' || *c >= 127)
1481 if (strchr(rfc822_specials, *c)) {
1482 if (invalid_char_position)
1483 *invalid_char_position = c;
// Valid only when count is at least one.
// NOTE(review): count is maintained on lines not visible here; presumably it
// counts the dots in the domain - confirm.
1488 return (count >= 1) ? TRUE : FALSE;
/*
 * Validate a single recipient string.
 *
 * recipient: NUL-terminated recipient text; must not be NULL (the
 *            precondition check returns FALSE for NULL).
 * invalid_char_position: optional out-parameter; reset to NULL on entry
 *            when non-NULL and handed on to
 *            modest_text_utils_validate_email_address().
 *
 * Visible steps:
 *  1. The recipient is first tried as a plain e-mail address.
 *  2. Otherwise a copy of it is made and stripped of surrounding
 *     whitespace.
 *  3. If the text starts with a double quote, it is scanned character by
 *     character for backslashes and for the closing double quote; a
 *     separate loop scans from the start of the stripped copy for '<'.
 *  4. What follows the scan position is copied and stripped again; if it
 *     starts with "<" and ends with ">", the text between the brackets
 *     is copied into 'address' and validated as an e-mail address, and
 *     that outcome is stored in 'valid'.
 *
 * NOTE(review): in step 4 any position stored through
 * invalid_char_position points into the temporary copy 'address', not
 * into 'recipient' -- confirm that callers never dereference it once that
 * copy has been released.
 *
 * NOTE(review): a number of statements (the declarations of right_part,
 * address and valid, the initialisation of 'current', the updates of
 * has_error, the statements executed when the tests succeed, the return
 * statements and the remaining g_free() calls) are not visible in this
 * excerpt; the description above covers only what is visible.
 */
1492 modest_text_utils_validate_recipient (const gchar *recipient, const gchar **invalid_char_position)
1494 gchar *stripped, *current;
1496 gboolean has_error = FALSE;
/* no invalid character reported until one is actually found */
1498 if (invalid_char_position)
1499 *invalid_char_position = NULL;
1501 g_return_val_if_fail (recipient, FALSE);
/* simplest case: the recipient is a bare e-mail address */
1503 if (modest_text_utils_validate_email_address (recipient, invalid_char_position))
/* work on a private copy; g_strstrip() trims it in place */
1506 stripped = g_strdup (recipient);
1507 stripped = g_strstrip (stripped);
/* nothing left after stripping */
1510 if (*current == '\0') {
/* the recipient starts with a quoted string: step over the opening quote
 * and look for the closing one */
1516 if (*current == '\"') {
1517 current = g_utf8_next_char (current);
1519 for (; *current != '\0'; current = g_utf8_next_char (current)) {
1520 if (*current == '\\') {
1521 /* TODO: This causes a warning, which breaks the build,
1522 * because a gchar cannot be < 0.
1524 if (current[1] <0) {
1529 } else if (*current == '\"') {
1531 current = g_utf8_next_char (current);
1537 for (current = stripped ; *current != '\0'; current = g_utf8_next_char (current)) {
1538 if (*current == '<') {
1550 right_part = g_strdup (current);
1552 right_part = g_strstrip (right_part);
1554 if (g_str_has_prefix (right_part, "<") &&
1555 g_str_has_suffix (right_part, ">")) {
1559 address = g_strndup (right_part+1, strlen (right_part) - 2);
1560 g_free (right_part);
1561 valid = modest_text_utils_validate_email_address (address, invalid_char_position);
1565 g_free (right_part);
1572 modest_text_utils_get_display_size (guint64 size)
1574 const guint KB=1024;
1575 const guint MB=1024 * KB;
1576 const guint GB=1024 * MB;
1579 return g_strdup_printf(_FM("sfil_li_size_kb"), 0);
1580 if (0 < size && size < KB)
1581 return g_strdup_printf (_FM("sfil_li_size_kb"), 1);
1582 else if (KB <= size && size < 100 * KB)
1583 return g_strdup_printf (_FM("sfil_li_size_1kb_99kb"), size / KB);
1584 else if (100*KB <= size && size < MB)
1585 return g_strdup_printf (_FM("sfil_li_size_100kb_1mb"), (float) size / MB);
1586 else if (MB <= size && size < 10*MB)
1587 return g_strdup_printf (_FM("sfil_li_size_1mb_10mb"), (float) size / MB);
1588 else if (10*MB <= size && size < GB)
1589 return g_strdup_printf (_FM("sfil_li_size_10mb_1gb"), size / MB);
1591 return g_strdup_printf (_FM("sfil_li_size_1gb_or_greater"), (float) size / GB);
1595 get_email_from_address (const gchar * address)
1597 gchar *left_limit, *right_limit;
1599 left_limit = strstr (address, "<");
1600 right_limit = g_strrstr (address, ">");
1602 if ((left_limit == NULL)||(right_limit == NULL)|| (left_limit > right_limit))
1603 return g_strdup (address);
1605 return g_strndup (left_limit + 1, (right_limit - left_limit) - 1);
1609 modest_text_utils_get_color_string (GdkColor *color)
1611 g_return_val_if_fail (color, NULL);
1613 return g_strdup_printf ("#%x%x%x%x%x%x%x%x%x%x%x%x",
1614 (color->red >> 12) & 0xf, (color->red >> 8) & 0xf,
1615 (color->red >> 4) & 0xf, (color->red) & 0xf,
1616 (color->green >> 12) & 0xf, (color->green >> 8) & 0xf,
1617 (color->green >> 4) & 0xf, (color->green) & 0xf,
1618 (color->blue >> 12) & 0xf, (color->blue >> 8) & 0xf,
1619 (color->blue >> 4) & 0xf, (color->blue) & 0xf);
1623 modest_text_utils_text_buffer_get_text (GtkTextBuffer *buffer)
1625 GtkTextIter start, end;
1626 gchar *slice, *current;
1627 GString *result = g_string_new ("");
1629 g_return_val_if_fail (buffer && GTK_IS_TEXT_BUFFER (buffer), NULL);
1631 gtk_text_buffer_get_start_iter (buffer, &start);
1632 gtk_text_buffer_get_end_iter (buffer, &end);
1634 slice = gtk_text_buffer_get_slice (buffer, &start, &end, FALSE);
1637 while (current && current != '\0') {
1638 if (g_utf8_get_char (current) == 0xFFFC) {
1639 result = g_string_append_c (result, ' ');
1640 current = g_utf8_next_char (current);
1642 gchar *next = g_utf8_strchr (current, -1, 0xFFFC);
1644 result = g_string_append (result, current);
1646 result = g_string_append_len (result, current, next - current);
1653 return g_string_free (result, FALSE);
1658 modest_text_utils_is_forbidden_char (const gchar character,
1659 ModestTextUtilsForbiddenCharType type)
1662 const gchar *forbidden_chars = NULL;
1664 /* We need to get the length in the switch because the
1665 compiler needs to know the size at compile time */
1667 case ACCOUNT_TITLE_FORBIDDEN_CHARS:
1668 forbidden_chars = account_title_forbidden_chars;
1669 len = G_N_ELEMENTS (account_title_forbidden_chars);
1671 case FOLDER_NAME_FORBIDDEN_CHARS:
1672 forbidden_chars = folder_name_forbidden_chars;
1673 len = G_N_ELEMENTS (folder_name_forbidden_chars);
1675 case USER_NAME_FORBIDDEN_NAMES:
1676 forbidden_chars = user_name_forbidden_chars;
1677 len = G_N_ELEMENTS (user_name_forbidden_chars);
1680 g_return_val_if_reached (TRUE);
1683 for (i = 0; i < len ; i++)
1684 if (forbidden_chars[i] == character)
1687 return FALSE; /* it's valid! */
1691 modest_text_utils_label_get_selection (GtkLabel *label)
1696 if (gtk_label_get_selection_bounds (GTK_LABEL (label), &start, &end)) {
1697 const gchar *start_offset;
1698 const gchar *end_offset;
1699 start_offset = gtk_label_get_text (GTK_LABEL (label));
1700 start_offset = g_utf8_offset_to_pointer (start_offset, start);
1701 end_offset = gtk_label_get_text (GTK_LABEL (label));
1702 end_offset = g_utf8_offset_to_pointer (end_offset, end);
1703 selection = g_strndup (start_offset, end_offset - start_offset);
1706 return g_strdup ("");
1711 _forward_search_image_char (gunichar ch,
1714 return (ch == 0xFFFC);
/*
 * Check the current selection of a text buffer.
 *
 * buffer: a GtkTextBuffer (the precondition check returns FALSE for
 *         anything else).
 *
 * 'result' starts out as "the buffer has a selection". The selection is
 * then inspected for the character U+FFFC (object replacement character,
 * which the existing comment below associates with images): first the
 * character at the start of the selection, then a forward search from
 * the start, bounded by the end of the selection moved back by one
 * character.
 *
 * NOTE(review): the declaration of 'result', the statements executed
 * when U+FFFC is found, the remainder of the last call and the return
 * statement are not visible in this excerpt; presumably a selection
 * containing U+FFFC is reported as invalid -- confirm against the full
 * source.
 */
1718 modest_text_utils_buffer_selection_is_valid (GtkTextBuffer *buffer)
1721 GtkTextIter start, end;
1723 g_return_val_if_fail (GTK_IS_TEXT_BUFFER (buffer), FALSE);
/* without a selection there is nothing to inspect */
1725 result = gtk_text_buffer_get_has_selection (GTK_TEXT_BUFFER (buffer));
1727 /* check there are no images in selection */
1729 gtk_text_buffer_get_selection_bounds (buffer, &start, &end);
/* the character at the start of the selection is tested on its own */
1730 if (gtk_text_iter_get_char (&start)== 0xFFFC)
/* move the end back by one character before using it as the search limit */
1733 gtk_text_iter_backward_char (&end);
/* search forward from the start for U+FFFC */
1734 if (gtk_text_iter_forward_find_char (&start, _forward_search_image_char,
/*
 * Build an escaped copy of 'text'.
 *
 * The input is walked one byte at a time; for each byte either the
 * two-character sequence "__" or the byte itself is appended to the
 * result.
 *
 * text: NUL-terminated input string.
 *
 * Returns the character data of the accumulated GString as a newly
 * allocated string; release it with g_free().
 *
 * NOTE(review): the declaration of 'p', the condition that chooses
 * between the two appends and any handling of a NULL 'text' are not
 * visible in this excerpt; presumably "__" replaces each '_' so that it
 * is not read as a mnemonic marker -- confirm against the full source.
 */
1745 modest_text_utils_escape_mnemonics (const gchar *text)
1748 GString *result = NULL;
1753 result = g_string_new ("");
1754 for (p = text; *p != '\0'; p++) {
/* escaped form */
1756 result = g_string_append (result, "__");
/* byte copied unchanged */
1758 result = g_string_append_c (result, *p);
/* FALSE: keep the character data and return it to the caller */
1761 return g_string_free (result, FALSE);