1 /* Copyright (c) 2006, Nokia Corporation
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of the Nokia Corporation nor the names of its
14 * contributors may be used to endorse or promote products derived from
15 * this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
18 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
20 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
21 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #endif /*_GNU_SOURCE*/
35 #include <string.h> /* for strcasestr */
40 #include <glib/gi18n.h>
42 #include <modest-tny-platform-factory.h>
43 #include <modest-text-utils.h>
44 #include <modest-runtime.h>
49 #endif /*HAVE_CONFIG_H */
52 #define FORWARD_STRING _("mcen_ia_editor_original_message")
53 #define FROM_STRING _("mail_va_from")
54 #define SENT_STRING _("mcen_fi_message_properties_sent")
55 #define TO_STRING _("mail_va_to")
56 #define SUBJECT_STRING _("mail_va_subject")
57 #define EMPTY_STRING ""
60 * do the hyperlinkification only for texts < 50 Kb,
61 * as it's quite slow. Without this, e.g. mail with
62 * an uuencoded part (which is not recognized as attachment,
65 #define HYPERLINKIFY_MAX_LENGTH (1024*50)
66 #define SIGNATURE_MARKER "--"
70 * we need these regexps to find URLs in plain text e-mails
72 typedef struct _url_match_pattern_t url_match_pattern_t;
73 struct _url_match_pattern_t {
79 typedef struct _url_match_t url_match_t;
88 * we mark the ampersand with \007 when converting text->html,
89 * because hyperlink detection runs after the text->html step and
90 * could be confused by a literal ampersand.
91 * e.g. 1<3 ==> 1\007lt;3
93 #define MARK_AMP '\007'
94 #define MARK_AMP_STR "\007"
96 /* mark '&' separately, because it can be part of URLs.
97 * e.g. a&b => a\006amp;b, but a>b => a\007gt;b
99 * we need to handle '&' separately, because it can be part of URIs
100 * (as in href="http://foo.bar?a=1&b=1"), so inside those URIs
101 * we need to re-replace \006amp; with '&' again, while outside uri's
104 * yes, it's messy, but a consequence of doing text->html first, then hyperlinkify
106 #define MARK_AMP_URI '\006'
107 #define MARK_AMP_URI_STR "\006"
110 /* note: match MARK_AMP_URI_STR as well, because after txt->html, a '&' will look like $(MARK_AMP_URI_STR)"amp;" */
111 #define MAIL_VIEWER_URL_MATCH_PATTERNS { \
112 { "(file|rtsp|http|ftp|https|mms|mmsh|rtsp|rdp|lastfm)://[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR \
113 "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]", \
115 { "www\\.[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]",\
117 { "ftp\\.[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]",\
119 { "(jabberto|voipto|sipto|sip|chatto|xmpp):[-_a-z@0-9.+]+", \
121 { "mailto:[-_a-z0-9.\\+]+@[-_a-z0-9.]+", \
123 { "[-_a-z0-9.\\+]+@[-_a-z0-9.]+",\
127 const gchar account_title_forbidden_chars[] = {
128 '\\', '/', ':', '*', '?', '\'', '<', '>', '|', '^'
130 const gchar folder_name_forbidden_chars[] = {
131 '<', '>', ':', '\'', '/', '\\', '|', '?', '*', '^', '%', '$', '#', '&'
133 const gchar user_name_forbidden_chars[] = {
136 const guint ACCOUNT_TITLE_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (account_title_forbidden_chars);
137 const guint FOLDER_NAME_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (folder_name_forbidden_chars);
138 const guint USER_NAME_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (user_name_forbidden_chars);
141 static gchar* cite (const time_t sent_date, const gchar *from);
142 static void hyperlinkify_plain_text (GString *txt, gint offset);
143 static gint cmp_offsets_reverse (const url_match_t *match1, const url_match_t *match2);
144 static GSList* get_url_matches (GString *txt, gint offset);
146 static GString* get_next_line (const char *b, const gsize blen, const gchar * iter);
147 static int get_indent_level (const char *l);
148 static void unquote_line (GString * l);
149 static void append_quoted (GString * buf, const int indent, const GString * str,
151 static int get_breakpoint_utf8 (const gchar * s, const gint indent, const gint limit);
152 static int get_breakpoint_ascii (const gchar * s, const gint indent, const gint limit);
153 static int get_breakpoint (const gchar * s, const gint indent, const gint limit);
155 static gchar* modest_text_utils_quote_plain_text (const gchar *text,
157 const gchar *signature,
161 static gchar* modest_text_utils_quote_html (const gchar *text,
163 const gchar *signature,
166 static gchar* get_email_from_address (const gchar *address);
169 /* ******************************************************************* */
170 /* ************************* PUBLIC FUNCTIONS ************************ */
171 /* ******************************************************************* */
174 modest_text_utils_quote (const gchar *text,
175 const gchar *content_type,
176 const gchar *signature,
178 const time_t sent_date,
182 gchar *retval, *cited;
184 g_return_val_if_fail (text, NULL);
185 g_return_val_if_fail (content_type, NULL);
187 cited = cite (sent_date, from);
189 if (content_type && strcmp (content_type, "text/html") == 0)
190 /* TODO: extract the <body> of the HTML and pass it to
192 retval = modest_text_utils_quote_html (text, cited, signature, attachments, limit);
194 retval = modest_text_utils_quote_plain_text (text, cited, signature, attachments, limit);
203 modest_text_utils_cite (const gchar *text,
204 const gchar *content_type,
205 const gchar *signature,
212 g_return_val_if_fail (text, NULL);
213 g_return_val_if_fail (content_type, NULL);
216 retval = g_strdup ("");
217 else if (strcmp(content_type, "text/html") == 0) {
218 tmp_sig = g_strconcat (SIGNATURE_MARKER,"\n", signature, NULL);
219 retval = modest_text_utils_convert_to_html_body(tmp_sig, -1, TRUE);
222 retval = g_strconcat (text, SIGNATURE_MARKER, "\n", signature, NULL);
229 forward_cite (const gchar *from,
232 const gchar *subject)
234 g_return_val_if_fail (sent, NULL);
236 return g_strdup_printf ("%s\n%s %s\n%s %s\n%s %s\n%s %s\n",
238 FROM_STRING, (from)?from:"",
240 TO_STRING, (to)?to:"",
241 SUBJECT_STRING, (subject)?subject:"");
245 modest_text_utils_inline (const gchar *text,
246 const gchar *content_type,
247 const gchar *signature,
251 const gchar *subject)
257 g_return_val_if_fail (text, NULL);
258 g_return_val_if_fail (content_type, NULL);
260 modest_text_utils_strftime (sent_str, 100, "%c", sent_date);
262 cited = forward_cite (from, sent_str, to, subject);
264 if (content_type && strcmp (content_type, "text/html") == 0)
265 retval = modest_text_utils_quote_html (text, cited, signature, NULL, 80);
267 retval = modest_text_utils_quote_plain_text (text, cited, signature, NULL, 80);
273 /* just to prevent warnings:
274 * warning: `%x' yields only last 2 digits of year in some locales
277 modest_text_utils_strftime(char *s, gsize max, const char *fmt, time_t timet)
281 /* does not work on old maemo glib:
282 * g_date_set_time_t (&date, timet);
284 localtime_r (&timet, &tm);
285 return strftime(s, max, fmt, &tm);
289 modest_text_utils_derived_subject (const gchar *subject, const gchar *prefix)
293 g_return_val_if_fail (prefix, NULL);
295 if (!subject || subject[0] == '\0')
296 subject = _("mail_va_no_subject");
298 tmp = g_strchug (g_strdup (subject));
300 if (!strncmp (tmp, prefix, strlen (prefix))) {
304 return g_strdup_printf ("%s %s", prefix, subject);
309 modest_text_utils_remove_address (const gchar *address_list, const gchar *address)
311 gchar *dup, *token, *ptr = NULL, *result;
312 GString *filtered_emails;
313 gchar *email_address;
315 g_return_val_if_fail (address_list, NULL);
318 return g_strdup (address_list);
320 email_address = get_email_from_address (address);
322 /* search for substring */
323 if (!strstr ((const char *) address_list, (const char *) email_address)) {
324 g_free (email_address);
325 return g_strdup (address_list);
328 dup = g_strdup (address_list);
329 filtered_emails = g_string_new (NULL);
331 token = strtok_r (dup, ",", &ptr);
333 while (token != NULL) {
334 /* Add to list if not found */
335 if (!strstr ((const char *) token, (const char *) email_address)) {
336 if (filtered_emails->len == 0)
337 g_string_append_printf (filtered_emails, "%s", g_strstrip (token));
339 g_string_append_printf (filtered_emails, ",%s", g_strstrip (token));
341 token = strtok_r (NULL, ",", &ptr);
343 result = filtered_emails->str;
346 g_free (email_address);
348 g_string_free (filtered_emails, FALSE);
355 modest_text_utils_remove_duplicate_addresses (const gchar *address_list)
357 GSList *addresses, *cursor;
359 gchar *new_list = NULL;
361 g_return_val_if_fail (address_list, NULL);
363 table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
364 addresses = modest_text_utils_split_addresses_list (address_list);
368 const gchar* address = (const gchar*)cursor->data;
370 /* We need only the email to just compare it and not
371 the full address which would make "a <a@a.com>"
372 different from "a@a.com" */
373 const gchar *email = get_email_from_address (address);
375 /* ignore the address if already seen */
376 if (g_hash_table_lookup (table, email) == 0) {
379 /* Include the full address and not only the
380 email in the returned list */
382 tmp = g_strdup (address);
384 tmp = g_strjoin (",", new_list, address, NULL);
389 g_hash_table_insert (table, (gchar*)email, GINT_TO_POINTER(1));
391 cursor = g_slist_next (cursor);
394 g_hash_table_unref (table);
395 g_slist_foreach (addresses, (GFunc)g_free, NULL);
396 g_slist_free (addresses);
403 modest_text_utils_convert_buffer_to_html_start (GString *html, const gchar *data, gssize n)
406 gboolean space_seen = FALSE;
407 guint break_dist = 0; /* distance since last break point */
412 /* replace with special html chars where needed*/
413 for (i = 0; i != n; ++i) {
414 guchar kar = data[i];
416 if (space_seen && kar != ' ') {
417 g_string_append_c (html, ' ');
421 /* we artificially insert a breakpoint (newline)
422 * after 256, to make sure our lines are not so long
423 * they will DOS the regexping later
424 * Also, check that kar is ASCII to make sure that we
425 * don't break a UTF8 char in two
427 if (++break_dist >= 256 && kar < 127) {
428 g_string_append_c (html, '\n');
436 /* this is a temp place holder for '&'; we can only
437 * set the real '&' after hyperlink translation, otherwise
438 * we might screw that up */
439 break; /* ignore embedded \0s and MARK_AMP */
440 case '<' : g_string_append (html, MARK_AMP_STR "lt;"); break;
441 case '>' : g_string_append (html, MARK_AMP_STR "gt;"); break;
442 case '&' : g_string_append (html, MARK_AMP_URI_STR "amp;"); break; /* special case */
443 case '"' : g_string_append (html, MARK_AMP_STR "quot;"); break;
445 /* don't convert ' --> wpeditor will try to re-convert it... */
446 //case '\'' : g_string_append (html, "'"); break;
447 case '\n' : g_string_append (html, "<br>\n");break_dist= 0; break;
448 case '\t' : g_string_append (html, MARK_AMP_STR "nbsp;" MARK_AMP_STR "nbsp;" MARK_AMP_STR "nbsp; ");
449 break_dist=0; break; /* note the space at the end*/
452 if (space_seen) { /* second space in a row */
453 g_string_append (html, " ");
459 g_string_append_c (html, kar);
466 modest_text_utils_convert_buffer_to_html_finish (GString *html)
469 /* replace all our MARK_AMPs with real ones */
470 for (i = 0; i != html->len; ++i)
471 if ((html->str)[i] == MARK_AMP || (html->str)[i] == MARK_AMP_URI)
472 (html->str)[i] = '&';
477 modest_text_utils_convert_to_html (const gchar *data)
482 g_return_val_if_fail (data, NULL);
488 html = g_string_sized_new (1.5 * len); /* just a guess... */
490 g_string_append_printf (html,
492 "<meta http-equiv=\"content-type\" content=\"text/html; charset=utf8\">"
496 modest_text_utils_convert_buffer_to_html_start (html, data, -1);
498 g_string_append (html, "</body></html>");
500 if (len <= HYPERLINKIFY_MAX_LENGTH)
501 hyperlinkify_plain_text (html, 0);
503 modest_text_utils_convert_buffer_to_html_finish (html);
505 return g_string_free (html, FALSE);
509 modest_text_utils_convert_to_html_body (const gchar *data, gssize n, gboolean hyperlinkify)
513 g_return_val_if_fail (data, NULL);
520 html = g_string_sized_new (1.5 * n); /* just a guess... */
522 modest_text_utils_convert_buffer_to_html_start (html, data, n);
524 if (hyperlinkify && (n < HYPERLINKIFY_MAX_LENGTH))
525 hyperlinkify_plain_text (html, 0);
527 modest_text_utils_convert_buffer_to_html_finish (html);
529 return g_string_free (html, FALSE);
533 modest_text_utils_get_addresses_indexes (const gchar *addresses, GSList **start_indexes, GSList **end_indexes)
535 gchar *current, *start, *last_blank;
536 gint start_offset = 0, current_offset = 0;
538 g_return_if_fail (start_indexes != NULL);
539 g_return_if_fail (end_indexes != NULL);
541 start = (gchar *) addresses;
545 while (*current != '\0') {
546 if ((start == current)&&((*current == ' ')||(*current == ',')||(*current == ';'))) {
547 start = g_utf8_next_char (start);
549 last_blank = current;
550 } else if ((*current == ',')||(*current == ';')) {
551 gint *start_index, *end_index;
552 start_index = g_new0(gint, 1);
553 end_index = g_new0(gint, 1);
554 *start_index = start_offset;
555 *end_index = current_offset;
556 *start_indexes = g_slist_prepend (*start_indexes, start_index);
557 *end_indexes = g_slist_prepend (*end_indexes, end_index);
558 start = g_utf8_next_char (current);
559 start_offset = current_offset + 1;
561 } else if (*current == '"') {
562 current = g_utf8_next_char (current);
564 while ((*current != '"')&&(*current != '\0')) {
565 current = g_utf8_next_char (current);
570 current = g_utf8_next_char (current);
574 if (start != current) {
575 gint *start_index, *end_index;
576 start_index = g_new0(gint, 1);
577 end_index = g_new0(gint, 1);
578 *start_index = start_offset;
579 *end_index = current_offset;
580 *start_indexes = g_slist_prepend (*start_indexes, start_index);
581 *end_indexes = g_slist_prepend (*end_indexes, end_index);
584 *start_indexes = g_slist_reverse (*start_indexes);
585 *end_indexes = g_slist_reverse (*end_indexes);
592 modest_text_utils_split_addresses_list (const gchar *addresses)
595 const gchar *my_addrs = addresses;
599 g_return_val_if_fail (addresses, NULL);
601 /* skip any space, ',', ';' at the start */
602 while (my_addrs && (my_addrs[0] == ' ' || my_addrs[0] == ',' || my_addrs[0] == ';'))
605 /* are we at the end of addresses list? */
609 /* nope, we are at the start of some address
610 * now, let's find the end of the address */
612 while (end[0] && end[0] != ',' && end[0] != ';')
615 /* we got the address; copy it and remove trailing whitespace */
616 addr = g_strndup (my_addrs, end - my_addrs);
619 head = g_slist_append (NULL, addr);
620 head->next = modest_text_utils_split_addresses_list (end); /* recurse */
627 modest_text_utils_address_range_at_position (const gchar *recipients_list,
632 gchar *current = NULL;
633 gint range_start = 0;
636 gboolean is_quoted = FALSE;
638 g_return_if_fail (recipients_list);
639 g_return_if_fail (position < g_utf8_strlen(recipients_list, -1));
642 for (current = (gchar *) recipients_list; *current != '\0';
643 current = g_utf8_find_next_char (current, NULL)) {
644 gunichar c = g_utf8_get_char (current);
646 if ((c == ',') && (!is_quoted)) {
647 if (index < position) {
648 range_start = index + 1;
652 } else if (c == '\"') {
653 is_quoted = !is_quoted;
654 } else if ((c == ' ') &&(range_start == index)) {
662 *start = range_start;
668 modest_text_utils_address_with_standard_length (const gchar *recipients_list)
672 GString *buffer = g_string_new ("");
674 splitted = g_strsplit (recipients_list, "\n", 0);
678 if (current != splitted)
679 buffer = g_string_append_c (buffer, '\n');
680 line = g_strndup (*splitted, 1000);
681 buffer = g_string_append (buffer, line);
686 g_strfreev (splitted);
688 return g_string_free (buffer, FALSE);
692 /* ******************************************************************* */
693 /* ************************* UTILITY FUNCTIONS *********************** */
694 /* ******************************************************************* */
697 get_next_line (const gchar * b, const gsize blen, const gchar * iter)
703 return g_string_new("");
711 gs = g_string_new_len (i0, iter - i0);
715 get_indent_level (const char *l)
732 /* if we hit the signature marker "-- ", we return -(indent + 1). This
733 * stops reformatting.
735 if (strcmp (l, "-- ") == 0) {
743 unquote_line (GString * l)
758 g_string_erase (l, 0, p - l->str);
762 append_quoted (GString * buf, int indent, const GString * str,
767 indent = indent < 0 ? abs (indent) - 1 : indent;
768 for (i = 0; i <= indent; i++) {
769 g_string_append (buf, "> ");
772 g_string_append_len (buf, str->str, cutpoint);
774 g_string_append (buf, str->str);
776 g_string_append (buf, "\n");
780 get_breakpoint_utf8 (const gchar * s, gint indent, const gint limit)
783 const gchar *pos, *last;
786 indent = indent < 0 ? abs (indent) - 1 : indent;
790 uni = g_utf8_to_ucs4_fast (s, -1, NULL);
792 if ((index + 2 * indent > limit) && last) {
796 if (g_unichar_isspace (uni[index])) {
799 pos = g_utf8_next_char (pos);
807 get_breakpoint_ascii (const gchar * s, const gint indent, const gint limit)
812 if (last + 2 * indent < limit)
815 for (i = strlen (s); i > 0; i--) {
817 if (i + 2 * indent <= limit) {
828 get_breakpoint (const gchar * s, const gint indent, const gint limit)
831 if (g_utf8_validate (s, -1, NULL)) {
832 return get_breakpoint_utf8 (s, indent, limit);
833 } else { /* assume ASCII */
834 //g_warning("invalid UTF-8 in msg");
835 return get_breakpoint_ascii (s, indent, limit);
840 cite (const time_t sent_date, const gchar *from)
842 return g_strdup (_("mcen_ia_editor_original_message"));
846 quoted_attachments (GList *attachments)
849 GString *result = g_string_new ("");
850 for (node = attachments; node != NULL; node = g_list_next (node)) {
851 gchar *filename = (gchar *) node->data;
852 g_string_append_printf ( result, "%s %s\n", _("mcen_ia_editor_attach_filename"), filename);
855 return g_string_free (result, FALSE);
860 modest_text_utils_quote_plain_text (const gchar *text,
862 const gchar *signature,
867 gint indent, breakpoint, rem_indent = 0;
868 GString *q, *l, *remaining;
870 gchar *attachments_string = NULL;
872 q = g_string_new ("\n");
873 q = g_string_append (q, cite);
874 q = g_string_append_c (q, '\n');
876 /* remaining will store the rest of the line if we have to break it */
877 remaining = g_string_new ("");
882 l = get_next_line (text, len, iter);
883 iter = iter + l->len + 1;
884 indent = get_indent_level (l->str);
887 if (remaining->len) {
888 if (l->len && indent == rem_indent) {
889 g_string_prepend (l, " ");
890 g_string_prepend (l, remaining->str);
894 get_breakpoint (remaining->str,
897 append_quoted (q, rem_indent,
898 remaining, breakpoint);
899 g_string_erase (remaining, 0,
901 if (remaining->str[0] == ' ') {
902 g_string_erase (remaining, 0,
905 } while (remaining->len);
908 g_string_free (remaining, TRUE);
909 breakpoint = get_breakpoint (l->str, indent, limit);
910 remaining = g_string_new (l->str + breakpoint);
911 if (remaining->str[0] == ' ') {
912 g_string_erase (remaining, 0, 1);
915 append_quoted (q, indent, l, breakpoint);
916 g_string_free (l, TRUE);
917 } while ((iter < text + len) || (remaining->str[0]));
919 attachments_string = quoted_attachments (attachments);
920 q = g_string_append (q, attachments_string);
921 g_free (attachments_string);
923 if (signature != NULL) {
924 q = g_string_append (q, "\n--\n");
925 q = g_string_append (q, signature);
926 q = g_string_append_c (q, '\n');
929 return g_string_free (q, FALSE);
933 quote_html_add_to_gstring (GString *string,
936 if (text && strcmp (text, "")) {
937 gchar *html_text = modest_text_utils_convert_to_html_body (text, -1, TRUE);
938 g_string_append_printf (string, "%s<br/>", html_text);
944 modest_text_utils_quote_html (const gchar *text,
946 const gchar *signature,
950 GString *result_string;
954 "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n" \
958 if (text || cite || signature) {
959 g_string_append (result_string, "<pre>");
960 quote_html_add_to_gstring (result_string, cite);
961 quote_html_add_to_gstring (result_string, text);
963 gchar *attachments_string = quoted_attachments (attachments);
964 quote_html_add_to_gstring (result_string, attachments_string);
965 g_free (attachments_string);
968 quote_html_add_to_gstring (result_string, SIGNATURE_MARKER);
969 quote_html_add_to_gstring (result_string, signature);
971 g_string_append (result_string, "</pre>");
973 g_string_append (result_string, "</body>");
974 g_string_append (result_string, "</html>");
976 return g_string_free (result_string, FALSE);
980 cmp_offsets_reverse (const url_match_t *match1, const url_match_t *match2)
982 return match2->offset - match1->offset;
985 static gboolean url_matches_block = 0;
986 static url_match_pattern_t patterns[] = MAIL_VIEWER_URL_MATCH_PATTERNS;
993 const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
994 for (i = 0; i != pattern_num; ++i) {
995 patterns[i].preg = g_slice_new0 (regex_t);
997 /* this should not happen */
998 if (regcomp (patterns[i].preg, patterns[i].regex,
999 REG_ICASE|REG_EXTENDED|REG_NEWLINE) != 0) {
1000 g_warning ("%s: error in regexp:\n%s\n", __FUNCTION__, patterns[i].regex);
1011 const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
1012 for (i = 0; i != pattern_num; ++i) {
1013 regfree (patterns[i].preg);
1014 g_slice_free (regex_t, patterns[i].preg);
1015 } /* don't free patterns itself -- it's static */
1019 modest_text_utils_hyperlinkify_begin (void)
1021 if (url_matches_block == 0)
1022 compile_patterns ();
1023 url_matches_block ++;
1027 modest_text_utils_hyperlinkify_end (void)
1029 url_matches_block--;
1030 if (url_matches_block <= 0)
1036 get_url_matches (GString *txt, gint offset)
1039 guint rv, i, tmp_offset = 0;
1040 GSList *match_list = NULL;
1042 const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t);
1044 /* initialize the regexps */
1045 modest_text_utils_hyperlinkify_begin ();
1047 /* find all the matches */
1048 for (i = 0; i != pattern_num; ++i) {
1049 tmp_offset = offset;
1052 gboolean is_submatch;
1055 if ((rv = regexec (patterns[i].preg, txt->str + tmp_offset, 1, &rm, 0)) != 0) {
1056 g_return_val_if_fail (rv == REG_NOMATCH, NULL); /* this should not happen */
1057 break; /* try next regexp */
1062 is_submatch = FALSE;
1063 /* check old matches to see if this has already been matched */
1064 cursor = match_list;
1065 while (cursor && !is_submatch) {
1066 const url_match_t *old_match =
1067 (const url_match_t *) cursor->data;
1068 guint new_offset = tmp_offset + rm.rm_so;
1069 is_submatch = (new_offset > old_match->offset &&
1070 new_offset < old_match->offset + old_match->len);
1071 cursor = g_slist_next (cursor);
1075 /* make a list of our matches (<offset, len, prefix> tuples) */
1076 match = g_slice_new (url_match_t);
1077 match->offset = tmp_offset + rm.rm_so;
1078 match->len = rm.rm_eo - rm.rm_so;
1079 match->prefix = patterns[i].prefix;
1080 match_list = g_slist_prepend (match_list, match);
1082 tmp_offset += rm.rm_eo;
1086 modest_text_utils_hyperlinkify_end ();
1088 /* now sort the list, so the matches are in reverse order of occurrence.
1089 * that way, we can do the replacements starting from the end, so we don't need
1090 * to recalculate the offsets
1092 match_list = g_slist_sort (match_list,
1093 (GCompareFunc)cmp_offsets_reverse);
1099 /* replace all occurrences of needle in haystack with repl */
1101 replace_string (const gchar *haystack, const gchar *needle, gchar repl)
1103 gchar *str, *cursor;
1105 if (!haystack || !needle || strlen(needle) == 0)
1106 return haystack ? g_strdup(haystack) : NULL;
1108 str = g_strdup (haystack);
1110 for (cursor = str; cursor && *cursor; ++cursor) {
1111 if (g_str_has_prefix (cursor, needle)) {
1113 memmove (cursor + 1,
1114 cursor + strlen (needle),
1115 strlen (cursor + strlen (needle)) + 1);
1123 hyperlinkify_plain_text (GString *txt, gint offset)
1126 GSList *match_list = get_url_matches (txt, offset);
1128 /* we will work backwards, so the offsets stay valid */
1129 for (cursor = match_list; cursor; cursor = cursor->next) {
1131 url_match_t *match = (url_match_t*) cursor->data;
1132 gchar *url = g_strndup (txt->str + match->offset, match->len);
1133 gchar *repl = NULL; /* replacement */
1135 /* the string still contains $(MARK_AMP_URI_STR)"amp;" for each
1136 * '&' in the original, because of the text->html conversion.
1137 * in the href-URL (and only there), we must convert that back to
1140 gchar *href_url = replace_string (url, MARK_AMP_URI_STR "amp;", '&');
1142 /* the prefix is NULL: use the one that is already there */
1143 repl = g_strdup_printf ("<a href=\"%s%s\">%s</a>",
1144 match->prefix ? match->prefix : EMPTY_STRING,
1147 /* replace the old thing with our hyperlink
1148 * replacement thing */
1149 g_string_erase (txt, match->offset, match->len);
1150 g_string_insert (txt, match->offset, repl);
1156 g_slice_free (url_match_t, match);
1159 g_slist_free (match_list);
1163 modest_text_utils_hyperlinkify (GString *string_buffer)
1168 after_body = strstr (string_buffer->str, "<body>");
1169 if (after_body != NULL)
1170 offset = after_body - string_buffer->str;
1171 hyperlinkify_plain_text (string_buffer, offset);
1175 /* for optimization reasons, we change the string in-place */
1177 modest_text_utils_get_display_address (gchar *address)
1181 g_return_if_fail (address);
1186 /* should not be needed, and otherwise, we probably won't screw up the address
1187 * more than it already is :)
1188 * g_return_val_if_fail (g_utf8_validate (address, -1, NULL), NULL);
1191 /* remove leading whitespace */
1192 if (address[0] == ' ')
1193 g_strchug (address);
1195 for (i = 0; address[i]; ++i) {
1196 if (address[i] == '<') {
1197 if (G_UNLIKELY(i == 0))
1198 return; /* there's nothing else, leave it */
1200 address[i] = '\0'; /* terminate the string here */
1212 modest_text_utils_get_email_address (const gchar *full_address)
1214 const gchar *left, *right;
1216 g_return_val_if_fail (full_address, NULL);
1221 g_return_val_if_fail (g_utf8_validate (full_address, -1, NULL), NULL);
1223 left = g_strrstr_len (full_address, strlen(full_address), "<");
1225 return g_strdup (full_address);
1227 right = g_strstr_len (left, strlen(left), ">");
1229 return g_strdup (full_address);
1231 return g_strndup (left + 1, right - left - 1);
1235 modest_text_utils_get_subject_prefix_len (const gchar *sub)
1237 gint prefix_len = 0;
1239 g_return_val_if_fail (sub, 0);
1244 /* optimization: "Re", "RE", "re","Fwd", "FWD", "fwd","FW","Fw", "fw" */
1245 if (sub[0] != 'R' && sub[0] != 'F' && sub[0] != 'r' && sub[0] != 'f')
1247 else if (sub[0] && sub[1] != 'e' && sub[1] != 'E' && sub[1] != 'w' && sub[1] != 'W')
1254 /* skip over a [...] block */
1255 if (sub[prefix_len] == '[') {
1256 int c = prefix_len + 1;
1257 while (sub[c] && sub[c] != ']')
1260 return 0; /* no end to the ']' found */
1265 /* did we find the ':' ? */
1266 if (sub[prefix_len] == ':') {
1268 if (sub[prefix_len] == ' ')
1270 prefix_len += modest_text_utils_get_subject_prefix_len (sub + prefix_len);
1271 /* g_warning ("['%s','%s']", sub, (char*) sub + prefix_len); */
1279 modest_text_utils_utf8_strcmp (const gchar* s1, const gchar *s2, gboolean insensitive)
1282 /* work even when s1 and/or s2 == NULL */
1283 if (G_UNLIKELY(s1 == s2))
1285 if (G_UNLIKELY(!s1))
1287 if (G_UNLIKELY(!s2))
1290 /* if it's not case sensitive */
1293 /* optimization: shortcut if first char is ascii */
1294 if (((s1[0] & 0xf0)== 0) && ((s2[0] & 0xf0) == 0))
1295 return s1[0] - s2[0];
1297 return g_utf8_collate (s1, s2);
1303 /* optimization: shortcut if first char is ascii */
1304 if (((s1[0] & 0xf0) == 0) && ((s2[0] & 0xf0) == 0))
1305 return tolower(s1[0]) - tolower(s2[0]);
1307 n1 = g_utf8_strdown (s1, -1);
1308 n2 = g_utf8_strdown (s2, -1);
1310 result = g_utf8_collate (n1, n2);
1321 modest_text_utils_get_display_date (time_t date)
1323 #define DATE_BUF_SIZE 64
1324 static gchar date_buf[DATE_BUF_SIZE];
1326 /* calculate the # of days since epoch
1327 * for today and for the date provided
1328 * based on idea from pvanhoof */
1329 int day = time(NULL) / (24 * 60 * 60);
1330 int date_day = date / (24 * 60 * 60);
1332 /* if it's today, show the time, if it's not today, show the date instead */
1334 if (day == date_day) /* is the date today? */
1335 modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, "%X", date);
1337 modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, "%x", date);
1339 return date_buf; /* this is a static buffer, don't free! */
1345 modest_text_utils_validate_folder_name (const gchar *folder_name)
1347 /* based on http://msdn2.microsoft.com/en-us/library/aa365247.aspx,
1348 * with some extras */
1352 const gchar **cursor = NULL;
1353 const gchar *forbidden_names[] = { /* windows does not like these */
1354 "CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6",
1355 "COM7", "COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
1356 ".", "..", "cur", "tmp", "new", NULL /* cur, tmp new are reserved for Maildir */
1359 /* cannot be NULL */
1363 /* cannot be empty */
1364 len = strlen(folder_name);
1368 /* cannot start with a dot, vfat does not seem to like that */
1369 if (folder_name[0] == '.')
1372 /* cannot start or end with a space */
1373 if (g_ascii_isspace(folder_name[0]) || g_ascii_isspace(folder_name[len - 1]))
1376 /* cannot contain a forbidden char */
1377 for (i = 0; i < len; i++)
1378 if (modest_text_utils_is_forbidden_char (folder_name[i], FOLDER_NAME_FORBIDDEN_CHARS))
1381 /* cannot contain a forbidden word */
1383 for (cursor = forbidden_names; cursor && *cursor; ++cursor) {
1384 if (g_ascii_strcasecmp (folder_name, *cursor) == 0)
1389 return TRUE; /* it's valid! */
1395 modest_text_utils_validate_domain_name (const gchar *domain)
1397 gboolean valid = FALSE;
1399 const gchar* domain_regex = "^([a-z0-9-]*[a-z0-9]\\.)+[a-z0-9-]*[a-z0-9]$";
1401 g_return_val_if_fail (domain, FALSE);
1406 memset (&rx, 0, sizeof(regex_t)); /* coverity wants this... */
1408 /* domain name: all alphanum or '-' or '.',
1409 * but beginning/ending in alphanum */
1410 if (regcomp (&rx, domain_regex, REG_ICASE|REG_EXTENDED|REG_NOSUB)) {
1411 g_warning ("BUG: error in regexp");
1415 valid = (regexec (&rx, domain, 1, NULL, 0) == 0);
1424 modest_text_utils_validate_email_address (const gchar *email_address,
1425 const gchar **invalid_char_position)
1428 const gchar *c = NULL, *domain = NULL;
1429 static gchar *rfc822_specials = "()<>@,;:\\\"[]&";
1431 if (invalid_char_position)
1432 *invalid_char_position = NULL;
1434 g_return_val_if_fail (email_address, FALSE);
1436 /* check that the email address contains exactly one @ */
1437 if (!strstr(email_address, "@") ||
1438 (strstr(email_address, "@") != g_strrstr(email_address, "@")))
1441 /* first we validate the name portion (name@domain) */
1442 for (c = email_address; *c; c++) {
1444 (c == email_address ||
1446 *(c - 1) == '\"')) {
1450 if (*c == '\\' && (*++c == ' '))
1452 if (*c <= ' ' || *c >= 127)
1465 if (*c <= ' ' || *c >= 127)
1467 if (strchr(rfc822_specials, *c)) {
1468 if (invalid_char_position)
1469 *invalid_char_position = c;
1473 if (c == email_address || *(c - 1) == '.')
1476 /* next we validate the domain portion (name@domain) */
1477 if (!*(domain = ++c))
1481 if (c == domain || *(c - 1) == '.' || *(c + 1) == '\0')
1485 if (*c <= ' ' || *c >= 127)
1487 if (strchr(rfc822_specials, *c)) {
1488 if (invalid_char_position)
1489 *invalid_char_position = c;
1494 return (count >= 1) ? TRUE : FALSE;
/* Validates one recipient entry, either a bare address or text followed
 * by an address in angle brackets.
 * `recipient`: string to check; NULL is rejected with FALSE.
 * `invalid_char_position`: optional out parameter, reset to NULL on entry
 * and passed on to modest_text_utils_validate_email_address().
 * Visible steps: try the whole string as an address; otherwise work on a
 * trimmed copy, scan an optional leading double-quoted part (backslash is
 * tested as an escape), look for '<', trim the text starting there and,
 * when it is wrapped in '<' ... '>', validate the enclosed address.
 * NOTE(review): many lines are missing from this excerpt (declarations of
 * `right_part`, `address`, `valid`, all return statements); the
 * `current[1] <0` test further down depends on gchar being signed. */
1498 modest_text_utils_validate_recipient (const gchar *recipient, const gchar **invalid_char_position)
1500 gchar *stripped, *current;
1502 gboolean has_error = FALSE;
1504 if (invalid_char_position)
1505 *invalid_char_position = NULL;
1507 g_return_val_if_fail (recipient, FALSE);
/* first attempt: the unmodified string as a plain e-mail address */
1509 if (modest_text_utils_validate_email_address (recipient, invalid_char_position))
/* private copy; g_strstrip() trims it in place and returns its argument */
1512 stripped = g_strdup (recipient);
1513 stripped = g_strstrip (stripped);
/* nothing left to examine */
1516 if (*current == '\0') {
/* the text starts with a double quote: skip it and scan the quoted part */
1522 if (*current == '\"') {
1523 current = g_utf8_next_char (current);
1525 for (; *current != '\0'; current = g_utf8_next_char (current)) {
1526 if (*current == '\\') {
1527 /* TODO: This causes a warning, which breaks the build,
1528 * because a gchar cannot be < 0.
1530 if (current[1] <0) {
1535 } else if (*current == '\"') {
1537 current = g_utf8_next_char (current);
1543 for (current = stripped ; *current != '\0'; current = g_utf8_next_char (current)) {
1544 if (*current == '<') {
1556 right_part = g_strdup (current);
1558 right_part = g_strstrip (right_part);
1560 if (g_str_has_prefix (right_part, "<") &&
1561 g_str_has_suffix (right_part, ">")) {
1565 address = g_strndup (right_part+1, strlen (right_part) - 2);
1566 g_free (right_part);
1567 valid = modest_text_utils_validate_email_address (address, invalid_char_position);
1571 g_free (right_part);
1578 modest_text_utils_get_display_size (guint64 size)
1580 const guint KB=1024;
1581 const guint MB=1024 * KB;
1582 const guint GB=1024 * MB;
1585 return g_strdup_printf(_FM("sfil_li_size_kb"), 0);
1586 if (0 < size && size < KB)
1587 return g_strdup_printf (_FM("sfil_li_size_kb"), 1);
1588 else if (KB <= size && size < 100 * KB)
1589 return g_strdup_printf (_FM("sfil_li_size_1kb_99kb"), size / KB);
1590 else if (100*KB <= size && size < MB)
1591 return g_strdup_printf (_FM("sfil_li_size_100kb_1mb"), (float) size / MB);
1592 else if (MB <= size && size < 10*MB)
1593 return g_strdup_printf (_FM("sfil_li_size_1mb_10mb"), (float) size / MB);
1594 else if (10*MB <= size && size < GB)
1595 return g_strdup_printf (_FM("sfil_li_size_10mb_1gb"), size / MB);
1597 return g_strdup_printf (_FM("sfil_li_size_1gb_or_greater"), (float) size / GB);
1601 get_email_from_address (const gchar * address)
1603 gchar *left_limit, *right_limit;
1605 left_limit = strstr (address, "<");
1606 right_limit = g_strrstr (address, ">");
1608 if ((left_limit == NULL)||(right_limit == NULL)|| (left_limit > right_limit))
1609 return g_strdup (address);
1611 return g_strndup (left_limit + 1, (right_limit - left_limit) - 1);
1615 modest_text_utils_get_color_string (GdkColor *color)
1617 g_return_val_if_fail (color, NULL);
1619 return g_strdup_printf ("#%x%x%x%x%x%x%x%x%x%x%x%x",
1620 (color->red >> 12) & 0xf, (color->red >> 8) & 0xf,
1621 (color->red >> 4) & 0xf, (color->red) & 0xf,
1622 (color->green >> 12) & 0xf, (color->green >> 8) & 0xf,
1623 (color->green >> 4) & 0xf, (color->green) & 0xf,
1624 (color->blue >> 12) & 0xf, (color->blue >> 8) & 0xf,
1625 (color->blue >> 4) & 0xf, (color->blue) & 0xf);
1629 modest_text_utils_text_buffer_get_text (GtkTextBuffer *buffer)
1631 GtkTextIter start, end;
1632 gchar *slice, *current;
1633 GString *result = g_string_new ("");
1635 g_return_val_if_fail (buffer && GTK_IS_TEXT_BUFFER (buffer), NULL);
1637 gtk_text_buffer_get_start_iter (buffer, &start);
1638 gtk_text_buffer_get_end_iter (buffer, &end);
1640 slice = gtk_text_buffer_get_slice (buffer, &start, &end, FALSE);
1643 while (current && current != '\0') {
1644 if (g_utf8_get_char (current) == 0xFFFC) {
1645 result = g_string_append_c (result, ' ');
1646 current = g_utf8_next_char (current);
1648 gchar *next = g_utf8_strchr (current, -1, 0xFFFC);
1650 result = g_string_append (result, current);
1652 result = g_string_append_len (result, current, next - current);
1659 return g_string_free (result, FALSE);
1664 modest_text_utils_is_forbidden_char (const gchar character,
1665 ModestTextUtilsForbiddenCharType type)
1668 const gchar *forbidden_chars = NULL;
1670 /* We need to get the length in the switch because the
1671 compiler needs to know the size at compile time */
1673 case ACCOUNT_TITLE_FORBIDDEN_CHARS:
1674 forbidden_chars = account_title_forbidden_chars;
1675 len = G_N_ELEMENTS (account_title_forbidden_chars);
1677 case FOLDER_NAME_FORBIDDEN_CHARS:
1678 forbidden_chars = folder_name_forbidden_chars;
1679 len = G_N_ELEMENTS (folder_name_forbidden_chars);
1681 case USER_NAME_FORBIDDEN_NAMES:
1682 forbidden_chars = user_name_forbidden_chars;
1683 len = G_N_ELEMENTS (user_name_forbidden_chars);
1686 g_return_val_if_reached (TRUE);
1689 for (i = 0; i < len ; i++)
1690 if (forbidden_chars[i] == character)
1693 return FALSE; /* it's valid! */
1697 modest_text_utils_label_get_selection (GtkLabel *label)
1702 if (gtk_label_get_selection_bounds (GTK_LABEL (label), &start, &end)) {
1703 const gchar *start_offset;
1704 const gchar *end_offset;
1705 start_offset = gtk_label_get_text (GTK_LABEL (label));
1706 start_offset = g_utf8_offset_to_pointer (start_offset, start);
1707 end_offset = gtk_label_get_text (GTK_LABEL (label));
1708 end_offset = g_utf8_offset_to_pointer (end_offset, end);
1709 selection = g_strndup (start_offset, end_offset - start_offset);
1712 return g_strdup ("");
/* Character predicate handed to gtk_text_iter_forward_find_char() by
 * modest_text_utils_buffer_selection_is_valid(): it is true only for
 * U+FFFC, the object replacement character.
 * NOTE(review): the return type and the second parameter are not part of
 * this excerpt. */
1717 _forward_search_image_char (gunichar ch,
1720 return (ch == 0xFFFC);
/* Checks the current selection of `buffer`: there must be a selection and,
 * per the comment below, it must not contain an image.
 * Returns FALSE when `buffer` is not a GtkTextBuffer.
 * NOTE(review): the declaration of `result`, the branch bodies and the end
 * of the function are missing from this excerpt; only the statements
 * shown here are described. */
1724 modest_text_utils_buffer_selection_is_valid (GtkTextBuffer *buffer)
1727 GtkTextIter start, end;
1729 g_return_val_if_fail (GTK_IS_TEXT_BUFFER (buffer), FALSE);
1731 result = gtk_text_buffer_get_has_selection (GTK_TEXT_BUFFER (buffer));
1733 /* check there are no images in selection */
1735 gtk_text_buffer_get_selection_bounds (buffer, &start, &end);
/* U+FFFC is the character an iterator reports at an embedded object;
 * the first selected position is tested directly */
1736 if (gtk_text_iter_get_char (&start)== 0xFFFC)
/* move `end` back by one character, then search forward from `start`
 * for another U+FFFC (presumably limited by `end` -- the remaining
 * arguments are not visible here) */
1739 gtk_text_iter_backward_char (&end);
1740 if (gtk_text_iter_forward_find_char (&start, _forward_search_image_char,
1751 modest_text_utils_escape_mnemonics (const gchar *text)
1754 GString *result = NULL;
1759 result = g_string_new ("");
1760 for (p = text; *p != '\0'; p++) {
1762 result = g_string_append (result, "__");
1764 result = g_string_append_c (result, *p);
1767 return g_string_free (result, FALSE);