X-Git-Url: http://git.maemo.org/git/?p=modest;a=blobdiff_plain;f=src%2Fmodest-text-utils.c;h=f2ca84088c2f6d9eb20451cb4fee7cc67dfffc15;hp=b0f547ddb908cc48ddc85f726be2b3e81fd7b084;hb=d39a0b6738e86acb01327d641b6e78eb0afa8c63;hpb=a0b97bdb5b2391619437f8ef3c9bae321a1227dd diff --git a/src/modest-text-utils.c b/src/modest-text-utils.c index b0f547d..f2ca840 100644 --- a/src/modest-text-utils.c +++ b/src/modest-text-utils.c @@ -42,7 +42,7 @@ #include #include #include - +#include #ifdef HAVE_CONFIG_H #include @@ -64,6 +64,8 @@ */ #define HYPERLINKIFY_MAX_LENGTH (1024*50) + + /* * we need these regexps to find URLs in plain text e-mails */ @@ -81,14 +83,40 @@ struct _url_match_t { const gchar* prefix; }; + +/* + * we mark the ampersand with \007 when converting text->html + * because after text->html we do hyperlink detecting, which + * could be screwed up by the ampersand. + * ie. 1<3 ==> 1\007lt;3 + */ +#define MARK_AMP '\007' +#define MARK_AMP_STR "\007" + +/* mark & separately, because they are parts of urls. + * ie. a&b => a\006amp;b, but a>b => a\007gt;b + * + * we need to handle '&' separately, because it can be part of URIs + * (as in href="http://foo.bar?a=1&b=1"), so inside those URIs + * we need to re-replace \006amp; with '&' again, while outside uri's + * it will be '&' + * + * yes, it's messy, but a consequence of doing text->html first, then hyperlinkify + */ +#define MARK_AMP_URI '\006' +#define MARK_AMP_URI_STR "\006" + + +/* note: match MARK_AMP_URI_STR as well, because after txt->html, a '&' will look like $(MARK_AMP_URI_STR)"amp;" */ #define MAIL_VIEWER_URL_MATCH_PATTERNS { \ - { "(file|rtsp|http|ftp|https)://[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]+[-A-Za-z0-9_$%&=?/~#]",\ + { "(file|rtsp|http|ftp|https|mms|mmsh|rtsp|rdp|lastfm)://[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR \ + "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]", \ NULL, NULL },\ - { "www\\.[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]*[^]}\\),?!;:\"]?)?",\ + { "www\\.[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]",\ NULL, "http://" }, \ - { "ftp\\.[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]*[^]}\\),?!;:\"]?)?",\ + { "ftp\\.[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]",\ NULL, "ftp://" },\ - { "(voipto|callto|chatto|jabberto|xmpp):[-_a-z@0-9.\\+]+", \ + { "(jabberto|voipto|sipto|sip|chatto|xmpp):[-_a-z@0-9.+]+", \ NULL, NULL}, \ { "mailto:[-_a-z0-9.\\+]+@[-_a-z0-9.]+", \ NULL, NULL},\ @@ -100,7 +128,7 @@ const gchar account_title_forbidden_chars[] = { '\\', '/', ':', '*', '?', '\'', '<', '>', '|', '^' }; const gchar folder_name_forbidden_chars[] = { - '<', '>', ':', '\'', '/', '\\', '|', '?', '*', '^', '%', '$' + '<', '>', ':', '\'', '/', '\\', '|', '?', '*', '^', '%', '$', '#', '&' }; const gchar user_name_forbidden_chars[] = { '<', '>' @@ -203,6 +231,8 @@ forward_cite (const gchar *from, const gchar *to, const gchar *subject) { + g_return_val_if_fail (sent, NULL); + return g_strdup_printf ("%s\n%s %s\n%s %s\n%s %s\n%s %s\n", FORWARD_STRING, FROM_STRING, (from)?from:"", @@ -278,12 +308,12 @@ modest_text_utils_derived_subject (const gchar *subject, const gchar *prefix) gchar* modest_text_utils_remove_address (const gchar *address_list, const gchar *address) { - gchar *dup, *token, *ptr, *result; + gchar *dup, *token, *ptr = NULL, *result; GString *filtered_emails; gchar *email_address; g_return_val_if_fail (address_list, NULL); - + if (!address) return g_strdup (address_list); @@ -320,10 +350,48 @@ modest_text_utils_remove_address (const gchar *address_list, const gchar *addres return result; } + +gchar* +modest_text_utils_remove_duplicate_addresses (const gchar *address_list) +{ + GSList *addresses, *cursor; + GHashTable *table; + gchar *new_list; + + g_return_val_if_fail (address_list, NULL); + + table = g_hash_table_new (g_str_hash, g_str_equal); + addresses = modest_text_utils_split_addresses_list (address_list); + + new_list = g_strdup(""); + cursor = addresses; + while (cursor) { + const gchar* address = (const gchar*)cursor->data; + + /* ignore the address if already seen */ + if (g_hash_table_lookup (table, address) == 0) { + + gchar *tmp = g_strjoin (",", new_list, address, NULL); + g_free (new_list); + new_list = tmp; + + g_hash_table_insert (table, (gchar*)address, GINT_TO_POINTER(1)); + } + cursor = g_slist_next (cursor); + } + + g_hash_table_destroy (table); + g_slist_foreach (addresses, (GFunc)g_free, NULL); + g_slist_free (addresses); + + return new_list; +} + + static void -modest_text_utils_convert_buffer_to_html (GString *html, const gchar *data, gssize n) +modest_text_utils_convert_buffer_to_html_start (GString *html, const gchar *data, gssize n) { - guint i; + guint i; gboolean space_seen = FALSE; guint break_dist = 0; /* distance since last break point */ @@ -332,7 +400,7 @@ modest_text_utils_convert_buffer_to_html (GString *html, const gchar *data, gssi /* replace with special html chars where needed*/ for (i = 0; i != n; ++i) { - char kar = data[i]; + guchar kar = data[i]; if (space_seen && kar != ' ') { g_string_append_c (html, ' '); @@ -342,23 +410,32 @@ modest_text_utils_convert_buffer_to_html (GString *html, const gchar *data, gssi /* we artificially insert a breakpoint (newline) * after 256, to make sure our lines are not so long * they will DOS the regexping later + * Also, check that kar is ASCII to make sure that we + * don't break a UTF8 char in two */ - if (++break_dist == 256) { + if (++break_dist >= 256 && kar < 127) { g_string_append_c (html, '\n'); break_dist = 0; } switch (kar) { - case 0: break; /* ignore embedded \0s */ - case '<' : g_string_append (html, "<"); break; - case '>' : g_string_append (html, ">"); break; - case '&' : g_string_append (html, "&"); break; - case '"' : g_string_append (html, """); break; + case 0: + case MARK_AMP: + case MARK_AMP_URI: + /* this is a temp place holder for '&'; we can only + * set the real '&' after hyperlink translation, otherwise + * we might screw that up */ + break; /* ignore embedded \0s and MARK_AMP */ + case '<' : g_string_append (html, MARK_AMP_STR "lt;"); break; + case '>' : g_string_append (html, MARK_AMP_STR "gt;"); break; + case '&' : g_string_append (html, MARK_AMP_URI_STR "amp;"); break; /* special case */ + case '"' : g_string_append (html, MARK_AMP_STR "quot;"); break; /* don't convert ' --> wpeditor will try to re-convert it... */ //case '\'' : g_string_append (html, "'"); break; - case '\n' : g_string_append (html, "
\n"); break_dist= 0; break; - case '\t' : g_string_append (html, "    "); break_dist=0; break; /* note the space at the end*/ + case '\n' : g_string_append (html, "
\n");break_dist= 0; break; + case '\t' : g_string_append (html, MARK_AMP_STR "nbsp;" MARK_AMP_STR "nbsp;" MARK_AMP_STR "nbsp; "); + break_dist=0; break; /* note the space at the end*/ case ' ': break_dist = 0; if (space_seen) { /* second space in a row */ @@ -373,11 +450,25 @@ modest_text_utils_convert_buffer_to_html (GString *html, const gchar *data, gssi } } + +static void +modest_text_utils_convert_buffer_to_html_finish (GString *html) +{ + int i; + /* replace all our MARK_AMPs with real ones */ + for (i = 0; i != html->len; ++i) + if ((html->str)[i] == MARK_AMP || (html->str)[i] == MARK_AMP_URI) + (html->str)[i] = '&'; +} + + gchar* modest_text_utils_convert_to_html (const gchar *data) { GString *html; gsize len; + + g_return_val_if_fail (data, NULL); if (!data) return NULL; @@ -391,13 +482,15 @@ modest_text_utils_convert_to_html (const gchar *data) "" ""); - modest_text_utils_convert_buffer_to_html (html, data, -1); + modest_text_utils_convert_buffer_to_html_start (html, data, -1); g_string_append (html, ""); if (len <= HYPERLINKIFY_MAX_LENGTH) hyperlinkify_plain_text (html); + modest_text_utils_convert_buffer_to_html_finish (html); + return g_string_free (html, FALSE); } @@ -405,7 +498,9 @@ gchar * modest_text_utils_convert_to_html_body (const gchar *data, gssize n, gboolean hyperlinkify) { GString *html; - + + g_return_val_if_fail (data, NULL); + if (!data) return NULL; @@ -413,11 +508,13 @@ modest_text_utils_convert_to_html_body (const gchar *data, gssize n, gboolean hy n = strlen (data); html = g_string_sized_new (1.5 * n); /* just a guess... */ - modest_text_utils_convert_buffer_to_html (html, data, n); + modest_text_utils_convert_buffer_to_html_start (html, data, n); if (hyperlinkify && (n < HYPERLINKIFY_MAX_LENGTH)) hyperlinkify_plain_text (html); + modest_text_utils_convert_buffer_to_html_finish (html); + return g_string_free (html, FALSE); } @@ -479,49 +576,42 @@ modest_text_utils_get_addresses_indexes (const gchar *addresses, GSList **start_ return; } + GSList * modest_text_utils_split_addresses_list (const gchar *addresses) { - gchar *current, *start, *last_blank; - GSList *result = NULL; + GSList *head; + const gchar *my_addrs = addresses; + const gchar *end; + gchar *addr; - start = (gchar *) addresses; - current = start; - last_blank = start; + g_return_val_if_fail (addresses, NULL); + + /* skip any space, ',', ';' at the start */ + while (my_addrs && (my_addrs[0] == ' ' || my_addrs[0] == ',' || my_addrs[0] == ';')) + ++my_addrs; - while (*current != '\0') { - if ((start == current)&&((*current == ' ')||(*current == ',')||(*current == ';'))) { - start = g_utf8_next_char (start); - last_blank = current; - } else if ((*current == ',')||(*current == ';')) { - gchar *new_address = NULL; - new_address = g_strndup (start, current - last_blank); - result = g_slist_prepend (result, new_address); - start = g_utf8_next_char (current); - last_blank = start; - } else if (*current == '\"') { - if (current == start) { - current = g_utf8_next_char (current); - start = g_utf8_next_char (start); - } - while ((*current != '\"')&&(*current != '\0')) - current = g_utf8_next_char (current); - } - - current = g_utf8_next_char (current); - } + /* are we at the end of addresses list? */ + if (!my_addrs[0]) + return NULL; + + /* nope, we are at the start of some address + * now, let's find the end of the address */ + end = my_addrs + 1; + while (end[0] && end[0] != ',' && end[0] != ';') + ++end; - if (start != current) { - gchar *new_address = NULL; - new_address = g_strndup (start, current - last_blank); - result = g_slist_prepend (result, new_address); - } + /* we got the address; copy it and remove trailing whitespace */ + addr = g_strndup (my_addrs, end - my_addrs); + g_strchomp (addr); - result = g_slist_reverse (result); - return result; + head = g_slist_append (NULL, addr); + head->next = modest_text_utils_split_addresses_list (end); /* recurse */ + return head; } + void modest_text_utils_address_range_at_position (const gchar *recipients_list, guint position, @@ -862,8 +952,11 @@ compile_patterns () patterns[i].preg = g_slice_new0 (regex_t); /* this should not happen */ - g_return_val_if_fail (regcomp (patterns[i].preg, patterns[i].regex, - REG_ICASE|REG_EXTENDED|REG_NEWLINE) == 0, FALSE); + if (regcomp (patterns[i].preg, patterns[i].regex, + REG_ICASE|REG_EXTENDED|REG_NEWLINE) != 0) { + g_warning ("%s: error in regexp:\n%s\n", __FUNCTION__, patterns[i].regex); + return FALSE; + } } return TRUE; } @@ -942,8 +1035,7 @@ get_url_matches (GString *txt) match->len = rm.rm_eo - rm.rm_so; match->prefix = patterns[i].prefix; match_list = g_slist_prepend (match_list, match); - } - + } offset += rm.rm_eo; } } @@ -961,6 +1053,29 @@ get_url_matches (GString *txt) +/* replace all occurences of needle in haystack with repl*/ +static gchar* +replace_string (const gchar *haystack, const gchar *needle, gchar repl) +{ + gchar *str, *cursor; + + if (!haystack || !needle || strlen(needle) == 0) + return haystack ? g_strdup(haystack) : NULL; + + str = g_strdup (haystack); + + for (cursor = str; cursor && *cursor; ++cursor) { + if (g_str_has_prefix (cursor, needle)) { + cursor[0] = repl; + memmove (cursor + 1, + cursor + strlen (needle), + strlen (cursor + strlen (needle)) + 1); + } + } + + return str; +} + static void hyperlinkify_plain_text (GString *txt) { @@ -974,10 +1089,17 @@ hyperlinkify_plain_text (GString *txt) gchar *url = g_strndup (txt->str + match->offset, match->len); gchar *repl = NULL; /* replacement */ + /* the string still contains $(MARK_AMP_URI_STR)"amp;" for each + * '&' in the original, because of the text->html conversion. + * in the href-URL (and only there), we must convert that back to + * '&' + */ + gchar *href_url = replace_string (url, MARK_AMP_URI_STR "amp;", '&'); + /* the prefix is NULL: use the one that is already there */ repl = g_strdup_printf ("%s", match->prefix ? match->prefix : EMPTY_STRING, - url, url); + href_url, url); /* replace the old thing with our hyperlink * replacement thing */ @@ -986,6 +1108,7 @@ hyperlinkify_plain_text (GString *txt) g_free (url); g_free (repl); + g_free (href_url); g_slice_free (url_match_t, match); } @@ -999,6 +1122,8 @@ void modest_text_utils_get_display_address (gchar *address) { int i; + + g_return_if_fail (address); if (!address) return; @@ -1032,6 +1157,8 @@ gchar * modest_text_utils_get_email_address (const gchar *full_address) { const gchar *left, *right; + + g_return_val_if_fail (full_address, NULL); if (!full_address) return NULL; @@ -1052,53 +1179,86 @@ modest_text_utils_get_email_address (const gchar *full_address) gint modest_text_utils_get_subject_prefix_len (const gchar *sub) { - gint i; - static const gchar* prefix[] = { - "Re:", "RE:", "RV:", "re:" - "Fwd:", "FWD:", "FW:", "fwd:", "Fw:", "fw:", NULL - }; - - if (!sub || (sub[0] != 'R' && sub[0] != 'F' && sub[0] != 'r' && sub[0] != 'f')) /* optimization */ - return 0; + gint prefix_len = 0; + + g_return_val_if_fail (sub, 0); - i = 0; + if (!sub) + return 0; - while (prefix[i]) { - if (g_str_has_prefix(sub, prefix[i])) { - int prefix_len = strlen(prefix[i]); - if (sub[prefix_len] == ' ') - ++prefix_len; /* ignore space after prefix as well */ - return prefix_len; - } - ++i; + /* optimization: "Re", "RE", "re","Fwd", "FWD", "fwd","FW","Fw", "fw" */ + if (sub[0] != 'R' && sub[0] != 'F' && sub[0] != 'r' && sub[0] != 'f') + return 0; + else if (sub[0] && sub[1] != 'e' && sub[1] != 'E' && sub[1] != 'w' && sub[1] != 'W') + return 0; + + prefix_len = 2; + if (sub[2] == 'd') + ++prefix_len; + + /* skip over a [...] block */ + if (sub[prefix_len] == '[') { + int c = prefix_len + 1; + while (sub[c] && sub[c] != ']') + ++c; + if (sub[c]) + return 0; /* no end to the ']' found */ + else + prefix_len = c + 1; } - return 0; + + /* did we find the ':' ? */ + if (sub[prefix_len] == ':') { + ++prefix_len; + if (sub[prefix_len] == ' ') + ++prefix_len; + prefix_len += modest_text_utils_get_subject_prefix_len (sub + prefix_len); +/* g_warning ("['%s','%s']", sub, (char*) sub + prefix_len); */ + return prefix_len; + } else + return 0; } gint modest_text_utils_utf8_strcmp (const gchar* s1, const gchar *s2, gboolean insensitive) { - gint result = 0; - gchar *n1, *n2; - /* work even when s1 and/or s2 == NULL */ +/* work even when s1 and/or s2 == NULL */ if (G_UNLIKELY(s1 == s2)) return 0; - - /* if it's not case sensitive */ - if (!insensitive) - return strcmp (s1 ? s1 : "", s2 ? s2 : ""); - - n1 = g_utf8_collate_key (s1 ? s1 : "", -1); - n2 = g_utf8_collate_key (s2 ? s2 : "", -1); + if (G_UNLIKELY(!s1)) + return -1; + if (G_UNLIKELY(!s2)) + return 1; - result = strcmp (n1, n2); + /* if it's not case sensitive */ + if (!insensitive) { - g_free (n1); - g_free (n2); + /* optimization: shortcut if first char is ascii */ + if (((s1[0] & 0xf0)== 0) && ((s2[0] & 0xf0) == 0)) + return s1[0] - s2[0]; + + return g_utf8_collate (s1, s2); + + } else { + gint result; + gchar *n1, *n2; + + /* optimization: short cut iif first char is ascii */ + if (((s1[0] & 0xf0) == 0) && ((s2[0] & 0xf0) == 0)) + return tolower(s1[0]) - tolower(s2[0]); + + n1 = g_utf8_strdown (s1, -1); + n2 = g_utf8_strdown (s2, -1); + + result = g_utf8_collate (n1, n2); + + g_free (n1); + g_free (n2); - return result; + return result; + } } @@ -1166,6 +1326,7 @@ modest_text_utils_validate_folder_name (const gchar *folder_name) return FALSE; } } + return TRUE; /* it's valid! */ } @@ -1176,13 +1337,15 @@ modest_text_utils_validate_domain_name (const gchar *domain) { gboolean valid = FALSE; regex_t rx; - const gchar* domain_regex = "^[a-z0-9]([.]?[a-z0-9-])*[a-z0-9]$"; + const gchar* domain_regex = "^([a-z0-9-]*[a-z0-9]\\.)+[a-z0-9-]*[a-z0-9]$"; - memset (&rx, 0, sizeof(regex_t)); /* coverity wants this... */ + g_return_val_if_fail (domain, FALSE); if (!domain) return FALSE; + memset (&rx, 0, sizeof(regex_t)); /* coverity wants this... */ + /* domain name: all alphanum or '-' or '.', * but beginning/ending in alphanum */ if (regcomp (&rx, domain_regex, REG_ICASE|REG_EXTENDED|REG_NOSUB)) { @@ -1199,21 +1362,22 @@ modest_text_utils_validate_domain_name (const gchar *domain) gboolean -modest_text_utils_validate_email_address (const gchar *email_address, const gchar **invalid_char_position) +modest_text_utils_validate_email_address (const gchar *email_address, + const gchar **invalid_char_position) { int count = 0; const gchar *c = NULL, *domain = NULL; static gchar *rfc822_specials = "()<>@,;:\\\"[]&"; - - if (invalid_char_position != NULL) + + if (invalid_char_position) *invalid_char_position = NULL; + g_return_val_if_fail (email_address, FALSE); + /* check that the email adress contains exactly one @ */ if (!strstr(email_address, "@") || - (strstr(email_address, "@") != g_strrstr(email_address, "@"))) - { + (strstr(email_address, "@") != g_strrstr(email_address, "@"))) return FALSE; - } /* first we validate the name portion (name@domain) */ for (c = email_address; *c; c++) { @@ -1278,8 +1442,14 @@ modest_text_utils_validate_recipient (const gchar *recipient, const gchar **inva gchar *right_part; gboolean has_error = FALSE; + if (invalid_char_position) + *invalid_char_position = NULL; + + g_return_val_if_fail (recipient, FALSE); + if (modest_text_utils_validate_email_address (recipient, invalid_char_position)) return TRUE; + stripped = g_strdup (recipient); stripped = g_strstrip (stripped); current = stripped; @@ -1385,7 +1555,8 @@ get_email_from_address (const gchar * address) gchar * modest_text_utils_get_color_string (GdkColor *color) { - + g_return_val_if_fail (color, NULL); + return g_strdup_printf ("#%x%x%x%x%x%x%x%x%x%x%x%x", (color->red >> 12) & 0xf, (color->red >> 8) & 0xf, (color->red >> 4) & 0xf, (color->red) & 0xf, @@ -1402,8 +1573,8 @@ modest_text_utils_text_buffer_get_text (GtkTextBuffer *buffer) gchar *slice, *current; GString *result = g_string_new (""); - g_return_val_if_fail (GTK_IS_TEXT_BUFFER (buffer), NULL); - + g_return_val_if_fail (buffer && GTK_IS_TEXT_BUFFER (buffer), NULL); + gtk_text_buffer_get_start_iter (buffer, &start); gtk_text_buffer_get_end_iter (buffer, &end); @@ -1462,3 +1633,57 @@ modest_text_utils_is_forbidden_char (const gchar character, return FALSE; /* it's valid! */ } + +gchar * +modest_text_utils_label_get_selection (GtkLabel *label) +{ + gint start, end; + gchar *selection; + + if (gtk_label_get_selection_bounds (GTK_LABEL (label), &start, &end)) { + const gchar *start_offset; + const gchar *end_offset; + start_offset = gtk_label_get_text (GTK_LABEL (label)); + start_offset = g_utf8_offset_to_pointer (start_offset, start); + end_offset = gtk_label_get_text (GTK_LABEL (label)); + end_offset = g_utf8_offset_to_pointer (end_offset, end); + selection = g_strndup (start_offset, end_offset - start_offset); + return selection; + } else { + return g_strdup (""); + } +} + +static gboolean +_forward_search_image_char (gunichar ch, + gpointer userdata) +{ + return (ch == 0xFFFC); +} + +gboolean +modest_text_utils_buffer_selection_is_valid (GtkTextBuffer *buffer) +{ + gboolean result; + GtkTextIter start, end; + + g_return_val_if_fail (GTK_IS_TEXT_BUFFER (buffer), FALSE); + + result = gtk_text_buffer_get_has_selection (GTK_TEXT_BUFFER (buffer)); + + /* check there are no images in selection */ + if (result) { + gtk_text_buffer_get_selection_bounds (buffer, &start, &end); + if (gtk_text_iter_get_char (&start)== 0xFFFC) + result = FALSE; + else { + gtk_text_iter_backward_char (&end); + if (gtk_text_iter_forward_find_char (&start, _forward_search_image_char, + NULL, &end)) + result = FALSE; + } + + } + + return result; +}