X-Git-Url: http://git.maemo.org/git/?p=modest;a=blobdiff_plain;f=src%2Fmodest-text-utils.c;h=f2ca84088c2f6d9eb20451cb4fee7cc67dfffc15;hp=c054a5e2dfffc4d7567a12789f2b7532b98db4ee;hb=d39a0b6738e86acb01327d641b6e78eb0afa8c63;hpb=5680999d6a70bb204064fd8f00bd309b974ff036 diff --git a/src/modest-text-utils.c b/src/modest-text-utils.c index c054a5e..f2ca840 100644 --- a/src/modest-text-utils.c +++ b/src/modest-text-utils.c @@ -64,6 +64,8 @@ */ #define HYPERLINKIFY_MAX_LENGTH (1024*50) + + /* * we need these regexps to find URLs in plain text e-mails */ @@ -81,14 +83,40 @@ struct _url_match_t { const gchar* prefix; }; + +/* + * we mark the ampersand with \007 when converting text->html + * because after text->html we do hyperlink detecting, which + * could be screwed up by the ampersand. + * ie. 1<3 ==> 1\007lt;3 + */ +#define MARK_AMP '\007' +#define MARK_AMP_STR "\007" + +/* mark & separately, because they are parts of urls. + * ie. a&b => a\006amp;b, but a>b => a\007gt;b + * + * we need to handle '&' separately, because it can be part of URIs + * (as in href="http://foo.bar?a=1&b=1"), so inside those URIs + * we need to re-replace \006amp; with '&' again, while outside uri's + * it will be '&' + * + * yes, it's messy, but a consequence of doing text->html first, then hyperlinkify + */ +#define MARK_AMP_URI '\006' +#define MARK_AMP_URI_STR "\006" + + +/* note: match MARK_AMP_URI_STR as well, because after txt->html, a '&' will look like $(MARK_AMP_URI_STR)"amp;" */ #define MAIL_VIEWER_URL_MATCH_PATTERNS { \ - { "(file|rtsp|http|ftp|https)://[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]+[-A-Za-z0-9_$%&=?/~#]",\ + { "(file|rtsp|http|ftp|https|mms|mmsh|rtsp|rdp|lastfm)://[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR \ + "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]", \ NULL, NULL },\ - { "www\\.[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]*[^]}\\),?!;:\"]?)?",\ + { "www\\.[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]",\ NULL, "http://" }, \ - { "ftp\\.[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]*[^]}\\),?!;:\"]?)?",\ + { "ftp\\.[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_URI_STR "]+[-a-z0-9_$%" MARK_AMP_URI_STR "=?/~#]",\ NULL, "ftp://" },\ - { "(voipto|callto|chatto|jabberto|xmpp):[-_a-z@0-9.\\+]+", \ + { "(jabberto|voipto|sipto|sip|chatto|xmpp):[-_a-z@0-9.+]+", \ NULL, NULL}, \ { "mailto:[-_a-z0-9.\\+]+@[-_a-z0-9.]+", \ NULL, NULL},\ @@ -100,7 +128,7 @@ const gchar account_title_forbidden_chars[] = { '\\', '/', ':', '*', '?', '\'', '<', '>', '|', '^' }; const gchar folder_name_forbidden_chars[] = { - '<', '>', ':', '\'', '/', '\\', '|', '?', '*', '^', '%', '$' + '<', '>', ':', '\'', '/', '\\', '|', '?', '*', '^', '%', '$', '#', '&' }; const gchar user_name_forbidden_chars[] = { '<', '>' @@ -203,6 +231,8 @@ forward_cite (const gchar *from, const gchar *to, const gchar *subject) { + g_return_val_if_fail (sent, NULL); + return g_strdup_printf ("%s\n%s %s\n%s %s\n%s %s\n%s %s\n", FORWARD_STRING, FROM_STRING, (from)?from:"", @@ -320,10 +350,48 @@ modest_text_utils_remove_address (const gchar *address_list, const gchar *addres return result; } + +gchar* +modest_text_utils_remove_duplicate_addresses (const gchar *address_list) +{ + GSList *addresses, *cursor; + GHashTable *table; + gchar *new_list; + + g_return_val_if_fail (address_list, NULL); + + table = g_hash_table_new (g_str_hash, g_str_equal); + addresses = modest_text_utils_split_addresses_list (address_list); + + new_list = g_strdup(""); + cursor = addresses; + while (cursor) { + const gchar* address = (const gchar*)cursor->data; + + /* ignore the address if already seen */ + if (g_hash_table_lookup (table, address) == 0) { + + gchar *tmp = g_strjoin (",", new_list, address, NULL); + g_free (new_list); + new_list = tmp; + + g_hash_table_insert (table, (gchar*)address, GINT_TO_POINTER(1)); + } + cursor = g_slist_next (cursor); + } + + g_hash_table_destroy (table); + g_slist_foreach (addresses, (GFunc)g_free, NULL); + g_slist_free (addresses); + + return new_list; +} + + static void -modest_text_utils_convert_buffer_to_html (GString *html, const gchar *data, gssize n) +modest_text_utils_convert_buffer_to_html_start (GString *html, const gchar *data, gssize n) { - guint i; + guint i; gboolean space_seen = FALSE; guint break_dist = 0; /* distance since last break point */ @@ -332,7 +400,7 @@ modest_text_utils_convert_buffer_to_html (GString *html, const gchar *data, gssi /* replace with special html chars where needed*/ for (i = 0; i != n; ++i) { - char kar = data[i]; + guchar kar = data[i]; if (space_seen && kar != ' ') { g_string_append_c (html, ' '); @@ -342,23 +410,32 @@ modest_text_utils_convert_buffer_to_html (GString *html, const gchar *data, gssi /* we artificially insert a breakpoint (newline) * after 256, to make sure our lines are not so long * they will DOS the regexping later + * Also, check that kar is ASCII to make sure that we + * don't break a UTF8 char in two */ - if (++break_dist == 256) { + if (++break_dist >= 256 && kar < 127) { g_string_append_c (html, '\n'); break_dist = 0; } switch (kar) { - case 0: break; /* ignore embedded \0s */ - case '<' : g_string_append (html, "<"); break; - case '>' : g_string_append (html, ">"); break; - case '&' : g_string_append (html, "&"); break; - case '"' : g_string_append (html, """); break; + case 0: + case MARK_AMP: + case MARK_AMP_URI: + /* this is a temp place holder for '&'; we can only + * set the real '&' after hyperlink translation, otherwise + * we might screw that up */ + break; /* ignore embedded \0s and MARK_AMP */ + case '<' : g_string_append (html, MARK_AMP_STR "lt;"); break; + case '>' : g_string_append (html, MARK_AMP_STR "gt;"); break; + case '&' : g_string_append (html, MARK_AMP_URI_STR "amp;"); break; /* special case */ + case '"' : g_string_append (html, MARK_AMP_STR "quot;"); break; /* don't convert ' --> wpeditor will try to re-convert it... */ //case '\'' : g_string_append (html, "'"); break; - case '\n' : g_string_append (html, "
\n"); break_dist= 0; break; - case '\t' : g_string_append (html, "    "); break_dist=0; break; /* note the space at the end*/ + case '\n' : g_string_append (html, "
\n");break_dist= 0; break; + case '\t' : g_string_append (html, MARK_AMP_STR "nbsp;" MARK_AMP_STR "nbsp;" MARK_AMP_STR "nbsp; "); + break_dist=0; break; /* note the space at the end*/ case ' ': break_dist = 0; if (space_seen) { /* second space in a row */ @@ -373,6 +450,18 @@ modest_text_utils_convert_buffer_to_html (GString *html, const gchar *data, gssi } } + +static void +modest_text_utils_convert_buffer_to_html_finish (GString *html) +{ + int i; + /* replace all our MARK_AMPs with real ones */ + for (i = 0; i != html->len; ++i) + if ((html->str)[i] == MARK_AMP || (html->str)[i] == MARK_AMP_URI) + (html->str)[i] = '&'; +} + + gchar* modest_text_utils_convert_to_html (const gchar *data) { @@ -393,13 +482,15 @@ modest_text_utils_convert_to_html (const gchar *data) "" ""); - modest_text_utils_convert_buffer_to_html (html, data, -1); + modest_text_utils_convert_buffer_to_html_start (html, data, -1); g_string_append (html, ""); if (len <= HYPERLINKIFY_MAX_LENGTH) hyperlinkify_plain_text (html); + modest_text_utils_convert_buffer_to_html_finish (html); + return g_string_free (html, FALSE); } @@ -417,11 +508,13 @@ modest_text_utils_convert_to_html_body (const gchar *data, gssize n, gboolean hy n = strlen (data); html = g_string_sized_new (1.5 * n); /* just a guess... */ - modest_text_utils_convert_buffer_to_html (html, data, n); + modest_text_utils_convert_buffer_to_html_start (html, data, n); if (hyperlinkify && (n < HYPERLINKIFY_MAX_LENGTH)) hyperlinkify_plain_text (html); + modest_text_utils_convert_buffer_to_html_finish (html); + return g_string_free (html, FALSE); } @@ -483,49 +576,42 @@ modest_text_utils_get_addresses_indexes (const gchar *addresses, GSList **start_ return; } + GSList * modest_text_utils_split_addresses_list (const gchar *addresses) { - gchar *current, *start, *last_blank; - GSList *result = NULL; + GSList *head; + const gchar *my_addrs = addresses; + const gchar *end; + gchar *addr; - start = (gchar *) addresses; - current = start; - last_blank = start; + g_return_val_if_fail (addresses, NULL); + + /* skip any space, ',', ';' at the start */ + while (my_addrs && (my_addrs[0] == ' ' || my_addrs[0] == ',' || my_addrs[0] == ';')) + ++my_addrs; - while (*current != '\0') { - if ((start == current)&&((*current == ' ')||(*current == ',')||(*current == ';'))) { - start = g_utf8_next_char (start); - last_blank = current; - } else if ((*current == ',')||(*current == ';')) { - gchar *new_address = NULL; - new_address = g_strndup (start, current - last_blank); - result = g_slist_prepend (result, new_address); - start = g_utf8_next_char (current); - last_blank = start; - } else if (*current == '\"') { - if (current == start) { - current = g_utf8_next_char (current); - start = g_utf8_next_char (start); - } - while ((*current != '\"')&&(*current != '\0')) - current = g_utf8_next_char (current); - } - - current = g_utf8_next_char (current); - } + /* are we at the end of addresses list? */ + if (!my_addrs[0]) + return NULL; + + /* nope, we are at the start of some address + * now, let's find the end of the address */ + end = my_addrs + 1; + while (end[0] && end[0] != ',' && end[0] != ';') + ++end; - if (start != current) { - gchar *new_address = NULL; - new_address = g_strndup (start, current - last_blank); - result = g_slist_prepend (result, new_address); - } + /* we got the address; copy it and remove trailing whitespace */ + addr = g_strndup (my_addrs, end - my_addrs); + g_strchomp (addr); - result = g_slist_reverse (result); - return result; + head = g_slist_append (NULL, addr); + head->next = modest_text_utils_split_addresses_list (end); /* recurse */ + return head; } + void modest_text_utils_address_range_at_position (const gchar *recipients_list, guint position, @@ -866,8 +952,11 @@ compile_patterns () patterns[i].preg = g_slice_new0 (regex_t); /* this should not happen */ - g_return_val_if_fail (regcomp (patterns[i].preg, patterns[i].regex, - REG_ICASE|REG_EXTENDED|REG_NEWLINE) == 0, FALSE); + if (regcomp (patterns[i].preg, patterns[i].regex, + REG_ICASE|REG_EXTENDED|REG_NEWLINE) != 0) { + g_warning ("%s: error in regexp:\n%s\n", __FUNCTION__, patterns[i].regex); + return FALSE; + } } return TRUE; } @@ -946,8 +1035,7 @@ get_url_matches (GString *txt) match->len = rm.rm_eo - rm.rm_so; match->prefix = patterns[i].prefix; match_list = g_slist_prepend (match_list, match); - } - + } offset += rm.rm_eo; } } @@ -965,6 +1053,29 @@ get_url_matches (GString *txt) +/* replace all occurences of needle in haystack with repl*/ +static gchar* +replace_string (const gchar *haystack, const gchar *needle, gchar repl) +{ + gchar *str, *cursor; + + if (!haystack || !needle || strlen(needle) == 0) + return haystack ? g_strdup(haystack) : NULL; + + str = g_strdup (haystack); + + for (cursor = str; cursor && *cursor; ++cursor) { + if (g_str_has_prefix (cursor, needle)) { + cursor[0] = repl; + memmove (cursor + 1, + cursor + strlen (needle), + strlen (cursor + strlen (needle)) + 1); + } + } + + return str; +} + static void hyperlinkify_plain_text (GString *txt) { @@ -978,10 +1089,17 @@ hyperlinkify_plain_text (GString *txt) gchar *url = g_strndup (txt->str + match->offset, match->len); gchar *repl = NULL; /* replacement */ + /* the string still contains $(MARK_AMP_URI_STR)"amp;" for each + * '&' in the original, because of the text->html conversion. + * in the href-URL (and only there), we must convert that back to + * '&' + */ + gchar *href_url = replace_string (url, MARK_AMP_URI_STR "amp;", '&'); + /* the prefix is NULL: use the one that is already there */ repl = g_strdup_printf ("%s", match->prefix ? match->prefix : EMPTY_STRING, - url, url); + href_url, url); /* replace the old thing with our hyperlink * replacement thing */ @@ -990,6 +1108,7 @@ hyperlinkify_plain_text (GString *txt) g_free (url); g_free (repl); + g_free (href_url); g_slice_free (url_match_t, match); } @@ -1116,7 +1235,7 @@ modest_text_utils_utf8_strcmp (const gchar* s1, const gchar *s2, gboolean insens /* if it's not case sensitive */ if (!insensitive) { - /* optimization: short cut if first char is ascii */ + /* optimization: shortcut if first char is ascii */ if (((s1[0] & 0xf0)== 0) && ((s2[0] & 0xf0) == 0)) return s1[0] - s2[0]; @@ -1218,7 +1337,7 @@ modest_text_utils_validate_domain_name (const gchar *domain) { gboolean valid = FALSE; regex_t rx; - const gchar* domain_regex = "^[a-z0-9]([.]?[a-z0-9-])*[a-z0-9]$"; + const gchar* domain_regex = "^([a-z0-9-]*[a-z0-9]\\.)+[a-z0-9-]*[a-z0-9]$"; g_return_val_if_fail (domain, FALSE); @@ -1514,3 +1633,57 @@ modest_text_utils_is_forbidden_char (const gchar character, return FALSE; /* it's valid! */ } + +gchar * +modest_text_utils_label_get_selection (GtkLabel *label) +{ + gint start, end; + gchar *selection; + + if (gtk_label_get_selection_bounds (GTK_LABEL (label), &start, &end)) { + const gchar *start_offset; + const gchar *end_offset; + start_offset = gtk_label_get_text (GTK_LABEL (label)); + start_offset = g_utf8_offset_to_pointer (start_offset, start); + end_offset = gtk_label_get_text (GTK_LABEL (label)); + end_offset = g_utf8_offset_to_pointer (end_offset, end); + selection = g_strndup (start_offset, end_offset - start_offset); + return selection; + } else { + return g_strdup (""); + } +} + +static gboolean +_forward_search_image_char (gunichar ch, + gpointer userdata) +{ + return (ch == 0xFFFC); +} + +gboolean +modest_text_utils_buffer_selection_is_valid (GtkTextBuffer *buffer) +{ + gboolean result; + GtkTextIter start, end; + + g_return_val_if_fail (GTK_IS_TEXT_BUFFER (buffer), FALSE); + + result = gtk_text_buffer_get_has_selection (GTK_TEXT_BUFFER (buffer)); + + /* check there are no images in selection */ + if (result) { + gtk_text_buffer_get_selection_bounds (buffer, &start, &end); + if (gtk_text_iter_get_char (&start)== 0xFFFC) + result = FALSE; + else { + gtk_text_iter_backward_char (&end); + if (gtk_text_iter_forward_find_char (&start, _forward_search_image_char, + NULL, &end)) + result = FALSE; + } + + } + + return result; +}