X-Git-Url: http://git.maemo.org/git/?p=modest;a=blobdiff_plain;f=src%2Fmodest-text-utils.c;h=3a89582bdb87680172e9a296043eac068afca844;hp=27a829c6ad3ee7e3b280d432ecf516c3d3510491;hb=a3524da28501507a8c5dc8319bd62d61448edf46;hpb=cd52c8748afe0d7ef9987994d2f2386c2f53fbee diff --git a/src/modest-text-utils.c b/src/modest-text-utils.c index 27a829c..3a89582 100644 --- a/src/modest-text-utils.c +++ b/src/modest-text-utils.c @@ -28,31 +28,656 @@ */ -/* modest-ui.c */ -#include -#include -#include +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif /*_GNU_SOURCE*/ +#include /* for strcasestr */ -#include "modest-text-utils.h" +#include +#include +#include +#include +#include +#include +#include +#include #ifdef HAVE_CONFIG_H #include #endif /*HAVE_CONFIG_H */ +/* defines */ +#define FORWARD_STRING _("mcen_ia_editor_original_message") +#define FROM_STRING _("mail_va_from") +#define SENT_STRING _("mcen_fi_message_properties_sent") +#define TO_STRING _("mail_va_to") +#define SUBJECT_STRING _("mail_va_subject") +#define EMPTY_STRING "" + +/* + * do the hyperlinkification only for texts < 50 Kb, + * as it's quite slow. Without this, e.g. mail with + * an uuencoded part (which is not recognized as attachment, + * will hang modest + */ +#define HYPERLINKIFY_MAX_LENGTH (1024*50) + + +/* + * we mark the ampersand with \007 when converting text->html + * because after text->html we do hyperlink detecting, which + * could be screwed up by the ampersand + */ +#define MARK_AMP '\007' +#define MARK_AMP_STR "\007" + +/* + * we need these regexps to find URLs in plain text e-mails + */ +typedef struct _url_match_pattern_t url_match_pattern_t; +struct _url_match_pattern_t { + gchar *regex; + regex_t *preg; + gchar *prefix; +}; + +typedef struct _url_match_t url_match_t; +struct _url_match_t { + guint offset; + guint len; + const gchar* prefix; +}; + +/* note: match MARK_AMP_STR as well, because after txt->html, a '&' will look like $(MARK_AMP_STR)"amp;" */ +#define MAIL_VIEWER_URL_MATCH_PATTERNS { \ + { "^(file|rtsp|http|ftp|https)://[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_STR "]+[-a-z0-9_$%" MARK_AMP_STR "=?/~#]",\ + NULL, NULL },\ + { "^www\\.[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_STR "]+[-a-z0-9_$%" MARK_AMP_STR "=?/~#]",\ + NULL, "http://" }, \ + { "^ftp\\.[-a-z0-9_$.+!*(),;:@%=?/~#" MARK_AMP_STR "]+[-a-z0-9_$%" MARK_AMP_STR "=?/~#]",\ + NULL, "ftp://" },\ + { "^(voipto|callto|chatto|jabberto|xmpp):[-_a-z@0-9.+]+", \ + NULL, NULL}, \ + { "^mailto:[-_a-z0-9.\\+]+@[-_a-z0-9.]+", \ + NULL, NULL},\ + { "^[-_a-z0-9.\\+]+@[-_a-z0-9.]+",\ + NULL, "mailto:"}\ + } + +const gchar account_title_forbidden_chars[] = { + '\\', '/', ':', '*', '?', '\'', '<', '>', '|', '^' +}; +const gchar folder_name_forbidden_chars[] = { + '<', '>', ':', '\'', '/', '\\', '|', '?', '*', '^', '%', '$', '#', '&' +}; +const gchar user_name_forbidden_chars[] = { + '<', '>' +}; +const guint ACCOUNT_TITLE_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (account_title_forbidden_chars); +const guint FOLDER_NAME_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (folder_name_forbidden_chars); +const guint USER_NAME_FORBIDDEN_CHARS_LENGTH = G_N_ELEMENTS (user_name_forbidden_chars); + /* private */ -static GString *get_next_line (const char *b, const gsize blen, const gchar * iter); -static int get_indent_level (const char *l); -static void unquote_line (GString * l); -static void append_quoted (GString * buf, const int indent, - const GString * str, const int cutpoint); -static int get_breakpoint_utf8 (const gchar * s, const gint indent, - const gint limit); -static int get_breakpoint_ascii (const gchar * s, const gint indent, - const gint limit); -static int get_breakpoint (const gchar * s, const gint indent, - const gint limit); +static gchar* cite (const time_t sent_date, const gchar *from); +static void hyperlinkify_plain_text (GString *txt); +static gint cmp_offsets_reverse (const url_match_t *match1, const url_match_t *match2); +static GSList* get_url_matches (GString *txt); + +static GString* get_next_line (const char *b, const gsize blen, const gchar * iter); +static int get_indent_level (const char *l); +static void unquote_line (GString * l); +static void append_quoted (GString * buf, const int indent, const GString * str, + const int cutpoint); +static int get_breakpoint_utf8 (const gchar * s, const gint indent, const gint limit); +static int get_breakpoint_ascii (const gchar * s, const gint indent, const gint limit); +static int get_breakpoint (const gchar * s, const gint indent, const gint limit); + +static gchar* modest_text_utils_quote_plain_text (const gchar *text, + const gchar *cite, + const gchar *signature, + GList *attachments, + int limit); + +static gchar* modest_text_utils_quote_html (const gchar *text, + const gchar *cite, + const gchar *signature, + GList *attachments, + int limit); +static gchar* get_email_from_address (const gchar *address); + + +/* ******************************************************************* */ +/* ************************* PUBLIC FUNCTIONS ************************ */ +/* ******************************************************************* */ + +gchar * +modest_text_utils_quote (const gchar *text, + const gchar *content_type, + const gchar *signature, + const gchar *from, + const time_t sent_date, + GList *attachments, + int limit) +{ + gchar *retval, *cited; + + g_return_val_if_fail (text, NULL); + g_return_val_if_fail (content_type, NULL); + + cited = cite (sent_date, from); + + if (content_type && strcmp (content_type, "text/html") == 0) + /* TODO: extract the of the HTML and pass it to + the function */ + retval = modest_text_utils_quote_html (text, cited, signature, attachments, limit); + else + retval = modest_text_utils_quote_plain_text (text, cited, signature, attachments, limit); + + g_free (cited); + + return retval; +} + + +gchar * +modest_text_utils_cite (const gchar *text, + const gchar *content_type, + const gchar *signature, + const gchar *from, + time_t sent_date) +{ + gchar *retval; + gchar *tmp_sig; + + g_return_val_if_fail (text, NULL); + g_return_val_if_fail (content_type, NULL); + + if (!signature) + retval = g_strdup (""); + else if (strcmp(content_type, "text/html") == 0) { + tmp_sig = g_strconcat ("\n", signature, NULL); + retval = modest_text_utils_convert_to_html_body(tmp_sig, -1, TRUE); + g_free (tmp_sig); + } else { + retval = g_strconcat (text, "\n", signature, NULL); + } + + return retval; +} + +static gchar * +forward_cite (const gchar *from, + const gchar *sent, + const gchar *to, + const gchar *subject) +{ + return g_strdup_printf ("%s\n%s %s\n%s %s\n%s %s\n%s %s\n", + FORWARD_STRING, + FROM_STRING, (from)?from:"", + SENT_STRING, sent, + TO_STRING, (to)?to:"", + SUBJECT_STRING, (subject)?subject:""); +} + +gchar * +modest_text_utils_inline (const gchar *text, + const gchar *content_type, + const gchar *signature, + const gchar *from, + time_t sent_date, + const gchar *to, + const gchar *subject) +{ + gchar sent_str[101]; + gchar *cited; + gchar *retval; + + g_return_val_if_fail (text, NULL); + g_return_val_if_fail (content_type, NULL); + + modest_text_utils_strftime (sent_str, 100, "%c", sent_date); + + cited = forward_cite (from, sent_str, to, subject); + + if (content_type && strcmp (content_type, "text/html") == 0) + retval = modest_text_utils_quote_html (text, cited, signature, NULL, 80); + else + retval = modest_text_utils_quote_plain_text (text, cited, signature, NULL, 80); + + g_free (cited); + return retval; +} + +/* just to prevent warnings: + * warning: `%x' yields only last 2 digits of year in some locales + */ +gsize +modest_text_utils_strftime(char *s, gsize max, const char *fmt, time_t timet) +{ + struct tm tm; + + /* does not work on old maemo glib: + * g_date_set_time_t (&date, timet); + */ + localtime_r (&timet, &tm); + return strftime(s, max, fmt, &tm); +} + +gchar * +modest_text_utils_derived_subject (const gchar *subject, const gchar *prefix) +{ + gchar *tmp; + + g_return_val_if_fail (prefix, NULL); + + if (!subject || subject[0] == '\0') + subject = _("mail_va_no_subject"); + + tmp = g_strchug (g_strdup (subject)); + + if (!strncmp (tmp, prefix, strlen (prefix))) { + return tmp; + } else { + g_free (tmp); + return g_strdup_printf ("%s %s", prefix, subject); + } +} + +gchar* +modest_text_utils_remove_address (const gchar *address_list, const gchar *address) +{ + gchar *dup, *token, *ptr = NULL, *result; + GString *filtered_emails; + gchar *email_address; + + g_return_val_if_fail (address_list, NULL); + + if (!address) + return g_strdup (address_list); + + email_address = get_email_from_address (address); + + /* search for substring */ + if (!strstr ((const char *) address_list, (const char *) email_address)) { + g_free (email_address); + return g_strdup (address_list); + } + + dup = g_strdup (address_list); + filtered_emails = g_string_new (NULL); + + token = strtok_r (dup, ",", &ptr); + + while (token != NULL) { + /* Add to list if not found */ + if (!strstr ((const char *) token, (const char *) email_address)) { + if (filtered_emails->len == 0) + g_string_append_printf (filtered_emails, "%s", g_strstrip (token)); + else + g_string_append_printf (filtered_emails, ",%s", g_strstrip (token)); + } + token = strtok_r (NULL, ",", &ptr); + } + result = filtered_emails->str; + + /* Clean */ + g_free (email_address); + g_free (dup); + g_string_free (filtered_emails, FALSE); + + return result; +} + + +gchar* +modest_text_utils_remove_duplicate_addresses (const gchar *address_list) +{ + GSList *addresses, *cursor; + GHashTable *table; + gchar *new_list; + + g_return_val_if_fail (address_list, NULL); + + table = g_hash_table_new (g_str_hash, g_str_equal); + addresses = modest_text_utils_split_addresses_list (address_list); + + new_list = g_strdup(""); + cursor = addresses; + while (cursor) { + const gchar* address = (const gchar*)cursor->data; + + /* ignore the address if already seen */ + if (g_hash_table_lookup (table, address) == 0) { + + gchar *tmp = g_strjoin (",", new_list, address, NULL); + g_free (new_list); + new_list = tmp; + + g_hash_table_insert (table, (gchar*)address, GINT_TO_POINTER(1)); + } + cursor = g_slist_next (cursor); + } + + g_hash_table_destroy (table); + g_slist_foreach (addresses, (GFunc)g_free, NULL); + g_slist_free (addresses); + + return new_list; +} + + +static void +modest_text_utils_convert_buffer_to_html_start (GString *html, const gchar *data, gssize n) +{ + guint i; + gboolean space_seen = FALSE; + guint break_dist = 0; /* distance since last break point */ + + if (n == -1) + n = strlen (data); + + /* replace with special html chars where needed*/ + for (i = 0; i != n; ++i) { + char kar = data[i]; + + if (space_seen && kar != ' ') { + g_string_append_c (html, ' '); + space_seen = FALSE; + } + + /* we artificially insert a breakpoint (newline) + * after 256, to make sure our lines are not so long + * they will DOS the regexping later + */ + if (++break_dist == 256) { + g_string_append_c (html, '\n'); + break_dist = 0; + } + + switch (kar) { + case 0: + case MARK_AMP: /* this is a temp place holder for '&'; we can only + * set the real '&' after hyperlink translation, otherwise + * we might screw that up */ + break; /* ignore embedded \0s and MARK_AMP */ + case '<' : g_string_append (html, MARK_AMP_STR "lt;"); break; + case '>' : g_string_append (html, MARK_AMP_STR "gt;"); break; + case '&' : g_string_append (html, MARK_AMP_STR "amp;"); break; + case '"' : g_string_append (html, MARK_AMP_STR "quot;"); break; + + /* don't convert ' --> wpeditor will try to re-convert it... */ + //case '\'' : g_string_append (html, "'"); break; + case '\n' : g_string_append (html, "
\n");break_dist= 0; break; + case '\t' : g_string_append (html, MARK_AMP_STR "nbsp;" MARK_AMP_STR "nbsp;" MARK_AMP_STR "nbsp; "); + break_dist=0; break; /* note the space at the end*/ + case ' ': + break_dist = 0; + if (space_seen) { /* second space in a row */ + g_string_append (html, "  "); + space_seen = FALSE; + } else + space_seen = TRUE; + break; + default: + g_string_append_c (html, kar); + } + } +} + + +static void +modest_text_utils_convert_buffer_to_html_finish (GString *html) +{ + int i; + /* replace all our MARK_AMPs with real ones */ + for (i = 0; i != html->len; ++i) + if ((html->str)[i] == MARK_AMP) + (html->str)[i] = '&'; +} + + +gchar* +modest_text_utils_convert_to_html (const gchar *data) +{ + GString *html; + gsize len; + + g_return_val_if_fail (data, NULL); + + if (!data) + return NULL; + + len = strlen (data); + html = g_string_sized_new (1.5 * len); /* just a guess... */ + + g_string_append_printf (html, + "" + "" + "" + ""); + + modest_text_utils_convert_buffer_to_html_start (html, data, -1); + + g_string_append (html, ""); + + if (len <= HYPERLINKIFY_MAX_LENGTH) + hyperlinkify_plain_text (html); + + modest_text_utils_convert_buffer_to_html_finish (html); + + return g_string_free (html, FALSE); +} + +gchar * +modest_text_utils_convert_to_html_body (const gchar *data, gssize n, gboolean hyperlinkify) +{ + GString *html; + + g_return_val_if_fail (data, NULL); + + if (!data) + return NULL; + + if (n == -1) + n = strlen (data); + html = g_string_sized_new (1.5 * n); /* just a guess... */ + + modest_text_utils_convert_buffer_to_html_start (html, data, n); + + if (hyperlinkify && (n < HYPERLINKIFY_MAX_LENGTH)) + hyperlinkify_plain_text (html); + + modest_text_utils_convert_buffer_to_html_finish (html); + + return g_string_free (html, FALSE); +} + +void +modest_text_utils_get_addresses_indexes (const gchar *addresses, GSList **start_indexes, GSList **end_indexes) +{ + gchar *current, *start, *last_blank; + gint start_offset = 0, current_offset = 0; + + g_return_if_fail (start_indexes != NULL); + g_return_if_fail (end_indexes != NULL); + + start = (gchar *) addresses; + current = start; + last_blank = start; + + while (*current != '\0') { + if ((start == current)&&((*current == ' ')||(*current == ',')||(*current == ';'))) { + start = g_utf8_next_char (start); + start_offset++; + last_blank = current; + } else if ((*current == ',')||(*current == ';')) { + gint *start_index, *end_index; + start_index = g_new0(gint, 1); + end_index = g_new0(gint, 1); + *start_index = start_offset; + *end_index = current_offset; + *start_indexes = g_slist_prepend (*start_indexes, start_index); + *end_indexes = g_slist_prepend (*end_indexes, end_index); + start = g_utf8_next_char (current); + start_offset = current_offset + 1; + last_blank = start; + } else if (*current == '"') { + current = g_utf8_next_char (current); + current_offset ++; + while ((*current != '"')&&(*current != '\0')) { + current = g_utf8_next_char (current); + current_offset ++; + } + } + + current = g_utf8_next_char (current); + current_offset ++; + } + + if (start != current) { + gint *start_index, *end_index; + start_index = g_new0(gint, 1); + end_index = g_new0(gint, 1); + *start_index = start_offset; + *end_index = current_offset; + *start_indexes = g_slist_prepend (*start_indexes, start_index); + *end_indexes = g_slist_prepend (*end_indexes, end_index); + } + + *start_indexes = g_slist_reverse (*start_indexes); + *end_indexes = g_slist_reverse (*end_indexes); + + return; +} + +#if 0 +GSList * +modest_text_utils_split_addresses_list (const gchar *addresses) +{ + gchar *current, *start, *last_blank; + GSList *result = NULL; + + start = (gchar *) addresses; + current = start; + last_blank = start; + + while (*current != '\0') { + if ((start == current)&&((*current == ' ')||(*current == ',')||(*current == ';'))) { + start = g_utf8_next_char (start); + last_blank = current; + } else if ((*current == ',')||(*current == ';')) { + gchar *new_address = NULL; + new_address = g_strndup (start, current - last_blank); + result = g_slist_prepend (result, new_address); + start = g_utf8_next_char (current); + last_blank = start; + } else if (*current == '\"') { + if (current == start) { + current = g_utf8_next_char (current); + start = g_utf8_next_char (start); + } + while ((*current != '\"')&&(*current != '\0')) + current = g_utf8_next_char (current); + } + + current = g_utf8_next_char (current); + } + + if (start != current) { + gchar *new_address = NULL; + new_address = g_strndup (start, current - last_blank); + result = g_slist_prepend (result, new_address); + } + + result = g_slist_reverse (result); + return result; + +} +#endif + + + + +GSList * +modest_text_utils_split_addresses_list (const gchar *addresses) +{ + GSList *head; + const gchar *my_addrs = addresses; + const gchar *end; + gchar *addr; + + /* skip any space, ',', ';' at the start */ + while (my_addrs && (my_addrs[0] == ' ' || my_addrs[0] == ',' || my_addrs[0] == ';')) + ++my_addrs; + + /* are we at the end of addresses list? */ + if (!my_addrs[0]) + return NULL; + + /* nope, we are at the start of some address + * now, let's find the end of the address */ + end = my_addrs + 1; + while (end[0] && end[0] != ',' && end[0] != ';') + ++end; + + /* we got the address; copy it and remove trailing whitespace */ + addr = g_strndup (my_addrs, end - my_addrs); + g_strchomp (addr); + + head = g_slist_append (NULL, addr); + head->next = modest_text_utils_split_addresses_list (end); /* recurse */ + + return head; +} + + +void +modest_text_utils_address_range_at_position (const gchar *recipients_list, + guint position, + guint *start, + guint *end) +{ + gchar *current = NULL; + gint range_start = 0; + gint range_end = 0; + gint index; + gboolean is_quoted = FALSE; + + g_return_if_fail (recipients_list); + g_return_if_fail (position < g_utf8_strlen(recipients_list, -1)); + + index = 0; + for (current = (gchar *) recipients_list; *current != '\0'; + current = g_utf8_find_next_char (current, NULL)) { + gunichar c = g_utf8_get_char (current); + + if ((c == ',') && (!is_quoted)) { + if (index < position) { + range_start = index + 1; + } else { + break; + } + } else if (c == '\"') { + is_quoted = !is_quoted; + } else if ((c == ' ') &&(range_start == index)) { + range_start ++; + } + index ++; + range_end = index; + } + + if (start) + *start = range_start; + if (end) + *end = range_end; +} + + +/* ******************************************************************* */ +/* ************************* UTILIY FUNCTIONS ************************ */ +/* ******************************************************************* */ static GString * get_next_line (const gchar * b, const gsize blen, const gchar * iter) @@ -197,40 +822,49 @@ get_breakpoint (const gchar * s, const gint indent, const gint limit) } } +static gchar * +cite (const time_t sent_date, const gchar *from) +{ + return g_strdup (_("mcen_ia_editor_original_message")); +} +static gchar * +quoted_attachments (GList *attachments) +{ + GList *node = NULL; + GString *result = g_string_new (""); + for (node = attachments; node != NULL; node = g_list_next (node)) { + gchar *filename = (gchar *) node->data; + g_string_append_printf ( result, "%s %s\n", _("mcen_ia_editor_attach_filename"), filename); + } -/* just to prevent warnings: - * warning: `%x' yields only last 2 digits of year in some locales - */ -static size_t -my_strftime(char *s, size_t max, const char *fmt, const - struct tm *tm) { - return strftime(s, max, fmt, tm); -} + return g_string_free (result, FALSE); +} -gchar * -modest_text_utils_quote (const gchar * to_quote, const gchar * from, - const time_t sent_date, const int limit) +static gchar * +modest_text_utils_quote_plain_text (const gchar *text, + const gchar *cite, + const gchar *signature, + GList *attachments, + int limit) { const gchar *iter; gint indent, breakpoint, rem_indent = 0; - gchar sent_str[101]; GString *q, *l, *remaining; gsize len; - - /* format sent_date */ - my_strftime (sent_str, 100, "%c", localtime (&sent_date)); - q = g_string_new (""); - g_string_printf (q, "On %s, %s wrote:\n", sent_str, from); + gchar *attachments_string = NULL; /* remaining will store the rest of the line if we have to break it */ + q = g_string_new ("\n"); + q = g_string_append (q, cite); + q = g_string_append_c (q, '\n'); remaining = g_string_new (""); - iter = to_quote; - len = strlen(to_quote); + iter = text; + len = strlen(text); do { - l = get_next_line (to_quote, len, iter); + l = get_next_line (text, len, iter); iter = iter + l->len + 1; indent = get_indent_level (l->str); unquote_line (l); @@ -242,7 +876,7 @@ modest_text_utils_quote (const gchar * to_quote, const gchar * from, } else { do { breakpoint = - get_breakpoint (remaining-> str, + get_breakpoint (remaining->str, rem_indent, limit); append_quoted (q, rem_indent, @@ -265,7 +899,758 @@ modest_text_utils_quote (const gchar * to_quote, const gchar * from, rem_indent = indent; append_quoted (q, indent, l, breakpoint); g_string_free (l, TRUE); - } while ((iter < to_quote + len) || (remaining->str[0])); + } while ((iter < text + len) || (remaining->str[0])); + + attachments_string = quoted_attachments (attachments); + q = g_string_append (q, attachments_string); + g_free (attachments_string); + + if (signature != NULL) { + q = g_string_append_c (q, '\n'); + q = g_string_append (q, signature); + } return g_string_free (q, FALSE); } + +static gchar* +modest_text_utils_quote_html (const gchar *text, + const gchar *cite, + const gchar *signature, + GList *attachments, + int limit) +{ + gchar *result = NULL; + gchar *signature_result = NULL; + const gchar *format = \ + "\n" \ + "\n" \ + "\n" \ + "
%s
" \ + "
%s
%s
%s
\n" \ + "\n" \ + "\n"; + gchar *attachments_string = NULL; + gchar *q_attachments_string = NULL; + gchar *q_cite = NULL; + gchar *html_text = NULL; + + if (signature == NULL) + signature_result = g_strdup (""); + else + signature_result = modest_text_utils_convert_to_html_body (signature, -1, TRUE); + + attachments_string = quoted_attachments (attachments); + q_attachments_string = modest_text_utils_convert_to_html_body (attachments_string, -1, TRUE); + q_cite = modest_text_utils_convert_to_html_body (cite, -1, TRUE); + html_text = modest_text_utils_convert_to_html_body (text, -1, TRUE); + result = g_strdup_printf (format, signature_result, q_cite, html_text, q_attachments_string); + g_free (q_cite); + g_free (html_text); + g_free (attachments_string); + g_free (q_attachments_string); + g_free (signature_result); + + return result; +} + +static gint +cmp_offsets_reverse (const url_match_t *match1, const url_match_t *match2) +{ + return match2->offset - match1->offset; +} + +static gboolean url_matches_block = 0; +static url_match_pattern_t patterns[] = MAIL_VIEWER_URL_MATCH_PATTERNS; + + +static gboolean +compile_patterns () +{ + guint i; + const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t); + for (i = 0; i != pattern_num; ++i) { + patterns[i].preg = g_slice_new0 (regex_t); + + /* this should not happen */ + if (regcomp (patterns[i].preg, patterns[i].regex, + REG_ICASE|REG_EXTENDED|REG_NEWLINE) != 0) { + g_warning ("%s: error in regexp:\n%s\n", __FUNCTION__, patterns[i].regex); + return FALSE; + } + } + return TRUE; +} + +static void +free_patterns () +{ + guint i; + const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t); + for (i = 0; i != pattern_num; ++i) { + regfree (patterns[i].preg); + g_slice_free (regex_t, patterns[i].preg); + } /* don't free patterns itself -- it's static */ +} + +void +modest_text_utils_hyperlinkify_begin (void) +{ + if (url_matches_block == 0) + compile_patterns (); + url_matches_block ++; +} + +void +modest_text_utils_hyperlinkify_end (void) +{ + url_matches_block--; + if (url_matches_block <= 0) + free_patterns (); +} + + +static GSList* +get_url_matches (GString *txt) +{ + regmatch_t rm; + guint rv, i, offset = 0; + GSList *match_list = NULL; + + const size_t pattern_num = sizeof(patterns)/sizeof(url_match_pattern_t); + + /* initalize the regexps */ + modest_text_utils_hyperlinkify_begin (); + + /* find all the matches */ + for (i = 0; i != pattern_num; ++i) { + offset = 0; + while (1) { + url_match_t *match; + gboolean is_submatch; + GSList *cursor; + + if ((rv = regexec (patterns[i].preg, txt->str + offset, 1, &rm, 0)) != 0) { + g_return_val_if_fail (rv == REG_NOMATCH, NULL); /* this should not happen */ + break; /* try next regexp */ + } + if (rm.rm_so == -1) + break; + + is_submatch = FALSE; + /* check old matches to see if this has already been matched */ + cursor = match_list; + while (cursor && !is_submatch) { + const url_match_t *old_match = + (const url_match_t *) cursor->data; + guint new_offset = offset + rm.rm_so; + is_submatch = (new_offset > old_match->offset && + new_offset < old_match->offset + old_match->len); + cursor = g_slist_next (cursor); + } + + if (!is_submatch) { + /* make a list of our matches ( tupels)*/ + match = g_slice_new (url_match_t); + match->offset = offset + rm.rm_so; + match->len = rm.rm_eo - rm.rm_so; + match->prefix = patterns[i].prefix; + match_list = g_slist_prepend (match_list, match); + } + offset += rm.rm_eo; + } + } + + modest_text_utils_hyperlinkify_end (); + + /* now sort the list, so the matches are in reverse order of occurence. + * that way, we can do the replacements starting from the end, so we don't need + * to recalculate the offsets + */ + match_list = g_slist_sort (match_list, + (GCompareFunc)cmp_offsets_reverse); + return match_list; +} + + + +/* replace all occurences of needle in haystack with repl*/ +static gchar* +replace_string (const gchar *haystack, const gchar *needle, gchar repl) +{ + gchar *str, *cursor; + + if (!haystack || !needle || strlen(needle) == 0) + return haystack ? g_strdup(haystack) : NULL; + + str = g_strdup (haystack); + + for (cursor = str; cursor && *cursor; ++cursor) { + if (g_str_has_prefix (cursor, needle)) { + cursor[0] = repl; + memmove (cursor + 1, + cursor + strlen (needle), + strlen (cursor + strlen (needle)) + 1); + } + } + + return str; +} + +static void +hyperlinkify_plain_text (GString *txt) +{ + GSList *cursor; + GSList *match_list = get_url_matches (txt); + + /* we will work backwards, so the offsets stay valid */ + for (cursor = match_list; cursor; cursor = cursor->next) { + + url_match_t *match = (url_match_t*) cursor->data; + gchar *url = g_strndup (txt->str + match->offset, match->len); + gchar *repl = NULL; /* replacement */ + + /* the string still contains $(MARK_AMP_STR)"amp;" for each + * '&' in the original, because of the text->html conversion. + * in the href-URL (and only there), we must convert that back to + * '&' + */ + gchar *href_url = replace_string (url, MARK_AMP_STR "amp;", '&'); + + /* the prefix is NULL: use the one that is already there */ + repl = g_strdup_printf ("%s", + match->prefix ? match->prefix : EMPTY_STRING, + href_url, url); + + /* replace the old thing with our hyperlink + * replacement thing */ + g_string_erase (txt, match->offset, match->len); + g_string_insert (txt, match->offset, repl); + + g_free (url); + g_free (repl); + g_free (href_url); + + g_slice_free (url_match_t, match); + } + + g_slist_free (match_list); +} + + +/* for optimization reasons, we change the string in-place */ +void +modest_text_utils_get_display_address (gchar *address) +{ + int i; + + g_return_if_fail (address); + + if (!address) + return; + + /* should not be needed, and otherwise, we probably won't screw up the address + * more than it already is :) + * g_return_val_if_fail (g_utf8_validate (address, -1, NULL), NULL); + * */ + + /* remove leading whitespace */ + if (address[0] == ' ') + g_strchug (address); + + for (i = 0; address[i]; ++i) { + if (address[i] == '<') { + if (G_UNLIKELY(i == 0)) + return; /* there's nothing else, leave it */ + else { + address[i] = '\0'; /* terminate the string here */ + return; + } + } + } +} + + + + + +gchar * +modest_text_utils_get_email_address (const gchar *full_address) +{ + const gchar *left, *right; + + g_return_val_if_fail (full_address, NULL); + + if (!full_address) + return NULL; + + g_return_val_if_fail (g_utf8_validate (full_address, -1, NULL), NULL); + + left = g_strrstr_len (full_address, strlen(full_address), "<"); + if (left == NULL) + return g_strdup (full_address); + + right = g_strstr_len (left, strlen(left), ">"); + if (right == NULL) + return g_strdup (full_address); + + return g_strndup (left + 1, right - left - 1); +} + +gint +modest_text_utils_get_subject_prefix_len (const gchar *sub) +{ + gint prefix_len = 0; + + g_return_val_if_fail (sub, 0); + + if (!sub) + return 0; + + /* optimization: "Re", "RE", "re","Fwd", "FWD", "fwd","FW","Fw", "fw" */ + if (sub[0] != 'R' && sub[0] != 'F' && sub[0] != 'r' && sub[0] != 'f') + return 0; + else if (sub[0] && sub[1] != 'e' && sub[1] != 'E' && sub[1] != 'w' && sub[1] != 'W') + return 0; + + prefix_len = 2; + if (sub[2] == 'd') + ++prefix_len; + + /* skip over a [...] block */ + if (sub[prefix_len] == '[') { + int c = prefix_len + 1; + while (sub[c] && sub[c] != ']') + ++c; + if (sub[c]) + return 0; /* no end to the ']' found */ + else + prefix_len = c + 1; + } + + /* did we find the ':' ? */ + if (sub[prefix_len] == ':') { + ++prefix_len; + if (sub[prefix_len] == ' ') + ++prefix_len; + prefix_len += modest_text_utils_get_subject_prefix_len (sub + prefix_len); +/* g_warning ("['%s','%s']", sub, (char*) sub + prefix_len); */ + return prefix_len; + } else + return 0; +} + + +gint +modest_text_utils_utf8_strcmp (const gchar* s1, const gchar *s2, gboolean insensitive) +{ + +/* work even when s1 and/or s2 == NULL */ + if (G_UNLIKELY(s1 == s2)) + return 0; + if (G_UNLIKELY(!s1)) + return -1; + if (G_UNLIKELY(!s2)) + return 1; + + /* if it's not case sensitive */ + if (!insensitive) { + + /* optimization: short cut if first char is ascii */ + if (((s1[0] & 0xf0)== 0) && ((s2[0] & 0xf0) == 0)) + return s1[0] - s2[0]; + + return g_utf8_collate (s1, s2); + + } else { + gint result; + gchar *n1, *n2; + + /* optimization: short cut iif first char is ascii */ + if (((s1[0] & 0xf0) == 0) && ((s2[0] & 0xf0) == 0)) + return tolower(s1[0]) - tolower(s2[0]); + + n1 = g_utf8_strdown (s1, -1); + n2 = g_utf8_strdown (s2, -1); + + result = g_utf8_collate (n1, n2); + + g_free (n1); + g_free (n2); + + return result; + } +} + + +const gchar* +modest_text_utils_get_display_date (time_t date) +{ +#define DATE_BUF_SIZE 64 + static gchar date_buf[DATE_BUF_SIZE]; + + /* calculate the # of days since epoch for + * for today and for the date provided + * based on idea from pvanhoof */ + int day = time(NULL) / (24 * 60 * 60); + int date_day = date / (24 * 60 * 60); + + /* if it's today, show the time, if it's not today, show the date instead */ + + if (day == date_day) /* is the date today? */ + modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, "%X", date); + else + modest_text_utils_strftime (date_buf, DATE_BUF_SIZE, "%x", date); + + return date_buf; /* this is a static buffer, don't free! */ +} + + + +gboolean +modest_text_utils_validate_folder_name (const gchar *folder_name) +{ + /* based on http://msdn2.microsoft.com/en-us/library/aa365247.aspx, + * with some extras */ + + guint len; + gint i; + const gchar **cursor = NULL; + const gchar *forbidden_names[] = { /* windows does not like these */ + "CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", + "COM7", "COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9", + ".", "..", NULL + }; + + /* cannot be NULL */ + if (!folder_name) + return FALSE; + + /* cannot be empty */ + len = strlen(folder_name); + if (len == 0) + return FALSE; + + /* cannot start or end with a space */ + if (g_ascii_isspace(folder_name[0]) || g_ascii_isspace(folder_name[len - 1])) + return FALSE; + + /* cannot contain a forbidden char */ + for (i = 0; i < len; i++) + if (modest_text_utils_is_forbidden_char (folder_name[i], FOLDER_NAME_FORBIDDEN_CHARS)) + return FALSE; + + /* cannot contain a forbidden word */ + if (len <= 4) { + for (cursor = forbidden_names; cursor && *cursor; ++cursor) { + if (g_ascii_strcasecmp (folder_name, *cursor) == 0) + return FALSE; + } + } + + return TRUE; /* it's valid! */ +} + + + +gboolean +modest_text_utils_validate_domain_name (const gchar *domain) +{ + gboolean valid = FALSE; + regex_t rx; + const gchar* domain_regex = "^([a-z0-9-]*[a-z0-9]\\.)+[a-z0-9-]*[a-z0-9]$"; + + g_return_val_if_fail (domain, FALSE); + + if (!domain) + return FALSE; + + memset (&rx, 0, sizeof(regex_t)); /* coverity wants this... */ + + /* domain name: all alphanum or '-' or '.', + * but beginning/ending in alphanum */ + if (regcomp (&rx, domain_regex, REG_ICASE|REG_EXTENDED|REG_NOSUB)) { + g_warning ("BUG: error in regexp"); + return FALSE; + } + + valid = (regexec (&rx, domain, 1, NULL, 0) == 0); + regfree (&rx); + + return valid; +} + + + +gboolean +modest_text_utils_validate_email_address (const gchar *email_address, + const gchar **invalid_char_position) +{ + int count = 0; + const gchar *c = NULL, *domain = NULL; + static gchar *rfc822_specials = "()<>@,;:\\\"[]&"; + + if (invalid_char_position) + *invalid_char_position = NULL; + + g_return_val_if_fail (email_address, FALSE); + + /* check that the email adress contains exactly one @ */ + if (!strstr(email_address, "@") || + (strstr(email_address, "@") != g_strrstr(email_address, "@"))) + return FALSE; + + /* first we validate the name portion (name@domain) */ + for (c = email_address; *c; c++) { + if (*c == '\"' && + (c == email_address || + *(c - 1) == '.' || + *(c - 1) == '\"')) { + while (*++c) { + if (*c == '\"') + break; + if (*c == '\\' && (*++c == ' ')) + continue; + if (*c <= ' ' || *c >= 127) + return FALSE; + } + if (!*c++) + return FALSE; + if (*c == '@') + break; + if (*c != '.') + return FALSE; + continue; + } + if (*c == '@') + break; + if (*c <= ' ' || *c >= 127) + return FALSE; + if (strchr(rfc822_specials, *c)) { + if (invalid_char_position) + *invalid_char_position = c; + return FALSE; + } + } + if (c == email_address || *(c - 1) == '.') + return FALSE; + + /* next we validate the domain portion (name@domain) */ + if (!*(domain = ++c)) + return FALSE; + do { + if (*c == '.') { + if (c == domain || *(c - 1) == '.' || *(c + 1) == '\0') + return FALSE; + count++; + } + if (*c <= ' ' || *c >= 127) + return FALSE; + if (strchr(rfc822_specials, *c)) { + if (invalid_char_position) + *invalid_char_position = c; + return FALSE; + } + } while (*++c); + + return (count >= 1) ? TRUE : FALSE; +} + +gboolean +modest_text_utils_validate_recipient (const gchar *recipient, const gchar **invalid_char_position) +{ + gchar *stripped, *current; + gchar *right_part; + gboolean has_error = FALSE; + + if (invalid_char_position) + *invalid_char_position = NULL; + + g_return_val_if_fail (recipient, FALSE); + + if (modest_text_utils_validate_email_address (recipient, invalid_char_position)) + return TRUE; + + stripped = g_strdup (recipient); + stripped = g_strstrip (stripped); + current = stripped; + + if (*current == '\0') { + g_free (stripped); + return FALSE; + } + + /* quoted string */ + if (*current == '\"') { + current = g_utf8_next_char (current); + has_error = TRUE; + for (; *current != '\0'; current = g_utf8_next_char (current)) { + if (*current == '\\') { + /* TODO: This causes a warning, which breaks the build, + * because a gchar cannot be < 0. + * murrayc. + if (current[1] <0) { + has_error = TRUE; + break; + } + */ + } else if (*current == '\"') { + has_error = FALSE; + current = g_utf8_next_char (current); + break; + } + } + } else { + has_error = TRUE; + for (current = stripped ; *current != '\0'; current = g_utf8_next_char (current)) { + if (*current == '<') { + has_error = FALSE; + break; + } + } + } + + if (has_error) { + g_free (stripped); + return FALSE; + } + + right_part = g_strdup (current); + g_free (stripped); + right_part = g_strstrip (right_part); + + if (g_str_has_prefix (right_part, "<") && + g_str_has_suffix (right_part, ">")) { + gchar *address; + gboolean valid; + + address = g_strndup (right_part+1, strlen (right_part) - 2); + g_free (right_part); + valid = modest_text_utils_validate_email_address (address, invalid_char_position); + g_free (address); + return valid; + } else { + g_free (right_part); + return FALSE; + } +} + + +gchar * +modest_text_utils_get_display_size (guint64 size) +{ + const guint KB=1024; + const guint MB=1024 * KB; + const guint GB=1024 * MB; + + if (size == 0) + return g_strdup_printf(_FM("sfil_li_size_kb"), 0); + if (0 < size && size < KB) + return g_strdup_printf (_FM("sfil_li_size_kb"), 1); + else if (KB <= size && size < 100 * KB) + return g_strdup_printf (_FM("sfil_li_size_1kb_99kb"), size / KB); + else if (100*KB <= size && size < MB) + return g_strdup_printf (_FM("sfil_li_size_100kb_1mb"), (float) size / MB); + else if (MB <= size && size < 10*MB) + return g_strdup_printf (_FM("sfil_li_size_1mb_10mb"), (float) size / MB); + else if (10*MB <= size && size < GB) + return g_strdup_printf (_FM("sfil_li_size_10mb_1gb"), size / MB); + else + return g_strdup_printf (_FM("sfil_li_size_1gb_or_greater"), (float) size / GB); +} + +static gchar * +get_email_from_address (const gchar * address) +{ + gchar *left_limit, *right_limit; + + left_limit = strstr (address, "<"); + right_limit = g_strrstr (address, ">"); + + if ((left_limit == NULL)||(right_limit == NULL)|| (left_limit > right_limit)) + return g_strdup (address); + else + return g_strndup (left_limit + 1, (right_limit - left_limit) - 1); +} + +gchar * +modest_text_utils_get_color_string (GdkColor *color) +{ + g_return_val_if_fail (color, NULL); + + return g_strdup_printf ("#%x%x%x%x%x%x%x%x%x%x%x%x", + (color->red >> 12) & 0xf, (color->red >> 8) & 0xf, + (color->red >> 4) & 0xf, (color->red) & 0xf, + (color->green >> 12) & 0xf, (color->green >> 8) & 0xf, + (color->green >> 4) & 0xf, (color->green) & 0xf, + (color->blue >> 12) & 0xf, (color->blue >> 8) & 0xf, + (color->blue >> 4) & 0xf, (color->blue) & 0xf); +} + +gchar * +modest_text_utils_text_buffer_get_text (GtkTextBuffer *buffer) +{ + GtkTextIter start, end; + gchar *slice, *current; + GString *result = g_string_new (""); + + g_return_val_if_fail (buffer && GTK_IS_TEXT_BUFFER (buffer), NULL); + + gtk_text_buffer_get_start_iter (buffer, &start); + gtk_text_buffer_get_end_iter (buffer, &end); + + slice = gtk_text_buffer_get_slice (buffer, &start, &end, FALSE); + current = slice; + + while (current && current != '\0') { + if (g_utf8_get_char (current) == 0xFFFC) { + result = g_string_append_c (result, ' '); + current = g_utf8_next_char (current); + } else { + gchar *next = g_utf8_strchr (current, -1, 0xFFFC); + if (next == NULL) { + result = g_string_append (result, current); + } else { + result = g_string_append_len (result, current, next - current); + } + current = next; + } + } + g_free (slice); + + return g_string_free (result, FALSE); + +} + +gboolean +modest_text_utils_is_forbidden_char (const gchar character, + ModestTextUtilsForbiddenCharType type) +{ + gint i, len; + const gchar *forbidden_chars = NULL; + + /* We need to get the length in the switch because the + compiler needs to know the size at compile time */ + switch (type) { + case ACCOUNT_TITLE_FORBIDDEN_CHARS: + forbidden_chars = account_title_forbidden_chars; + len = G_N_ELEMENTS (account_title_forbidden_chars); + break; + case FOLDER_NAME_FORBIDDEN_CHARS: + forbidden_chars = folder_name_forbidden_chars; + len = G_N_ELEMENTS (folder_name_forbidden_chars); + break; + case USER_NAME_FORBIDDEN_NAMES: + forbidden_chars = user_name_forbidden_chars; + len = G_N_ELEMENTS (user_name_forbidden_chars); + break; + default: + g_return_val_if_reached (TRUE); + } + + for (i = 0; i < len ; i++) + if (forbidden_chars[i] == character) + return TRUE; + + return FALSE; /* it's valid! */ +}