From 10b6d474b385146b4961b3272f38da82e8d8ade0 Mon Sep 17 00:00:00 2001 From: "Dirk-Jan C. Binnema" Date: Fri, 27 Jul 2007 12:00:30 +0000 Subject: [PATCH] * make the hyperlinkify regexp code a bit faster, also don't try to hyperlinkify really big parts, because it will take very, very long even in sbox. limit is 100KB now. pmo-trunk-r2822 --- src/modest-text-utils.c | 57 ++++++++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/src/modest-text-utils.c b/src/modest-text-utils.c index 3c32ada..6122f69 100644 --- a/src/modest-text-utils.c +++ b/src/modest-text-utils.c @@ -51,6 +51,14 @@ #define EMPTY_STRING "" /* + * do the hyperlinkification only for texts < 50 Kb, + * as it's quite slow. Without this, e.g. mail with + * an uuencoded part (which is not recognized as attachment, + * will hang modest + */ +#define HYPERLINKIFY_MAX_LENGTH (1024*50) + +/* * we need these regexps to find URLs in plain text e-mails */ typedef struct _url_match_pattern_t url_match_pattern_t; @@ -86,7 +94,6 @@ struct _url_match_t { static gchar* cite (const time_t sent_date, const gchar *from); static void hyperlinkify_plain_text (GString *txt); static gint cmp_offsets_reverse (const url_match_t *match1, const url_match_t *match2); -static void chk_partial_match (const url_match_t *match, guint* offset); static GSList* get_url_matches (GString *txt); static GString* get_next_line (const char *b, const gsize blen, const gchar * iter); @@ -366,7 +373,9 @@ modest_text_utils_convert_to_html (const gchar *data) modest_text_utils_convert_buffer_to_html (html, data); g_string_append (html, ""); - hyperlinkify_plain_text (html); + + if (len <= HYPERLINKIFY_MAX_LENGTH) + hyperlinkify_plain_text (html); return g_string_free (html, FALSE); } @@ -385,7 +394,8 @@ modest_text_utils_convert_to_html_body (const gchar *data) modest_text_utils_convert_buffer_to_html (html, data); - hyperlinkify_plain_text (html); + if (len < HYPERLINKIFY_MAX_LENGTH) + hyperlinkify_plain_text (html); return g_string_free (html, FALSE); } @@ -814,16 +824,6 @@ cmp_offsets_reverse (const url_match_t *match1, const url_match_t *match2) } - -/* - * check if the match is inside an existing match... */ -static void -chk_partial_match (const url_match_t *match, guint* offset) -{ - if (*offset >= match->offset && *offset < match->offset + match->len) - *offset = -1; -} - static GSList* get_url_matches (GString *txt) { @@ -846,28 +846,39 @@ get_url_matches (GString *txt) for (i = 0; i != pattern_num; ++i) { offset = 0; while (1) { - int test_offset; + url_match_t *match; + gboolean is_submatch; + GSList *cursor; + if ((rv = regexec (patterns[i].preg, txt->str + offset, 1, &rm, 0)) != 0) { g_return_val_if_fail (rv == REG_NOMATCH, NULL); /* this should not happen */ break; /* try next regexp */ } if (rm.rm_so == -1) break; - - /* FIXME: optimize this */ - /* to avoid partial matches on something that was already found... */ - /* check_partial_match will put -1 in the data ptr if that is the case */ - test_offset = offset + rm.rm_so; - g_slist_foreach (match_list, (GFunc)chk_partial_match, &test_offset); - /* make a list of our matches ( tupels)*/ - if (test_offset != -1) { - url_match_t *match = g_slice_new (url_match_t); + is_submatch = FALSE; + /* check old matches to see if this has already been matched */ + cursor = match_list; + while (cursor && !is_submatch) { + const url_match_t *old_match = + (const url_match_t *) cursor->data; + guint new_offset = offset + rm.rm_so; + is_submatch = (new_offset > old_match->offset && + new_offset < old_match->offset + old_match->len); + cursor = g_slist_next (cursor); + } + + if (!is_submatch) { + /* make a list of our matches ( tupels)*/ + match = g_slice_new (url_match_t); match->offset = offset + rm.rm_so; match->len = rm.rm_eo - rm.rm_so; match->prefix = patterns[i].prefix; + g_warning ("<%d, %d, %s>", match->offset, match->len, match->prefix); match_list = g_slist_prepend (match_list, match); } + offset += rm.rm_eo; } } -- 1.7.9.5