From 10b6d474b385146b4961b3272f38da82e8d8ade0 Mon Sep 17 00:00:00 2001
From: "Dirk-Jan C. Binnema" <dirk-jan.binnema@nokia.com>
Date: Fri, 27 Jul 2007 12:00:30 +0000
Subject: [PATCH] * make the hyperlinkify regexp code a bit faster, also don't
 try   to hyperlinkify really big parts, because it will
 take very, very   long even in sbox. limit is 100KB now.

pmo-trunk-r2822
---
 src/modest-text-utils.c |   57 ++++++++++++++++++++++++++++-------------------
 1 file changed, 34 insertions(+), 23 deletions(-)
diff --git a/src/modest-text-utils.c b/src/modest-text-utils.c
index 3c32ada..6122f69 100644
--- a/src/modest-text-utils.c
+++ b/src/modest-text-utils.c
@@ -51,6 +51,14 @@
 #define EMPTY_STRING ""
 
 /*
+ * do the hyperlinkification only for texts < 50 Kb,
+ * as it's quite slow. Without this, e.g. mail with
+ * an uuencoded part (which is not recognized as attachment,
+ * will hang modest
+ */
+#define HYPERLINKIFY_MAX_LENGTH (1024*50)
+
+/*
  * we need these regexps to find URLs in plain text e-mails
  */
 typedef struct _url_match_pattern_t url_match_pattern_t;
@@ -86,7 +94,6 @@ struct _url_match_t {
 static gchar*   cite                    (const time_t sent_date, const gchar *from);
 static void     hyperlinkify_plain_text (GString *txt);
 static gint     cmp_offsets_reverse     (const url_match_t *match1, const url_match_t *match2);
-static void     chk_partial_match       (const url_match_t *match, guint* offset);
 static GSList*  get_url_matches         (GString *txt);
 
 static GString* get_next_line           (const char *b, const gsize blen, const gchar * iter);
@@ -366,7 +373,9 @@ modest_text_utils_convert_to_html (const gchar *data)
 	modest_text_utils_convert_buffer_to_html (html, data);
 	
 	g_string_append (html, "</body></html>");
-	hyperlinkify_plain_text (html);
+
+	if (len <= HYPERLINKIFY_MAX_LENGTH)
+		hyperlinkify_plain_text (html);
 
 	return g_string_free (html, FALSE);
 }
@@ -385,7 +394,8 @@ modest_text_utils_convert_to_html_body (const gchar *data)
 
 	modest_text_utils_convert_buffer_to_html (html, data);
 
-	hyperlinkify_plain_text (html);
+	if (len < HYPERLINKIFY_MAX_LENGTH)
+		hyperlinkify_plain_text (html);
 
 	return g_string_free (html, FALSE);
 }
@@ -814,16 +824,6 @@ cmp_offsets_reverse (const url_match_t *match1, const url_match_t *match2)
 }
 
 
-
-/*
- * check if the match is inside an existing match... */
-static void
-chk_partial_match (const url_match_t *match, guint* offset)
-{
-	if (*offset >= match->offset && *offset < match->offset + match->len)
-		*offset = -1;
-}
-
 static GSList*
 get_url_matches (GString *txt)
 {
@@ -846,28 +846,39 @@ get_url_matches (GString *txt)
 	for (i = 0; i != pattern_num; ++i) {
 		offset     = 0;	
 		while (1) {
-			int test_offset;
+			url_match_t *match;
+			gboolean is_submatch;
+			GSList *cursor;
+			
 			if ((rv = regexec (patterns[i].preg, txt->str + offset, 1, &rm, 0)) != 0) {
 				g_return_val_if_fail (rv == REG_NOMATCH, NULL); /* this should not happen */
 				break; /* try next regexp */ 
 			}
 			if (rm.rm_so == -1)
 				break;
-
-			/* FIXME: optimize this */
-			/* to avoid partial matches on something that was already found... */
-			/* check_partial_match will put -1 in the data ptr if that is the case */
-			test_offset = offset + rm.rm_so;
-			g_slist_foreach (match_list, (GFunc)chk_partial_match, &test_offset);
 			
-			/* make a list of our matches (<offset, len, prefix> tupels)*/
-			if (test_offset != -1) {
-				url_match_t *match = g_slice_new (url_match_t);
+			is_submatch = FALSE;
+			/* check  old matches to see if this has already been matched */
+			cursor = match_list;
+			while (cursor && !is_submatch) {
+				const url_match_t *old_match =
+					(const url_match_t *) cursor->data;
+				guint new_offset = offset + rm.rm_so;
+				is_submatch = (new_offset >  old_match->offset &&
+					       new_offset <  old_match->offset + old_match->len);
+				cursor = g_slist_next (cursor);
+			}
+
+			if (!is_submatch) {
+				/* make a list of our matches (<offset, len, prefix> tupels)*/
+				match = g_slice_new (url_match_t);
 				match->offset = offset + rm.rm_so;
 				match->len    = rm.rm_eo - rm.rm_so;
 				match->prefix = patterns[i].prefix;
+				g_warning ("<%d, %d, %s>",  match->offset, match->len, match->prefix);
 				match_list = g_slist_prepend (match_list, match);
 			}
+				
 			offset += rm.rm_eo;
 		}
 	}
-- 
1.7.9.5