/*
 * Append the contents of `data` to the GString `html`, byte by byte,
 * as the first stage of a text-to-HTML conversion.
 *
 * html: destination string; output is appended with g_string_append_c().
 * data: input text. Its length is taken with strlen(), so it must be
 *       NUL-terminated.
 * n:    overwritten with strlen (data) before it is read, so the value
 *       passed by the caller has no effect in the visible code.
 *
 * NOTE(review): this text is a patch rendering ("- " = removed line,
 * "+ " = added line) and part of the body (the start of the switch and
 * its earlier cases) is not visible here. The notes below cover only
 * the visible lines.
 *
 * Visible behaviour after the patch:
 *  - a deferred space is emitted once a non-space byte follows
 *    (space_seen && kar != ' ');
 *  - a '\n' is appended once break_dist has reached 256 and the current
 *    byte is below 127, so the inserted newline never lands between the
 *    bytes of a multi-byte UTF-8 character;
 *  - the default case copies the byte unchanged. The removed code
 *    validated UTF-8 with g_utf8_get_char_validated() and skipped
 *    invalid bytes with a warning; the added code performs no validation.
 */
static void
modest_text_utils_convert_buffer_to_html_start (GString *html, const gchar *data, gssize n)
{
- guint i = 0;
+ guint i;
gboolean space_seen = FALSE;
guint break_dist = 0; /* distance since last break point */
/* the caller-supplied n is discarded; the whole NUL-terminated string is processed */
n = strlen (data);
/* replace with special html chars where needed*/
- while (i != n) {
- char kar = data[i];
+ for (i = 0; i != n; ++i) {
+ guchar kar = data[i];
if (space_seen && kar != ' ') {
g_string_append_c (html, ' ');
/* we artificially insert a breakpoint (newline)
* after 256, to make sure our lines are not so long
* they will DOS the regexping later
+ * Also, check that kar is ASCII to make sure that we
+ * don't break a UTF8 char in two
*/
- if (++break_dist == 256) {
+ if (++break_dist >= 256 && kar < 127) {
g_string_append_c (html, '\n');
break_dist = 0;
}
space_seen = TRUE;
break;
default:
- /* Optimization to copy single ascii
- * characters faster */
- if (kar > 31 && kar < 127) {
- g_string_append_c (html, kar);
- } else {
- /* Important: copy full UTF-8 characters,
- * don't copy them byte by byte */
- gunichar c = g_utf8_get_char_validated (data+i, -1);
- if (c != (gunichar) -1 && c != (gunichar) -2) {
- const gchar *copyfrom = data + i;
- int len = g_utf8_next_char(copyfrom) - copyfrom;
- g_string_append_len (html, copyfrom, len);
- i += len - 1;
- } else {
- g_warning ("%s: non-UTF8 byte found, skipping", __FUNCTION__);
- }
- }
+ g_string_append_c (html, kar);
}
- i++;
}
}