Remove extra lines added during the HTML->text conversion when replying
[modest] / src / modest-formatter.c
1 /* Copyright (c) 2006, Nokia Corporation
2  * All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are
6  * met:
7  *
8  * * Redistributions of source code must retain the above copyright
9  *   notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  *   notice, this list of conditions and the following disclaimer in the
12  *   documentation and/or other materials provided with the distribution.
13  * * Neither the name of the Nokia Corporation nor the names of its
14  *   contributors may be used to endorse or promote products derived from
15  *   this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
18  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
20  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
21  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 #include <glib/gi18n.h>
31 #include <string.h>
32 #include <tny-header.h>
33 #include <tny-simple-list.h>
34 #include <tny-gtk-text-buffer-stream.h>
35 #include <tny-camel-mem-stream.h>
36 #include <tny-camel-html-to-text-stream.h>
37 #include "modest-formatter.h"
38 #include "modest-text-utils.h"
39 #include "modest-tny-platform-factory.h"
40 #include <modest-runtime.h>
41
/* Number of characters after which a line counts as wrapped when the
   body lines are counted */
#define LINE_WRAP 78
/* Maximum number of (wrapped) lines copied from the original body */
#define MAX_BODY_LINES 1024
/* Maximum number of bytes copied from the original body. The value is
   parenthesized so that the macro expands safely inside any larger
   expression */
#define MAX_BODY_LENGTH (1024*128)
45
46 typedef struct _ModestFormatterPrivate ModestFormatterPrivate;
47 struct _ModestFormatterPrivate {
48         gchar *content_type;
49         gchar *signature;
50 };
51 #define MODEST_FORMATTER_GET_PRIVATE(o)  (G_TYPE_INSTANCE_GET_PRIVATE((o), \
52                                           MODEST_TYPE_FORMATTER, \
53                                           ModestFormatterPrivate))
54
55 static GObjectClass *parent_class = NULL;
56
57 typedef gchar* FormatterFunc (ModestFormatter *self, const gchar *text, TnyHeader *header, GList *attachments);
58
59 static TnyMsg *modest_formatter_do (ModestFormatter *self, TnyMimePart *body,  TnyHeader *header, 
60                                     FormatterFunc func, GList *attachments);
61
62 static gchar*  modest_formatter_wrapper_cite   (ModestFormatter *self, const gchar *text,
63                                                 TnyHeader *header, GList *attachments);
64 static gchar*  modest_formatter_wrapper_quote  (ModestFormatter *self, const gchar *text,
65                                                 TnyHeader *header, GList *attachments);
66 static gchar*  modest_formatter_wrapper_inline (ModestFormatter *self, const gchar *text,
67                                                 TnyHeader *header, GList *attachments);
68
69 static TnyMimePart *find_body_parent (TnyMimePart *part);
70
71 static guint
72 count_end_tag_lines (const gchar *haystack, const gchar *needle)
73 {
74         gchar *tmp;
75         guint lines = 0;
76
77         tmp = g_strstr_len (haystack, g_utf8_strlen (haystack, -1), ">\n");
78         while (tmp && (tmp <= needle)) {
79                 lines++;
80                 tmp += 2;
81                 tmp = g_strstr_len (tmp, g_utf8_strlen (tmp, -1), ">\n");
82         }
83
84         return lines;
85 }
86
87 static gchar *
88 extract_text (ModestFormatter *self, TnyMimePart *body)
89 {
90         TnyStream *mp_stream;
91         TnyStream *stream;
92         TnyStream *input_stream;
93         GtkTextBuffer *buf;
94         GtkTextIter start, end;
95         gchar *text;
96         ModestFormatterPrivate *priv;
97         gint total, lines, total_lines, line_chars;
98         gboolean is_html;
99
100         buf = gtk_text_buffer_new (NULL);
101         stream = TNY_STREAM (tny_gtk_text_buffer_stream_new (buf));
102         tny_stream_reset (stream);
103         mp_stream = tny_mime_part_get_decoded_stream (body);
104
105         is_html = (g_strcmp0 (tny_mime_part_get_content_type (body), "text/html") == 0);
106         if (is_html) {
107                 input_stream = tny_camel_html_to_text_stream_new (mp_stream);
108         } else {
109                 input_stream = g_object_ref (mp_stream);
110         }
111
112         total = 0;
113         total_lines = 0;
114         line_chars = 0;
115         lines = 0;
116
117         /* For pure HTML emails tny_camel_html_to_text_stream inserts
118            a \n for every ">\n" found in the email including the HTML
119            headers (<html>, <head> ...). For that reason we need to
120            remove them from the resulting text as it is artificially
121            added by the stream */
122         if (is_html) {
123                 const guint BUFFER_SIZE = 1024;
124                 TnyStream *is;
125                 gboolean look_for_end_tag, found;
126                 gchar buffer [BUFFER_SIZE + 1];
127                 gchar *needle;
128
129                 is = g_object_ref (mp_stream);
130                 look_for_end_tag = FALSE;
131                 found = FALSE;
132
133                 /* This algorithm does not work if the body tag is
134                    spread along 2 different stream reads. But there
135                    are not a lot of changes for this to happen as the
136                    buffer size is big enough in most situations. In
137                    the worst case, when it's not found we just accept
138                    the original translation with the extra "\n" */
139                 while (!tny_stream_is_eos (is) && !found) {
140                         gint n_read;
141
142                         needle = NULL;
143                         memset (buffer, 0, BUFFER_SIZE);
144                         n_read = tny_stream_read (is, buffer, BUFFER_SIZE);
145
146                         if (G_UNLIKELY (n_read < 0))
147                                 break;
148
149                         buffer[n_read] = '\0';
150
151                         /* If we found body,then look for the end of the tag */
152                         if (look_for_end_tag) {
153                                 needle = strchr (buffer, '>');
154
155                                 if (needle) {
156                                         found = TRUE;
157                                         lines += count_end_tag_lines (buffer, needle);
158                                         break;
159                                 }
160                         } else {
161                                 gchar *closing;
162
163                                 /* Try to find the <body> tag. There
164                                    is no other HTML tag starting by
165                                    "bo", and we can detect more cases
166                                    were <body> tag falls into two
167                                    different stream reads */
168                                 needle = g_strstr_len (buffer, n_read, "<bo");
169
170                                 if (needle)
171                                         look_for_end_tag = TRUE;
172                                 else
173                                         needle = &(buffer[n_read]);
174
175                                 lines += count_end_tag_lines (buffer, needle);
176
177                                 closing = strchr (needle, '>');
178                                 if (closing) {
179                                         if (*(closing + 1) == '\n')
180                                                 lines++;
181                                         found = TRUE;
182                                         break;
183                                 }
184                         }
185                 }
186                 if (!found)
187                         lines = 0;
188                 tny_stream_reset (is);
189
190                 g_object_unref (is);
191         }
192
193         while (!tny_stream_is_eos (input_stream)) {
194                 gchar buffer [128];
195                 gchar *offset;
196                 gint n_read;
197                 gint next_read;
198
199                 next_read = MIN (128, MAX_BODY_LENGTH - total);
200                 if (next_read == 0)
201                         break;
202                 n_read = tny_stream_read (input_stream, buffer, next_read);
203
204                 if (G_UNLIKELY (n_read < 0))
205                         break;
206
207                 offset = buffer;
208                 while (offset < buffer + n_read) {
209
210                         if (*offset == '\n') {
211                                 total_lines ++;
212                                 line_chars = 0;
213                         } else {
214                                 line_chars ++;
215                                 if (line_chars >= LINE_WRAP) {
216                                         total_lines ++;
217                                         line_chars = 0;
218                                 }
219                         }
220                         if (total_lines >= MAX_BODY_LINES)
221                                 break;
222                         offset++;
223                 }
224
225                 if (offset - buffer > 0) {
226                         gint n_write = 0, to_write = 0;
227                         gchar *buffer_ptr;
228
229                         /* Discard lines artificially inserted by
230                            Camel when translating from HTML to text */
231                         buffer_ptr = buffer;
232                         if (lines) {
233                                 int i;
234                                 for (i=0; i < lines; i++) {
235                                         buffer_ptr = strchr (buffer_ptr, '\n');
236                                         buffer_ptr++;
237                                 }
238                         }
239                         to_write = offset - buffer_ptr;
240                         n_write = tny_stream_write (stream, buffer_ptr, to_write);
241                         total += n_write;
242                 } else if (n_read == -1) {
243                         break;
244                 }
245
246                 if (total_lines >= MAX_BODY_LINES)
247                         break;
248         }
249
250         tny_stream_reset (stream);
251
252         g_object_unref (G_OBJECT(stream));
253         g_object_unref (G_OBJECT (mp_stream));
254         g_object_unref (G_OBJECT (input_stream));
255
256         gtk_text_buffer_get_bounds (buf, &start, &end);
257         text = gtk_text_buffer_get_text (buf, &start, &end, FALSE);
258         g_object_unref (G_OBJECT(buf));
259
260         /* Convert to desired content type if needed */
261         priv = MODEST_FORMATTER_GET_PRIVATE (self);
262
263         return text;
264 }
265
266 static void
267 construct_from_text (TnyMimePart *part,
268                      const gchar *text,
269                      const gchar *content_type)
270 {
271         TnyStream *text_body_stream;
272
273         /* Create the stream */
274         text_body_stream = TNY_STREAM (tny_camel_mem_stream_new_with_buffer
275                                         (text, strlen(text)));
276
277         /* Construct MIME part */
278         tny_stream_reset (text_body_stream);
279         tny_mime_part_construct (part, text_body_stream, content_type, "7bit");
280         tny_stream_reset (text_body_stream);
281
282         /* Clean */
283         g_object_unref (G_OBJECT (text_body_stream));
284 }
285
286 static TnyMsg *
287 modest_formatter_do (ModestFormatter *self, TnyMimePart *body, TnyHeader *header, FormatterFunc func,
288                      GList *attachments)
289 {
290         TnyMsg *new_msg = NULL;
291         gchar *body_text = NULL, *txt = NULL;
292         ModestFormatterPrivate *priv;
293         TnyMimePart *body_part = NULL;
294
295         g_return_val_if_fail (self, NULL);
296         g_return_val_if_fail (header, NULL);
297         g_return_val_if_fail (func, NULL);
298
299         /* Build new part */
300         new_msg = modest_formatter_create_message (self, TRUE, attachments != NULL, FALSE);
301         body_part = modest_formatter_create_body_part (self, new_msg);
302
303         if (body)
304                 body_text = extract_text (self, body);
305         else
306                 body_text = g_strdup ("");
307
308         txt = (gchar *) func (self, (const gchar*) body_text, header, attachments);
309         priv = MODEST_FORMATTER_GET_PRIVATE (self);
310         construct_from_text (TNY_MIME_PART (body_part), (const gchar*) txt, priv->content_type);
311         g_object_unref (body_part);
312
313         /* Clean */
314         g_free (body_text);
315         g_free (txt);
316
317         return new_msg;
318 }
319
320 TnyMsg *
321 modest_formatter_cite (ModestFormatter *self, TnyMimePart *body, TnyHeader *header)
322 {
323         return modest_formatter_do (self, body, header, modest_formatter_wrapper_cite, NULL);
324 }
325
326 TnyMsg *
327 modest_formatter_quote (ModestFormatter *self, TnyMimePart *body, TnyHeader *header, GList *attachments)
328 {
329         return modest_formatter_do (self, body, header, modest_formatter_wrapper_quote, attachments);
330 }
331
332 TnyMsg *
333 modest_formatter_inline (ModestFormatter *self, TnyMimePart *body, TnyHeader *header, GList *attachments)
334 {
335         return modest_formatter_do (self, body, header, modest_formatter_wrapper_inline, attachments);
336 }
337
338 TnyMsg *
339 modest_formatter_attach (ModestFormatter *self, TnyMsg *msg, TnyHeader *header)
340 {
341         TnyMsg *new_msg = NULL;
342         TnyMimePart *body_part = NULL;
343         ModestFormatterPrivate *priv;
344         gchar *txt;
345
346         /* Build new part */
347         new_msg     = modest_formatter_create_message (self, TRUE, TRUE, FALSE);
348         body_part = modest_formatter_create_body_part (self, new_msg);
349
350         /* Create the two parts */
351         priv = MODEST_FORMATTER_GET_PRIVATE (self);
352         txt = modest_text_utils_cite ("", priv->content_type, priv->signature,
353                                       NULL, tny_header_get_date_sent (header));
354         construct_from_text (body_part, txt, priv->content_type);
355         g_free (txt);
356         g_object_unref (body_part);
357
358         if (msg) {
359                 /* Add parts */
360                 tny_mime_part_add_part (TNY_MIME_PART (new_msg), TNY_MIME_PART (msg));
361         }
362
363         return new_msg;
364 }
365
366 ModestFormatter*
367 modest_formatter_new (const gchar *content_type, const gchar *signature)
368 {
369         ModestFormatter *formatter;
370         ModestFormatterPrivate *priv;
371
372         formatter = g_object_new (MODEST_TYPE_FORMATTER, NULL);
373         priv = MODEST_FORMATTER_GET_PRIVATE (formatter);
374         priv->content_type = g_strdup (content_type);
375         priv->signature = g_strdup (signature);
376
377         return formatter;
378 }
379
380 static void
381 modest_formatter_instance_init (GTypeInstance *instance, gpointer g_class)
382 {
383         ModestFormatter *self = (ModestFormatter *)instance;
384         ModestFormatterPrivate *priv = MODEST_FORMATTER_GET_PRIVATE (self);
385
386         priv->content_type = NULL;
387         priv->signature = NULL;
388 }
389
390 static void
391 modest_formatter_finalize (GObject *object)
392 {
393         ModestFormatter *self = (ModestFormatter *)object;
394         ModestFormatterPrivate *priv = MODEST_FORMATTER_GET_PRIVATE (self);
395
396         if (priv->content_type)
397                 g_free (priv->content_type);
398
399         if (priv->signature)
400                 g_free (priv->signature);
401
402         (*parent_class->finalize) (object);
403 }
404
405 static void 
406 modest_formatter_class_init (ModestFormatterClass *class)
407 {
408         GObjectClass *object_class;
409
410         parent_class = g_type_class_peek_parent (class);
411         object_class = (GObjectClass*) class;
412         object_class->finalize = modest_formatter_finalize;
413
414         g_type_class_add_private (object_class, sizeof (ModestFormatterPrivate));
415 }
416
417 GType 
418 modest_formatter_get_type (void)
419 {
420         static GType type = 0;
421
422         if (G_UNLIKELY(type == 0))
423         {
424                 static const GTypeInfo info = 
425                 {
426                   sizeof (ModestFormatterClass),
427                   NULL,   /* base_init */
428                   NULL,   /* base_finalize */
429                   (GClassInitFunc) modest_formatter_class_init,   /* class_init */
430                   NULL,   /* class_finalize */
431                   NULL,   /* class_data */
432                   sizeof (ModestFormatter),
433                   0,      /* n_preallocs */
434                   modest_formatter_instance_init    /* instance_init */
435                 };
436                 
437                 type = g_type_register_static (G_TYPE_OBJECT,
438                         "ModestFormatter",
439                         &info, 0);
440         }
441
442         return type;
443 }
444
445 /****************/
446 static gchar *
447 modest_formatter_wrapper_cite (ModestFormatter *self, const gchar *text, TnyHeader *header,
448                                GList *attachments) 
449 {
450         gchar *result, *from;
451         ModestFormatterPrivate *priv = MODEST_FORMATTER_GET_PRIVATE (self);
452         
453         from = tny_header_dup_from (header);
454         result = modest_text_utils_cite (text, 
455                                          priv->content_type, 
456                                          priv->signature,
457                                          from, 
458                                          tny_header_get_date_sent (header));
459         g_free (from);
460         return result;
461 }
462
463 static gchar *
464 modest_formatter_wrapper_inline (ModestFormatter *self, const gchar *text, TnyHeader *header,
465                                  GList *attachments) 
466 {
467         gchar *result, *from, *to, *subject;
468         ModestFormatterPrivate *priv = MODEST_FORMATTER_GET_PRIVATE (self);
469
470         from = tny_header_dup_from (header);
471         to = tny_header_dup_to (header);
472         subject = tny_header_dup_subject (header);
473         result =  modest_text_utils_inline (text, 
474                                             priv->content_type, 
475                                             priv->signature,
476                                             from,
477                                             tny_header_get_date_sent (header),
478                                             to,
479                                             subject);
480         g_free (subject);
481         g_free (to);
482         g_free (from);
483         return result;
484 }
485
486 static gchar *
487 modest_formatter_wrapper_quote (ModestFormatter *self, const gchar *text, TnyHeader *header,
488                                 GList *attachments) 
489 {
490         ModestFormatterPrivate *priv = MODEST_FORMATTER_GET_PRIVATE (self);
491         GList *filenames = NULL;
492         GList *node = NULL;
493         gchar *result = NULL;
494         gchar *from;
495
496         /* First we need a GList of attachments filenames */
497         for (node = attachments; node != NULL; node = g_list_next (node)) {
498                 TnyMimePart *part = (TnyMimePart *) node->data;
499                 gchar *filename = NULL;
500                 if (TNY_IS_MSG (part)) {
501                         TnyHeader *header = tny_msg_get_header (TNY_MSG (part));
502                         filename = tny_header_dup_subject (header);
503                         if ((filename == NULL)||(filename[0] == '\0')) {
504                                 g_free (filename);
505                                 filename = g_strdup (_("mail_va_no_subject"));
506                         }
507                         g_object_unref (header);
508                 } else {
509                         filename = g_strdup (tny_mime_part_get_filename (part));
510                         if ((filename == NULL)||(filename[0] == '\0')) {
511                                 g_free (filename);
512                                 filename = g_strdup ("");
513                         }
514                 }
515                 filenames = g_list_prepend (filenames, filename);
516         }
517
518         /* TODO: get 80 from the configuration */
519         from = tny_header_dup_from (header);
520         result = modest_text_utils_quote (text, 
521                                           priv->content_type, 
522                                           priv->signature,
523                                           from,
524                                           tny_header_get_date_sent (header),
525                                           filenames,
526                                           80);
527         g_free (from);
528
529         g_list_foreach (filenames, (GFunc) g_free, NULL);
530         g_list_free (filenames);
531         return result;
532 }
533
534 TnyMsg * 
535 modest_formatter_create_message (ModestFormatter *self, gboolean single_body, 
536                                  gboolean has_attachments, gboolean has_images)
537 {
538         TnyMsg *result = NULL;
539         TnyPlatformFactory *fact = NULL;
540         TnyMimePart *body_mime_part = NULL;
541         TnyMimePart *related_mime_part = NULL;
542
543         fact    = modest_runtime_get_platform_factory ();
544         result = tny_platform_factory_new_msg (fact);
545         if (has_attachments) {
546                 tny_mime_part_set_content_type (TNY_MIME_PART (result), "multipart/mixed");
547                 if (has_images) {
548                         related_mime_part = tny_platform_factory_new_mime_part (fact);
549                         tny_mime_part_set_content_type (related_mime_part, "multipart/related");
550                         tny_mime_part_add_part (TNY_MIME_PART (result), related_mime_part);
551                 } else {
552                         related_mime_part = g_object_ref (result);
553                 }
554                         
555                 if (!single_body) {
556                         body_mime_part = tny_platform_factory_new_mime_part (fact);
557                         tny_mime_part_set_content_type (body_mime_part, "multipart/alternative");
558                         tny_mime_part_add_part (TNY_MIME_PART (related_mime_part), body_mime_part);
559                         g_object_unref (body_mime_part);
560                 }
561
562                 g_object_unref (related_mime_part);
563         } else if (has_images) {
564                 tny_mime_part_set_content_type (TNY_MIME_PART (result), "multipart/related");
565
566                 if (!single_body) {
567                         body_mime_part = tny_platform_factory_new_mime_part (fact);
568                         tny_mime_part_set_content_type (body_mime_part, "multipart/alternative");
569                         tny_mime_part_add_part (TNY_MIME_PART (result), body_mime_part);
570                         g_object_unref (body_mime_part);
571                 }
572
573         } else if (!single_body) {
574                 tny_mime_part_set_content_type (TNY_MIME_PART (result), "multipart/alternative");
575         }
576
577         return result;
578 }
579
580 TnyMimePart *
581 find_body_parent (TnyMimePart *part)
582 {
583         const gchar *msg_content_type = NULL;
584         msg_content_type = tny_mime_part_get_content_type (part);
585
586         if ((msg_content_type != NULL) &&
587             (!g_ascii_strcasecmp (msg_content_type, "multipart/alternative")))
588                 return g_object_ref (part);
589         else if ((msg_content_type != NULL) &&
590                  (g_str_has_prefix (msg_content_type, "multipart/"))) {
591                 TnyIterator *iter = NULL;
592                 TnyMimePart *alternative_part = NULL;
593                 TnyMimePart *related_part = NULL;
594                 TnyList *parts = TNY_LIST (tny_simple_list_new ());
595                 tny_mime_part_get_parts (TNY_MIME_PART (part), parts);
596                 iter = tny_list_create_iterator (parts);
597
598                 while (!tny_iterator_is_done (iter)) {
599                         TnyMimePart *part = TNY_MIME_PART (tny_iterator_get_current (iter));
600                         if (part && !g_ascii_strcasecmp(tny_mime_part_get_content_type (part), "multipart/alternative")) {
601                                 alternative_part = part;
602                                 break;
603                         } else if (part && !g_ascii_strcasecmp (tny_mime_part_get_content_type (part), "multipart/related")) {
604                                 related_part = part;
605                                 break;
606                         }
607
608                         if (part)
609                                 g_object_unref (part);
610
611                         tny_iterator_next (iter);
612                 }
613                 g_object_unref (iter);
614                 g_object_unref (parts);
615                 if (related_part) {
616                         TnyMimePart *result;
617                         result = find_body_parent (related_part);
618                         g_object_unref (related_part);
619                         return result;
620                 } else if (alternative_part)
621                         return alternative_part;
622                 else 
623                         return g_object_ref (part);
624         } else
625                 return NULL;
626 }
627
628 TnyMimePart * 
629 modest_formatter_create_body_part (ModestFormatter *self, TnyMsg *msg)
630 {
631         TnyMimePart *result = NULL;
632         TnyPlatformFactory *fact = NULL;
633         TnyMimePart *parent = NULL;
634
635         parent = find_body_parent (TNY_MIME_PART (msg));
636         fact = modest_runtime_get_platform_factory ();
637         if (parent != NULL) {
638                 result = tny_platform_factory_new_mime_part (fact);
639                 tny_mime_part_add_part (TNY_MIME_PART (parent), result);
640                 g_object_unref (parent);
641         } else {
642                 result = g_object_ref (msg);
643         }
644
645         return result;
646
647 }