Fix a problem in modest_text_utils_convert_buffer_to_html_start()
[modest] / src / modest-search.c
1 /* Copyright (c) 2006, Nokia Corporation
2  * All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are
6  * met:
7  *
8  * * Redistributions of source code must retain the above copyright
9  *   notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  *   notice, this list of conditions and the following disclaimer in the
12  *   documentation and/or other materials provided with the distribution.
13  * * Neither the name of the Nokia Corporation nor the names of its
14  *   contributors may be used to endorse or promote products derived from
15  *   this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
18  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
20  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
21  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 #ifndef _GNU_SOURCE
31 #define _GNU_SOURCE
32 #endif
33
34 #ifdef HAVE_CONFIG_H
35 #include <config.h>
36 #endif
37
38 #include <string.h>
39
40 #include <tny-shared.h>
41 #include <tny-folder.h>
42 #include <tny-folder-store.h>
43 #include <tny-list.h>
44 #include <tny-iterator.h>
45 #include <tny-simple-list.h>
46 #include <tny-camel-imap-store-account.h>
47 #include <tny-camel-pop-store-account.h>
48
49 #include "modest-text-utils.h"
50 #include "modest-account-mgr.h"
51 #include "modest-tny-account-store.h"
52 #include "modest-tny-account.h"
53 #include "modest-tny-folder.h"
54 #include "modest-search.h"
55 #include "modest-runtime.h"
56 #include "modest-platform.h"
57
58 static gchar *
59 g_strdup_or_null (const gchar *str)
60 {
61         gchar *string = NULL;
62
63         if  (str != NULL) {
64                 string = g_strdup (str);
65         }
66
67         return string;
68 }
69
70 static GList*
71 add_hit (GList *list, TnyHeader *header, TnyFolder *folder)
72 {
73         ModestSearchResultHit *hit;
74         TnyHeaderFlags   flags;
75         char            *furl;
76         char            *msg_url;
77         const char      *uid;
78         const char      *subject;
79         const char      *sender;
80
81         hit = g_slice_new0 (ModestSearchResultHit);
82
83         furl = tny_folder_get_url_string (folder);
84         printf ("DEBUG: %s: folder URL=%s\n", __FUNCTION__, furl);
85         if (!furl) {
86                 g_warning ("%s: tny_folder_get_url_string(): returned NULL for folder. Folder name=%s\n", __FUNCTION__, tny_folder_get_name (folder));
87         }
88         
89         /* Make sure that we use the short UID instead of the long UID,
90          * and/or find out what UID form is used when finding, in camel_data_cache_get().
91          * so we can find what we get. Philip is working on this.
92          */
93         uid = tny_header_get_uid (header);
94         if (!furl) {
95                 g_warning ("%s: tny_header_get_uid(): returned NULL for message with subject=%s\n", __FUNCTION__, tny_header_get_subject (header));
96         }
97         
98         msg_url = g_strdup_printf ("%s/%s", furl, uid);
99         g_free (furl);
100         
101         subject = tny_header_get_subject (header);
102         sender = tny_header_get_from (header);
103         
104         flags = tny_header_get_flags (header);
105
106         hit->msgid = msg_url;
107         hit->subject = g_strdup_or_null (subject);
108         hit->sender = g_strdup_or_null (sender);
109         hit->folder = g_strdup_or_null (tny_folder_get_name (folder));
110         hit->msize = tny_header_get_message_size (header);
111         hit->has_attachment = flags & TNY_HEADER_FLAG_ATTACHMENTS;
112         hit->is_unread = ! (flags & TNY_HEADER_FLAG_SEEN);
113         hit->timestamp = MIN (tny_header_get_date_received (header), tny_header_get_date_sent (header));
114         
115         return g_list_prepend (list, hit);
116 }
117
118 /** Call this until it returns FALSE or nread is set to 0.
119  * 
120  * @result: FALSE is something failed. */
121 static gboolean
122 read_chunk (TnyStream *stream, char *buffer, gsize count, gsize *nread)
123 {
124         gsize _nread = 0;
125         gssize res = 0;
126
127         while (_nread < count) {
128                 res = tny_stream_read (stream,
129                                        buffer + _nread, 
130                                        count - _nread);
131                 if (res == -1) { /* error */
132                         *nread = _nread;
133                         return FALSE;
134                 }
135
136                 _nread += res;
137                 
138                 if (res == 0) { /* no more bytes read. */
139                         *nread = _nread;
140                         return TRUE;
141                 }
142         }
143
144         *nread = _nread;
145         return TRUE;
146 }
147
148 #ifdef MODEST_HAVE_OGS
149 static gboolean
150 search_mime_part_ogs (TnyMimePart *part, ModestSearch *search)
151 {
152         TnyStream *stream = NULL;
153         char       buffer[4096];
154         const gsize len = sizeof (buffer);
155         gsize      nread = 0;
156         gboolean   is_text_html = FALSE;
157         gboolean   found = FALSE;
158         gboolean   res = FALSE;
159
160         gboolean is_text = tny_mime_part_content_type_is (part, "text/*");
161         if (!is_text) {
162                 g_debug ("%s: tny_mime_part_content_type_is() failed to find a "
163                         "text/* MIME part. Content type is %s", 
164                 __FUNCTION__, "Unknown (calling tny_mime_part_get_content_type(part) causes a deadlock)");
165                 
166             /* Retry with specific MIME types, because the wildcard seems to fail
167              * in tinymail.
168              * Actually I'm not sure anymore that it fails, so we could probalby 
169              * remove this later: murrayc */
170             is_text = (
171                 tny_mime_part_content_type_is (part, "text/plain") ||
172                 tny_mime_part_content_type_is (part, "text/html") );
173                 
174                 if (is_text) {
175                   g_debug ("%s: Retryting with text/plain or text/html succeeded", 
176                         __FUNCTION__);  
177                 }
178         }
179         
180         if (!is_text) {
181             return FALSE;
182         }
183         
184         is_text_html = tny_mime_part_content_type_is (part, "text/html");
185
186         stream = tny_mime_part_get_stream (part);
187
188         res = read_chunk (stream, buffer, len, &nread);
189         while (res && (nread > 0)) {
190                 /* search->text_searcher was instantiated in modest_search_folder(). */
191                 
192                 if (is_text_html) {
193
194                         found = ogs_text_searcher_search_html (search->text_searcher,
195                                                                buffer,
196                                                                nread,
197                                                                nread < len);
198                 } else {
199                         found = ogs_text_searcher_search_text (search->text_searcher,
200                                                                buffer,
201                                                                nread);
202                 }
203
204                 if (found) {
205                         break;
206                 }
207                 
208                 nread = 0;
209                 res = read_chunk (stream, buffer, len, &nread);
210         }
211
212         if (!found) {
213                 found = ogs_text_searcher_search_done (search->text_searcher);
214         }
215
216         ogs_text_searcher_reset (search->text_searcher);
217         
218         /* debug stuff:
219         if (!found) {
220                 buffer[len -1] = 0;
221                 printf ("DEBUG: %s: query %s was not found in message text: %s\n", 
222                         __FUNCTION__, search->query, buffer);   
223                 
224         } else {
225                 printf ("DEBUG: %s: found.\n", __FUNCTION__);   
226         }
227         */
228
229         return found;
230 }
231
232 #else
233
234 static gboolean
235 search_mime_part_strcmp (TnyMimePart *part, ModestSearch *search)
236 {
237         TnyStream *stream;
238         char       buffer[8193];
239         char      *chunk[2];
240         gssize     len;
241         gsize     nread;
242         gboolean   found;
243         gboolean   res;
244
245         if (! tny_mime_part_content_type_is (part, "text/*")) {
246                 g_debug ("%s: No text MIME part found.\n", __FUNCTION__);
247                 return FALSE;
248         }
249
250         found = FALSE;
251         len = (sizeof (buffer) - 1) / 2;
252
253         if (strlen (search->body) > len) {
254                 g_warning ("Search term bigger then chunk."
255                            "We might not find everything");     
256         }
257
258         stream = tny_mime_part_get_stream (part);
259
260         memset (buffer, 0, sizeof (buffer));
261         chunk[0] = buffer;
262         chunk[1] = buffer + len;
263
264         res = read_chunk (stream, chunk[0], len, &nread);
265
266         if (res == FALSE) {
267                 goto done;
268         }
269
270         found = !modest_text_utils_utf8_strcmp (search->body,
271                                                 buffer,
272                                                 TRUE);
273         if (found) {
274                 goto done;
275         }
276
277         /* This works like this:
278          * buffer: [ooooooooooo|xxxxxxxxxxxx|\0] 
279          *          ^chunk[0]  ^chunk[1]
280          * we have prefilled chunk[0] now we always read into chunk[1]
281          * and then move the content of chunk[1] to chunk[0].
282          * The idea is to prevent not finding search terms that are
283          * spread across 2 reads:        
284          * buffer: [ooooooooTES|Txxxxxxxxxxx|\0] 
285          * We should catch that because we always search the whole
286          * buffer not only the chunks.
287          *
288          * Of course that breaks for search terms > sizeof (chunk)
289          * but sizeof (chunk) should be big enough I guess (see
290          * the g_warning in this function)
291          * */   
292         while ((res = read_chunk (stream, chunk[1], len, &nread))) {
293                 buffer[len + nread] = '\0';
294
295                 found = !modest_text_utils_utf8_strcmp (search->body,
296                                                         buffer,
297                                                         TRUE);
298
299                 if ((found)||(nread == 0)) {
300                         break;
301                 }
302
303                 /* also move the \0 */
304                 g_memmove (chunk[0], chunk[1], len + 1);
305         }
306
307 done:
308         g_object_unref (stream);
309         return found;
310 }
311 #endif /*MODEST_HAVE_OGS*/
312
313 static gboolean
314 search_string (const char      *what,
315                const char      *where,
316                ModestSearch    *search)
317 {
318         gboolean found;
319 #ifdef MODEST_HAVE_OGS
320         if (search->flags & MODEST_SEARCH_USE_OGS) {
321                 found = ogs_text_searcher_search_text (search->text_searcher,
322                                                        where,
323                                                        strlen (where));
324
325                 ogs_text_searcher_reset (search->text_searcher);
326         } else {
327 #endif
328                 if (what == NULL || where == NULL) {
329                         return FALSE;
330                 }
331
332                 found = !modest_text_utils_utf8_strcmp (what, where, TRUE);
333 #ifdef MODEST_HAVE_OGS
334         }
335 #endif
336         return found;
337 }
338
339
340 static gboolean search_mime_part_and_child_parts (TnyMimePart *part, ModestSearch *search)
341 {
342         gboolean found = FALSE;
343         #ifdef MODEST_HAVE_OGS
344         found = search_mime_part_ogs (part, search);
345         #else
346         found = search_mime_part_strcmp (part, search);
347         #endif
348
349         if (found) {    
350                 return found;           
351         }
352         
353         /* Check the child part too, recursively: */
354         TnyList *child_parts = tny_simple_list_new ();
355         tny_mime_part_get_parts (TNY_MIME_PART (part), child_parts);
356
357         TnyIterator *piter = tny_list_create_iterator (child_parts);
358         while (!found && !tny_iterator_is_done (piter)) {
359                 TnyMimePart *pcur = (TnyMimePart *) tny_iterator_get_current (piter);
360                 if (pcur) {
361                         found = search_mime_part_and_child_parts (pcur, search);
362
363                         g_object_unref (pcur);
364                 }
365
366                 tny_iterator_next (piter);
367         }
368
369         g_object_unref (piter);
370         g_object_unref (child_parts);
371         
372         return found;
373 }
374
375 /**
376  * modest_search:
377  * @folder: a #TnyFolder instance
378  * @search: a #ModestSearch query
379  *
380  * This operation will search @folder for headers that match the query @search,
381  * if the folder itself matches the query.
382  * It will return a doubly linked list with URIs that point to the message.
383  **/
384 GList *
385 modest_search_folder (TnyFolder *folder, ModestSearch *search)
386 {
387         /* Check that we should be searching this folder. */
388         /* Note that we don't try to search sub-folders. 
389          * Maybe we should, but that should be specified. */
390         if (search->folder && strlen (search->folder)) {
391                 if (!strcmp (search->folder, "outbox")) {
392                         if (modest_tny_folder_guess_folder_type (folder) != TNY_FOLDER_TYPE_OUTBOX) {
393                                 return NULL;
394                         }
395                 } else if (strcmp (tny_folder_get_id (folder), search->folder) != 0) {
396                         return NULL;
397                 }
398         }
399         
400         GList *retval = NULL;
401         TnyIterator *iter = NULL;
402         TnyList *list = NULL;
403         
404 #ifdef MODEST_HAVE_OGS
405         if (search->flags & MODEST_SEARCH_USE_OGS) {
406         
407                 if (search->text_searcher == NULL && search->query != NULL) {
408                         OgsTextSearcher *text_searcher; 
409
410                         text_searcher = ogs_text_searcher_new (FALSE);
411                         ogs_text_searcher_parse_query (text_searcher, search->query);
412                         search->text_searcher = text_searcher;
413                 }
414         }
415 #endif
416
417         list = tny_simple_list_new ();
418         GError *error = NULL;
419         tny_folder_get_headers (folder, list, FALSE /* don't refresh */, &error);
420         if (error) {
421                 g_warning ("%s: tny_folder_get_headers() failed with error=%s.\n", 
422                 __FUNCTION__, error->message);
423                 g_error_free (error);
424                 error = NULL;   
425         }
426
427         iter = tny_list_create_iterator (list);
428
429         while (!tny_iterator_is_done (iter)) {
430                 TnyHeader *cur = (TnyHeader *) tny_iterator_get_current (iter);
431                 const time_t t = tny_header_get_date_sent (cur);
432                 gboolean found = FALSE;
433                 
434                 /* Ignore deleted (not yet expunged) emails: */
435                 if (tny_header_get_flags(cur) & TNY_HEADER_FLAG_DELETED)
436                         goto go_next;
437                         
438                 if (search->flags & MODEST_SEARCH_BEFORE)
439                         if (!(t <= search->end_date))
440                                 goto go_next;
441
442                 if (search->flags & MODEST_SEARCH_AFTER)
443                         if (!(t >= search->start_date))
444                                 goto go_next;
445
446                 if (search->flags & MODEST_SEARCH_SIZE)
447                         if (tny_header_get_message_size (cur) < search->minsize)
448                                 goto go_next;
449
450                 if (search->flags & MODEST_SEARCH_SUBJECT) {
451                         const char *str = tny_header_get_subject (cur);
452
453                         if ((found = search_string (search->subject, str, search))) {
454                             retval = add_hit (retval, cur, folder);
455                         }
456                 }
457                 
458                 if (!found && search->flags & MODEST_SEARCH_SENDER) {
459                         char *str = g_strdup (tny_header_get_from (cur));
460
461                         if ((found = search_string (search->from, (const gchar *) str, search))) {
462                                 retval = add_hit (retval, cur, folder);
463                         }
464                         g_free (str);
465                 }
466                 
467                 if (!found && search->flags & MODEST_SEARCH_RECIPIENT) {
468                         const char *str = tny_header_get_to (cur);
469
470                         if ((found = search_string (search->recipient, str, search))) {
471                                 retval = add_hit (retval, cur, folder);
472                         }
473                 }
474         
475                 if (!found && search->flags & MODEST_SEARCH_BODY) {
476                         TnyHeaderFlags flags;
477                         GError      *err = NULL;
478                         TnyMsg      *msg = NULL;
479
480                         flags = tny_header_get_flags (cur);
481
482                         if (!(flags & TNY_HEADER_FLAG_CACHED)) {
483                                 goto go_next;
484                         }
485
486                         msg = tny_folder_get_msg (folder, cur, &err);
487
488                         if (err != NULL || msg == NULL) {
489                                 g_warning ("%s: Could not get message.\n", __FUNCTION__);
490                                 g_error_free (err);
491
492                                 if (msg) {
493                                         g_object_unref (msg);
494                                 }
495                         } else {        
496                         
497                                 found = search_mime_part_and_child_parts (TNY_MIME_PART (msg), 
498                                                                           search);
499                                 if (found) {
500                                         retval = add_hit (retval, cur, folder);
501                                 }
502                         }
503                         
504                         if (msg)
505                                 g_object_unref (msg);
506                 }
507
508 go_next:
509                 g_object_unref (cur);
510                 tny_iterator_next (iter);
511         }
512
513         g_object_unref (iter);
514         g_object_unref (list);
515         return retval;
516 }
517
518 GList *
519 modest_search_account (TnyAccount *account, ModestSearch *search)
520 {
521         TnyFolderStore      *store;
522         TnyIterator         *iter;
523         TnyList             *folders;
524         GList               *hits;
525         GError              *error;
526
527         error = NULL;
528         hits = NULL;
529
530         store = TNY_FOLDER_STORE (account);
531
532         folders = tny_simple_list_new ();
533         tny_folder_store_get_folders (store, folders, NULL, &error);
534         
535         if (error != NULL) {
536                 g_object_unref (folders);
537                 return NULL;
538         }
539
540         iter = tny_list_create_iterator (folders);
541         while (!tny_iterator_is_done (iter)) {
542                 TnyFolder *folder = NULL;
543                 GList     *res = NULL;
544
545                 folder = TNY_FOLDER (tny_iterator_get_current (iter));
546                 if (folder) {
547                         /* g_debug ("DEBUG: %s: searching folder %s.", 
548                                 __FUNCTION__, tny_folder_get_name (folder)); */
549                 
550                         res = modest_search_folder (folder, search);
551
552                         if (res != NULL) {
553                                 if (hits == NULL) {
554                                         hits = res;
555                                 } else {
556                                         hits = g_list_concat (hits, res);
557                                 }
558                         }
559
560                         g_object_unref (folder);
561                 }
562
563                 tny_iterator_next (iter);
564         }
565
566         g_object_unref (iter);
567         g_object_unref (folders);
568
569         /* printf ("DEBUG: %s: hits length = %d\n", __FUNCTION__, g_list_length (hits)); */
570         return hits;
571 }
572
573 GList *
574 modest_search_all_accounts (ModestSearch *search)
575 {
576         /* printf ("DEBUG: %s: query=%s\n", __FUNCTION__, search->query); */
577         ModestTnyAccountStore *astore;
578         TnyList               *accounts;
579         TnyIterator           *iter;
580         GList                 *hits;
581
582         hits = NULL;
583         astore = modest_runtime_get_account_store ();
584
585         accounts = tny_simple_list_new ();
586         tny_account_store_get_accounts (TNY_ACCOUNT_STORE (astore),
587                                         accounts,
588                                         TNY_ACCOUNT_STORE_STORE_ACCOUNTS);
589
590         iter = tny_list_create_iterator (accounts);
591         while (!tny_iterator_is_done (iter)) {
592                 TnyAccount *account = NULL;
593                 GList      *res = NULL;
594
595                 account = TNY_ACCOUNT (tny_iterator_get_current (iter));
596                 if (account) {
597                         /* g_debug ("DEBUG: %s: Searching account %s",
598                          __FUNCTION__, tny_account_get_name (account)); */
599                          
600                         /* Give the account time to go online if necessary, 
601                          * for instance if this is immediately after startup,
602                          * after D-Bus activation: */
603                         modest_platform_check_and_wait_for_account_is_online (account);
604                         
605                         /* Search: */
606                         res = modest_search_account (account, search);
607                         
608                         if (res != NULL) {      
609                                 if (hits == NULL) {
610                                         hits = res;
611                                 } else {
612                                         hits = g_list_concat (hits, res);
613                                 }
614                         }
615                         
616                         g_object_unref (account);
617                 }
618
619                 tny_iterator_next (iter);
620         }
621
622         g_object_unref (accounts);
623         g_object_unref (iter);
624
625         /* printf ("DEBUG: %s: end: hits length=%d\n", __FUNCTION__, g_list_length(hits)); */
626         return hits;
627 }
628
629