Changed search to retry automatically a couple of times before failing.
[jenirok] / src / common / eniro.cpp
1 /*
2  * This file is part of Jenirok.
3  *
4  * Jenirok is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * Jenirok is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with Jenirok.  If not, see <http://www.gnu.org/licenses/>.
16  *
17  */
18
19 #include <QtCore/QDebug>
20 #include "eniro.h"
21
22 namespace
23 {
24     static const QString SITE_URLS[Eniro::SITE_COUNT] =
25     {
26             "http://wap.eniro.fi/",
27             "http://wap.eniro.se/",
28             "http://wap.eniro.dk/"
29     };
30
31     static const QString SITE_NAMES[Eniro::SITE_COUNT] =
32     {
33          "finnish",
34          "swedish",
35          "danish"
36     };
37
38     static const QString SITE_IDS[Eniro::SITE_COUNT] =
39     {
40          "fi",
41          "se",
42          "dk"
43     };
44
45     static const QString INVALID_LOGIN_STRING = "Invalid login details";
46     static const QString TIMEOUT_STRING = "Request timed out";
47     static const QString PERSON_REGEXP = "<td class=\"hTd2\">(.*)<b>(.*)</td>";
48     static const QString YELLOW_REGEXP = "<td class=\"hTd2\">(.*)<span class=\"gray\">(.*)</td>";
49     static const QString SINGLE_REGEXP = "<div class=\"header\">(.*)</div>(.*)<div class=\"callRow\">(.*)(<div class=\"block\">|</p>(.*)<br/>|</p>(.*)<br />)";
50     static const QString NUMBER_REGEXP = "<div class=\"callRow\">(.*)</div>";
51     static const QString LOGIN_CHECK = "<input class=\"inpTxt\" id=\"loginformUsername\"";
52 }
53
54 Eniro::Eniro(QObject *parent): Source(parent), site_(Eniro::FI),
55 loggedIn_(false), username_(""), password_(""),
56 timerId_(0), pendingSearches_(), pendingNumberRequests_()
57 {
58 }
59
60 Eniro::~Eniro()
61 {
62 }
63
64 void Eniro::abort()
65 {
66     Source::abort();
67
68     for(searchMap::iterator sit = pendingSearches_.begin();
69     sit != pendingSearches_.end(); sit++)
70     {
71         if(sit.value() != 0)
72         {
73             delete sit.value();
74             sit.value() = 0;
75         }
76     }
77
78     pendingSearches_.clear();
79
80     for(numberMap::iterator nit = pendingNumberRequests_.begin();
81     nit != pendingNumberRequests_.end(); nit++)
82     {
83         if(nit.value() != 0)
84         {
85             delete nit.value();
86             nit.value() = 0;
87         }
88     }
89
90     pendingNumberRequests_.clear();
91     pendingLoginRequests_.clear();
92 }
93
94 void Eniro::setSite(Eniro::Site site)
95 {
96     site_ = site;
97 }
98
99 void Eniro::timerEvent(QTimerEvent* t)
100 {
101     Q_UNUSED(t);
102
103     int currentId = http_.currentId();
104
105     if(currentId)
106     {
107         searchMap::const_iterator it = pendingSearches_.find(currentId);
108
109         if(it != pendingSearches_.end())
110         {
111             QVector <Eniro::Result> results = it.value()->results;
112             SearchDetails details = it.value()->details;
113
114             abort();
115
116             setError(TIMEOUT, TIMEOUT_STRING);
117
118             emit requestFinished(results, details, true);
119         }
120     }
121 }
122
123 void Eniro::login(QString const& username,
124                   QString const& password)
125 {
126     username_ = username;
127     password_ = password;
128     loggedIn_ = true;
129 }
130
131 void Eniro::logout()
132 {
133     username_ = "";
134     password_ = "";
135     loggedIn_ = false;
136 }
137
138 void Eniro::search(SearchDetails const& details)
139 {
140     resetTimeout();
141
142     SearchType type = details.type;
143
144     // Only logged in users can use other than person search
145     if(!loggedIn_ && site_ == FI)
146     {
147         type = PERSONS;
148     }
149
150     QUrl url = createUrl(details.query, details.location);
151     QString what;
152
153     if(loggedIn_ || site_ != FI)
154     {
155         switch(type)
156         {
157         case YELLOW_PAGES:
158             what = "mobcs";
159             break;
160
161         case PERSONS:
162             what = "mobwp";
163             break;
164
165         default:
166             what = "moball";
167             break;
168         }
169
170     }
171     else
172     {
173         what = "moball";
174     }
175
176     url.addQueryItem("what", what);
177
178     http_.setHost(url.host(), url.port(80));
179     int id = http_.get(url.encodedPath() + '?' + url.encodedQuery());
180
181     QVector <Source::Result> results;
182
183     // Store search data for later identification
184     SearchData* newData = new SearchData;
185     newData->details = details;
186     newData->results = results;
187     newData->foundNumbers = 0;
188     newData->numbersTotal = 0;
189
190     // Store request id so that it can be identified later
191     pendingSearches_[id] = newData;
192
193 }
194
195 void Eniro::handleHttpData(int id, QByteArray const& data)
196 {
197     searchMap::const_iterator searchIt;
198     numberMap::const_iterator numberIt;
199
200     // Check if request is pending search request
201     if((searchIt = pendingSearches_.find(id)) !=
202         pendingSearches_.end())
203     {
204         if(data.isEmpty())
205         {
206             setError(CONNECTION_FAILURE, "Server returned empty data");
207             emitRequestFinished(id, searchIt.value(), true);
208             return;
209         }
210
211         // Load results from html data
212         loadResults(id, data);
213     }
214
215     // Check if request is pending number requests
216     else if((numberIt = pendingNumberRequests_.find(id)) !=
217         pendingNumberRequests_.end())
218     {
219         if(data.isEmpty())
220         {
221             setError(CONNECTION_FAILURE, "Server returned empty data");
222             emitRequestFinished(id, searchIt.value(), true);
223             return;
224         }
225
226         // Load number from html data
227         loadNumber(id, data);
228     }
229
230     // Check for login request
231     else if(pendingLoginRequests_.find(id) !=
232         pendingLoginRequests_.end())
233     {
234         bool success = true;
235
236         // If html source contains LOGIN_CHECK, login failed
237         if(data.indexOf(LOGIN_CHECK) != -1)
238         {
239             success = false;
240         }
241
242         emit loginStatus(success);
243     }
244
245 }
246
247 void Eniro::handleHttpError(int id)
248 {
249     searchMap::const_iterator searchIt;
250     numberMap::const_iterator numberIt;
251
252     // Check if request is pending search request
253     if((searchIt = pendingSearches_.find(id)) !=
254         pendingSearches_.end())
255     {
256         setError(CONNECTION_FAILURE, http_.errorString());
257         emitRequestFinished(id, searchIt.value(), true);
258     }
259
260     // Check if request is pending number requests
261     else if((numberIt = pendingNumberRequests_.find(id)) !=
262         pendingNumberRequests_.end())
263     {
264         setError(CONNECTION_FAILURE, http_.errorString());
265         delete pendingNumberRequests_[id];
266         pendingNumberRequests_.remove(id);
267     }
268
269     // Check for login request
270     else if(pendingLoginRequests_.find(id) !=
271         pendingLoginRequests_.end())
272     {
273         emit loginStatus(false);
274     }
275 }
276
277 // Loads results from html source code
278 void Eniro::loadResults(int id, QString const& httpData)
279 {
280     searchMap::iterator it = pendingSearches_.find(id);
281
282     QRegExp rx("((" + YELLOW_REGEXP + ")|(" + PERSON_REGEXP + ")|(" + SINGLE_REGEXP + "))");
283     rx.setMinimal(true);
284
285     bool requestsPending = false;
286     int pos = 0;
287     QString data;
288
289     // Find all matches
290     while((pos = rx.indexIn(httpData, pos)) != -1)
291     {
292         pos += rx.matchedLength();
293
294         data = rx.cap(1);
295
296         data = stripTags(data);
297
298         QStringList rows = data.split('\n');
299
300         for(int i = 0; i < rows.size(); i++)
301         {
302             // Remove white spaces
303             QString trimmed = rows.at(i).trimmed().toLower();
304
305             // Remove empty strings
306             if(trimmed.isEmpty())
307             {
308                 rows.removeAt(i);
309                 i--;
310             }
311             else
312             {
313                 // Convert words to uppercase
314                 rows[i] = ucFirst(trimmed);
315             }
316         }
317
318         Result result;
319
320         switch(site_)
321         {
322         case FI:
323             result.country = "Finland";
324             break;
325         case SE:
326             result.country = "Sweden";
327             break;
328         case DK:
329             result.country = "Denmark";
330             break;
331         }
332
333         int size = rows.size();
334
335         switch(size)
336         {
337         case 1:
338             result.name = rows[0];
339             break;
340
341         case 2:
342             result.name = rows[0];
343             result.city = rows[1];
344             break;
345
346         case 3:
347             if(isPhoneNumber(rows[1]))
348             {
349                 result.name = rows[0];
350                 result.number = cleanUpNumber(rows[1]);
351                 result.city = rows[2];
352             }
353             else
354             {
355                 result.name = rows[0];
356                 result.street = rows[1];
357                 result.city = rows[2];
358             }
359             break;
360
361         case 4:
362             result.name = rows[0];
363             // Remove slashes and spaces from number
364             result.number = cleanUpNumber(rows[1]);
365             result.street = rows[2];
366             result.city = rows[3];
367             break;
368
369         default:
370             bool ok = false;
371
372             for(int a = 0; a < size && a < 8; a++)
373             {
374                 if(isPhoneNumber(rows[a]))
375                 {
376                     result.name = rows[0];
377                     result.number = cleanUpNumber(rows[a]);
378
379                     for(int i = a + 1; i < size && i < 8; i++)
380                     {
381                         if(!isPhoneNumber(rows[i]) && size > i + 1 && isStreet(rows[i]))
382                         {
383                             result.street = rows[i];
384                             result.city = rows[i+1];
385                             ok = true;
386                             break;
387                         }
388                     }
389
390                 }
391
392             }
393
394             if(ok)
395             {
396                 break;
397             }
398
399             continue;
400
401         }
402
403         it.value()->results.push_back(result);
404
405         unsigned int foundResults = ++(it.value()->numbersTotal);
406
407         // If phone number search is enabled, we have to make another
408         // request to find it out
409         if(getFindNumber() && size < 4 && (loggedIn_ || site_ != FI) &&
410                 it.value()->details.type != YELLOW_PAGES)
411         {
412             requestsPending = true;
413             getNumberForResult(id, it.value()->results.size() - 1, it.value()->details);
414         }
415         // Otherwise result is ready
416         else
417         {
418             emit resultAvailable(result, it.value()->details);
419         }
420
421         unsigned int maxResults = getMaxResults();
422
423         // Stop searching if max results is reached
424         if(maxResults && (foundResults >= maxResults))
425         {
426             break;
427         }
428     }
429
430     // If there were no results or no phone numbers needed to
431     // be fetched, the whole request is ready
432     if(it.value()->numbersTotal == 0 || !requestsPending)
433     {
434         bool error = false;
435
436         if(httpData.indexOf(LOGIN_CHECK) != -1)
437         {
438             setError(INVALID_LOGIN, INVALID_LOGIN_STRING),
439             error = true;
440         }
441
442         emitRequestFinished(it.key(), it.value(), error);
443     }
444 }
445
446 // Loads phone number from html source
447 void Eniro::loadNumber(int id, QString const& result)
448 {
449     numberMap::iterator numberIt = pendingNumberRequests_.find(id);
450
451     // Make sure that id exists in pending number requests
452     if(numberIt == pendingNumberRequests_.end() || numberIt.value() == 0)
453     {
454         return;
455     }
456
457     searchMap::iterator searchIt = pendingSearches_.find(numberIt.value()->searchId);
458
459     if(searchIt == pendingSearches_.end() || searchIt.value() == 0)
460     {
461         return;
462     }
463
464     QRegExp rx(NUMBER_REGEXP);
465     rx.setMinimal(true);
466
467     int pos = 0;
468     bool error = true;
469
470     if((pos = rx.indexIn(result, pos)) != -1)
471     {
472         QString data = rx.cap(1);
473         data = stripTags(data);
474
475         QString trimmed = data.trimmed();
476
477         if(!trimmed.isEmpty())
478         {
479             // Remove whitespaces from number
480             searchIt.value()->results[numberIt.value()->index].number = cleanUpNumber(trimmed);
481
482             emit resultAvailable(searchIt.value()->results[numberIt.value()->index], searchIt.value()->details);
483
484             unsigned int found = ++searchIt.value()->foundNumbers;
485
486             // Check if all numbers have been found
487             if(found >= searchIt.value()->numbersTotal)
488             {
489                 emitRequestFinished(searchIt.key(), searchIt.value(), false);
490             }
491
492             // If number was found, there was no error
493             error = false;
494         }
495     }
496
497     if(error)
498     {
499         setError(INVALID_LOGIN, INVALID_LOGIN_STRING);
500         emitRequestFinished(searchIt.key(), searchIt.value(), true);
501     }
502
503     // Remove number request
504     int key = numberIt.key();
505
506     delete pendingNumberRequests_[key];
507     pendingNumberRequests_[key] = 0;
508     pendingNumberRequests_.remove(key);
509
510 }
511
512 QUrl Eniro::createUrl(QString const& query, QString const& location)
513 {
514     QUrl url(SITE_URLS[site_] + "query");
515
516     if(!query.isEmpty())
517     {
518         url.addQueryItem("search_word", query);
519     }
520
521     if(!location.isEmpty())
522     {
523         url.addQueryItem("geo_area", location);
524     }
525
526     unsigned int maxResults = getMaxResults();
527
528     if(maxResults)
529     {
530         url.addQueryItem("hpp", QString::number(maxResults));
531     }
532     if(loggedIn_ && site_ == FI)
533     {
534         url.addQueryItem("login_name", username_);
535         url.addQueryItem("login_password", password_);
536     }
537
538     fixUrl(url);
539
540     return url;
541 }
542
543 // Creates a new request for phone number retrieval
544 void Eniro::getNumberForResult(int id, int index, SearchDetails const& details)
545 {
546     QUrl url = createUrl(details.query, details.location);
547     url.addQueryItem("what", "mobwpinfo");
548     url.addQueryItem("search_number", QString::number(index + 1));
549
550     http_.setHost(url.host(), url.port(80));
551     int requestId = http_.get(url.encodedPath() + '?' + url.encodedQuery());
552     NumberData* number = new NumberData;
553     number->searchId = id;
554     number->index = index;
555     pendingNumberRequests_[requestId] = number;
556
557 }
558
559 void Eniro::emitRequestFinished(int key, SearchData* data, bool error)
560 {
561     emit requestFinished(data->results, data->details, error);
562     delete pendingSearches_[key];
563     pendingSearches_[key] = 0;
564     pendingSearches_.remove(key);
565 }
566
567
568 QMap <Eniro::Site, Eniro::SiteDetails> Eniro::getSites()
569 {
570     QMap <Site, SiteDetails> sites;
571
572     for(int i = 0; i < SITE_COUNT; i++)
573     {
574         SiteDetails details;
575         details.name = SITE_NAMES[i];
576         details.id = SITE_IDS[i];
577         sites[static_cast<Site>(i)] = details;
578     }
579
580     return sites;
581 }
582
583 Eniro::Site Eniro::stringToSite(QString const& str)
584 {
585     Site site = FI;
586     QString lower = str.toLower();
587
588     for(int i = 0; i < SITE_COUNT; i++)
589     {
590         if(lower == SITE_NAMES[i] || lower == SITE_IDS[i])
591         {
592             site = static_cast <Site> (i);
593             break;
594         }
595     }
596
597     return site;
598 }
599
600 bool Eniro::isStreet(QString const& str)
601 {
602     static QRegExp number("([0-9]+)");
603     int a = number.indexIn(str);
604     int b = str.indexOf(" ");
605
606     if((a == -1 && b == -1) || (a != -1 && b != -1))
607     {
608         return true;
609     }
610
611     return false;
612 }