Changed search to retry automatically a couple of times before failing.
[jenirok] / src / common / dasoertliche.cpp
1 /*
2  * This file is part of Jenirok.
3  *
4  * Jenirok is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * Jenirok is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with Jenirok.  If not, see <http://www.gnu.org/licenses/>.
16  *
17  */
18
19 #include <QtCore/QDebug>
20 #include "dasoertliche.h"
21
22
23 DasOertliche::DasOertliche(QObject* parent): Source(parent)
24 {
25 }
26
27 DasOertliche::~DasOertliche()
28 {
29     abort();
30 }
31
32 void DasOertliche::abort()
33 {
34     Source::abort();
35
36     for(int i = 0; i < pendingSearches_.size(); i++)
37     {
38         delete pendingSearches_[i];
39         pendingSearches_[i] = 0;
40     }
41
42     pendingSearches_.clear();
43
44 }
45
46 void DasOertliche::search(Source::SearchDetails const& details)
47 {
48     resetTimeout();
49
50     SearchData* newData = new SearchData;
51     newData->details = details;
52     newData->currentPage = 1;
53     newData->finishedSearches = 0;
54
55     if(details.type == Source::BOTH)
56     {
57         newData->totalSearches = 2;
58         Source::SearchDetails tmpDetails = details;
59         tmpDetails.type = Source::PERSONS;
60         int id1 = sendQuery(tmpDetails, 1);
61         tmpDetails.type = Source::YELLOW_PAGES;
62         int id2 = sendQuery(tmpDetails, 1);
63         newData->searchIds.insert(id1);
64         newData->searchIds.insert(id2);
65     }
66     else
67     {
68         newData->totalSearches = 1;
69         int id = sendQuery(details, 1);
70         newData->searchIds.insert(id);
71     }
72
73     pendingSearches_.push_back(newData);
74 }
75
76 void DasOertliche::handleHttpData(int id, QByteArray const& data)
77 {
78     QString decoded = QString::fromUtf8(data.data());
79
80     for(int i = 0; i < pendingSearches_.size(); i++)
81     {
82         if(pendingSearches_.at(i) && pendingSearches_.at(i)->searchIds.find(id) !=
83             pendingSearches_.at(i)->searchIds.end())
84         {
85             addNumbers(pendingSearches_.at(i), decoded, i);
86             break;
87         }
88     }
89 }
90
91 void DasOertliche::handleHttpError(int id)
92 {
93     for(int i = 0; i < pendingSearches_.size(); i++)
94     {
95         if(pendingSearches_.at(i) && pendingSearches_.at(i)->searchIds.find(id) !=
96             pendingSearches_.at(i)->searchIds.end())
97         {
98
99             setError(Source::CONNECTION_FAILURE, http_.errorString());
100             emitRequestFinished(pendingSearches_.at(i), true, i);
101             break;
102         }
103     }
104 }
105
106 void DasOertliche::addNumbers(SearchData* searchData,
107                            QString const& data,
108                            int index)
109 {
110     //qDebug() << "data: " << data;
111
112     if(data.indexOf("<div class=\"counter c02\">") == -1)
113     {
114         addOnlyNumber(searchData, data, index);
115         return;
116     }
117
118     int pos = 0;
119     QRegExp rx("<div class=\"adresse\">(.*)<div class=\"adresse\">");
120     QRegExp name("<div class=\"counter.*\">*.*<a.*>(.*)<span>");
121     QRegExp number("<td class=\"nummer\">(.*)<br/>");
122     QRegExp address("<div class=\"strasse\">*(.*),(.*)</div>");
123
124     rx.setMinimal(true);
125     name.setMinimal(true);
126     address.setMinimal(true);
127     number.setMinimal(true);
128
129     int maxResults = getMaxResults();
130
131     while((pos = rx.indexIn(data, pos)) != -1)
132     {
133         pos += rx.matchedLength();
134
135         if(searchData->results.size() >= maxResults)
136         {
137             break;
138         }
139
140         QString part = rx.cap(1);
141         Source::Result result;
142         QString nameStr;
143         QString numberStr;
144         QString streetStr;
145         QString cityStr;
146
147         if(name.indexIn(part) != -1)
148         {
149             nameStr = name.cap(1);
150         }
151
152         if(address.indexIn(part) != -1)
153         {
154             streetStr = address.cap(1);
155             cityStr = address.cap(2);
156         }
157
158         if(number.indexIn(part) != -1)
159         {
160             numberStr = number.cap(1);
161         }
162
163         if(formatResult(nameStr, numberStr, streetStr,
164                         cityStr, result))
165         {
166             emit resultAvailable(result, searchData->details);
167             searchData->results.push_back(result);
168         }
169
170     }
171
172     searchData->finishedSearches++;
173
174     if(searchData->results.size() >= maxResults)
175     {
176         emitRequestFinished(searchData, false, index);
177     }
178     else
179     {
180         if(data.indexOf("Neste") > 0)
181         {
182             searchData->currentPage++;
183             int id = sendQuery(searchData->details, searchData->currentPage);
184             searchData->searchIds.insert(id);
185         }
186         else if(searchData->finishedSearches >= searchData->totalSearches)
187         {
188             emitRequestFinished(searchData, false, index);
189         }
190     }
191
192 }
193
194 void DasOertliche::addOnlyNumber(SearchData* searchData,
195                               QString const& data,
196                               int index)
197 {
198     QRegExp name("<div class=\"counter c01\">.*<a.*>(.*)<span>");
199     QRegExp number("<td class=\"nummer\">(.*)<br/>");
200     QRegExp address("<div class=\"strasse\">(.*),(.*)</div>");
201     name.setMinimal(true);
202     number.setMinimal(true);
203     address.setMinimal(true);
204
205     Source::Result result;
206
207     QString nameStr;
208     QString numberStr;
209     QString streetStr;
210     QString cityStr;
211
212     if(name.indexIn(data) != -1)
213     {
214         nameStr = name.cap(1);
215     }
216
217     if(number.indexIn(data) != -1)
218     {
219         numberStr = number.cap(1);
220     }
221
222     if(address.indexIn(data) != -1)
223     {
224         streetStr = address.cap(1);
225         cityStr = address.cap(2);
226     }
227
228     if(formatResult(nameStr, numberStr, streetStr,
229                     cityStr, result))
230     {
231         searchData->results.push_back(result);
232         emit resultAvailable(result, searchData->details);
233     }
234
235     emitRequestFinished(searchData, false, index);
236 }
237
238 bool DasOertliche::formatResult(QString& name, QString& number,
239                              QString& street, QString& city,
240                              Source::Result& result)
241 {
242     name = stripTags(name);
243     name = htmlEntityDecode(name);
244     result.name = name.trimmed();
245     number = stripTags(number);
246     number = cleanUpNumber(number);
247     result.number = number.trimmed();
248     street = stripTags(street);
249     street = htmlEntityDecode(street);
250     city = stripTags(city);
251     city = htmlEntityDecode(city);
252     result.street = street.trimmed();
253     result.city = city.trimmed();
254
255     if(!result.name.isEmpty() && (!getFindNumber() || !result.number.isEmpty()))
256     {
257         return true;
258     }
259
260     return false;
261 }
262
263 void DasOertliche::emitRequestFinished(SearchData* data,
264                                     bool error, int index)
265 {
266     QVector<Source::Result> results = data->results;
267     Source::SearchDetails details = data->details;
268
269     emit requestFinished(results, details, error);
270     delete pendingSearches_[index];
271     pendingSearches_[index] = 0;
272     pendingSearches_.removeAt(index);
273 }
274
275 int DasOertliche::sendQuery(Source::SearchDetails const& details,
276                          int page)
277 {
278     QUrl url("http://www.dasoertliche.de/");
279     QString query = details.query;
280
281     if(isPhoneNumber(query))
282     {
283         url.addQueryItem("form_name","search_inv");
284         url.addQueryItem("ph",query);
285     }
286     else
287     {
288         url.addQueryItem("form_name","search_reg");
289         url.addQueryItem("kw",query);
290         //More details?
291         if(!details.location.isEmpty())
292         {
293             url.addQueryItem("ci",details.location);
294         }
295     }
296
297     /* TODO No differentiation between personal and professional entries yet
298     if(details.type == Source::YELLOW_PAGES)
299     {
300         url.addQueryItem("t", "c");
301     }
302     else
303     {
304         url.addQueryItem("t", "p");
305     }
306    */
307
308     /* TODO No multi-page results yet.
309     if(page > 1)
310     {
311         url.addQueryItem("p", QString::number(page));
312     }
313     */
314
315     fixUrl(url);
316
317     //They want us to have a name, therefore setting User-Agent here...
318     QHttpRequestHeader header("GET", url.encodedPath()+ '?' + url.encodedQuery());
319     header.setValue("User-Agent","Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.0.10) Gecko/2009042316 Firefox/3.0.10");
320     header.setValue("Host", url.encodedHost());
321
322     http_.setHost(url.host(), url.port(80));
323
324     return http_.request(header);
325 }