Minor fix for another change in Google's movie pages and a fix in
[movie-schedule] / src / searchclients / movieschedulesearchclient.cpp
1 // Copyright 2010 Jochen Becher
2 //
3 // This file is part of MovieSchedule.
4 //
5 // MovieSchedule is free software: you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // MovieSchedule is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with MovieSchedule.  If not, see <http://www.gnu.org/licenses/>.
17
18 #include "movieschedulesearchclient.h"
19
20 #include "data/cinemaschedule.h"
21 #include "data/cinema.h"
22 #include "data/scheduleentry.h"
23 #include "data/movie.h"
24 #include "utils/timeutils.h"
25 #include "utils/assertedlocker.h"
26
27 #include <QXmlStreamReader>
28 #include <QRegExp>
29 #include <QUrl>
30 #include <iostream>
31
32 MovieScheduleSearchClient::MovieScheduleSearchClient(CinemaSchedule *cinema_schedule, QObject *parent)
33     : AbstractSearchClient(parent),
34     _cinema_schedule(cinema_schedule)
35 {
36 }
37
38 void MovieScheduleSearchClient::SearchSchedule(const MovieKey &movie_key, const QString &url)
39 {
40     setObjectName(QString("MovieScheduleSearchClient:%1").arg(movie_key.GetName()));
41     _semaphore.Activate(GetSearchTaskId());
42     _movie_key = movie_key;
43     _date = QDate::currentDate();
44     _url = QUrl::fromEncoded(QString("http://www.google.com" + url).toAscii(), QUrl::TolerantMode);
45     _dates_seen.clear();
46     _dates_seen.insert("0");
47     _date_urls.clear();
48     Search(0);
49 }
50
51 void MovieScheduleSearchClient::CancelAllRunningSearchs()
52 {
53     _semaphore.CancelAll();
54 }
55
56 void MovieScheduleSearchClient::Search(int start)
57 {
58     AbstractSearchClient::Search(_url, start);
59 }
60
61 void MovieScheduleSearchClient::SearchNextDate()
62 {
63     if (_date_urls.isEmpty()) {
64         return;
65     }
66     QPair<QUrl, QDate> pair = _date_urls.dequeue();
67     _date = pair.second;
68     _url = pair.first;
69     Search(0);
70 }
71
// Parser states used while walking the elements of a result page.
enum State {
    // Outside of any recognized structure; looking for "/m/movies" links.
    PARSE_HTML,
    // Inside a link whose URL carries a "date" query item.
    PARSE_DATE_LINK,
    // After a theater link was closed; scanning the elements that follow it.
    PARSE_THEATER_DIV,
    // Inside a link whose URL carries a "tid" query item; its text is the theater name.
    PARSE_THEATER_LINK,
    // Inside a link whose href starts with "wtai:"; its text is the phone number.
    PARSE_PHONE_LINK,
    // Inside any other link that follows a theater link.
    PARSE_LINK,
    // Inside a <br> element that follows a theater link.
    PARSE_BR,
    // Inside a <span> that follows a theater link; its text holds show times or the address.
    PARSE_SPAN,
    // Inside a link whose URL carries a "start" query item.
    PARSE_NEXT_PAGE_LINK
};
83
84 void MovieScheduleSearchClient::ReplyFinished(QNetworkReply *reply)
85 {
86     //std::cout << "REPLY" << std::endl;
87     //std::cout << reply->readAll().data() << std::endl;
88     QXmlStreamReader xml(reply);
89     State state = PARSE_HTML;
90     int found = 0;
91     QString theater_name;
92     QString theater_address;
93     QString theater_phone;
94     QList<QString> schedule;
95     QRegExp time_pattern("\\d+:\\d+([aApP][mM])*");
96     QString next_page_url;
97     int next_page_start;
98     while (!xml.atEnd()) {
99         QXmlStreamReader::TokenType token = xml.readNext();
100         if (token == QXmlStreamReader::StartElement) {
101             QString attr_href = xml.attributes().value("href").toString();
102             //std::cout << qPrintable(xml.name().toString()) << ", class " << qPrintable(attr_class) << ", href " << qPrintable(attr_href) << std::endl;
103             if (state == PARSE_HTML && xml.name() == "a" && attr_href.startsWith("/m/movies")) {
104                 QUrl url = QUrl::fromEncoded(QString("http://www.google.com" + attr_href).toAscii(), QUrl::TolerantMode);
105                 //std::cout << "LINK " << qPrintable(attr_href) << std::endl;
106                 if (url.hasQueryItem("date")) {
107                     QString v = url.queryItemValue("date");
108                     //std::cout << "FOUND Date Link " << qPrintable(v) << " from " << qPrintable(url.toString()) << std::endl;
109                     if (!_dates_seen.contains(v)) {
110                         // TODO replace location with user selected location (Google simplifies to much)
111                         _dates_seen.insert(v);
112                         _date_urls.append(qMakePair(url, QDate::currentDate().addDays(v.toInt())));
113                     }
114                     state = PARSE_DATE_LINK;
115                 } else if (url.hasQueryItem("tid")) {
116                     theater_name = "";
117                     theater_address = "";
118                     theater_phone = "";
119                     schedule.clear();
120                     state = PARSE_THEATER_LINK;
121                 } else if (url.hasQueryItem("start")) {
122                     QString sort = url.queryItemValue("sort");
123                     QString start = url.queryItemValue("start");
124                     int istart = start.toInt();
125                     if (sort == "0" && istart > GetStartIndex()) {
126                         //std::cout << "next page LINK " << qPrintable(attr_href) << std::endl;
127                         next_page_url = attr_href;
128                         next_page_start = istart;
129                     }
130                     state = PARSE_NEXT_PAGE_LINK;
131                 } else {
132                     state = PARSE_HTML;
133                 }
134             } else if (state == PARSE_THEATER_DIV && xml.name() == "a") {
135                 if (attr_href.startsWith("wtai:")) {
136                     state = PARSE_PHONE_LINK;
137                 } else {
138                     state = PARSE_LINK;
139                 }
140             } else if (state == PARSE_THEATER_DIV && xml.name() == "br") {
141                 state = PARSE_BR;
142             } else if (state == PARSE_THEATER_DIV && xml.name() == "span") {
143                 state = PARSE_SPAN;
144             } else {
145                 state = PARSE_HTML;
146             }
147         } else if (token == QXmlStreamReader::EndElement) {
148             if (state == PARSE_DATE_LINK) {
149                 state = PARSE_HTML;
150             } else if (state == PARSE_THEATER_LINK) {
151                 state = PARSE_THEATER_DIV;
152             } else if (state == PARSE_BR) {
153                 state = PARSE_THEATER_DIV;
154             } else if (state == PARSE_SPAN) {
155                 state = PARSE_THEATER_DIV;
156             } else if (state == PARSE_LINK) {
157                 state = PARSE_THEATER_DIV;
158             } else if (state == PARSE_PHONE_LINK) {
159                 state = PARSE_THEATER_DIV;
160             } else if (state == PARSE_THEATER_DIV) {
161                 state = PARSE_HTML;
162                 if (!theater_name.isEmpty()) {
163                     ++found;
164                     if (!schedule.isEmpty()) {
165                         AssertedWriteLocker locker(_cinema_schedule->GetLock());
166                         if (!_semaphore.IsActive(GetSearchTaskId())) {
167                             break;
168                         }
169                         const Movie *movie = _cinema_schedule->FindMovie(_movie_key);
170                         if (movie != 0) {
171                             //std::cout << "ADD SCHEDULE " << qPrintable(theater_name) << ", " << qPrintable(theater_address) << std::endl;
172                             CinemaKey key(theater_name, theater_address);
173                             Cinema *cinema = _cinema_schedule->FindCinema(key);
174                             if (cinema == 0) {
175                                 cinema = _cinema_schedule->AddCinema(key);
176                             }
177                             if (!theater_phone.isEmpty()) {
178                                 cinema->SetTelephone(theater_phone);
179                             }
180                             QList<QTime> schedule_times = TimesFromString(schedule);
181                             Q_FOREACH(const QTime time, schedule_times) {
182                                 if (time.hour() < 3) {
183                                     // interpret very early times as shifted by 1 day (seems to be a Google logic)
184                                     _cinema_schedule->AddSchedule(cinema, movie, time, _date.addDays(1));
185                                 } else {
186                                     _cinema_schedule->AddSchedule(cinema, movie, time, _date);
187                                 }
188                             }
189                         }
190                     }
191                 }
192             } else if (state == PARSE_NEXT_PAGE_LINK) {
193                 state = PARSE_HTML;
194             }
195         } else if (token == QXmlStreamReader::Characters) {
196             if (state == PARSE_THEATER_LINK) {
197                 //std::cout << "name " << qPrintable(xml.text().toString()) << std::endl;
198                 theater_name = xml.text().toString();
199             } else if (state == PARSE_PHONE_LINK) {
200                 //std::cout << "phone " << qPrintable(xml.text().toString()) << std::endl;
201                 theater_phone = xml.text().toString();
202             } else if (state == PARSE_SPAN) {
203                 QString t = xml.text().toString();
204                 int i = 0;
205                 bool found = false;
206                 while ((i = time_pattern.indexIn(t, i)) != -1) {
207                     int length = time_pattern.matchedLength();
208                     //std::cout << "time " << qPrintable(t.mid(i, length)) << std::endl;
209                     if (length > 0) {
210                         schedule.append(t.mid(i, length));
211                     }
212                     i += length;
213                     found = true;
214                 }
215                 if (!found) {
216                     //std::cout << "address " << qPrintable(t) << std::endl;
217                     theater_address = t;
218                 }
219             }
220         }
221     }
222     if (xml.hasError()) {
223         emit SearchFinished(GetSearchTaskId(), false);
224         std::cout << "xml error (" << xml.lineNumber() << "/" << xml.columnNumber() << "): " << qPrintable(xml.errorString()) << std::endl;
225         emit Error(GetSearchTaskId());
226         deleteLater();
227     } else if (!_semaphore.IsActive(GetSearchTaskId())) {
228         emit Cancelled(GetSearchTaskId());
229         emit SearchFinished(GetSearchTaskId(), false);
230         deleteLater();
231     } else {
232         if (!next_page_url.isEmpty()) {
233             emit Reply(GetSearchTaskId(), true);
234             SearchEncodedUrl(next_page_url, next_page_start);
235         } else {
236             if (!_date_urls.isEmpty()) {
237                 SearchNextDate();
238             } else {
239                 emit Reply(GetSearchTaskId(), false);
240                 emit SearchFinished(GetSearchTaskId(), true);
241                 deleteLater();
242             }
243         }
244     }
245     reply->deleteLater();
246     //std::cout << "REPLY FINISHED" << std::endl;
247 }
248
// Definition of the static semaphore member; being static, it is shared by all
// MovieScheduleSearchClient instances.
249 SearchClientSemaphore MovieScheduleSearchClient::_semaphore;