Fixed searchclients to handle new Google URLs correctly; added GUI
[movie-schedule] / src / searchclients / theaterschedulesearchclient.cpp
1 // Copyright 2010 Jochen Becher
2 //
3 // This file is part of MovieSchedule.
4 //
5 // MovieSchedule is free software: you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // MovieSchedule is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with MovieSchedule.  If not, see <http://www.gnu.org/licenses/>.
17
18 #include "theaterschedulesearchclient.h"
19
20 #include "data/cinemaschedule.h"
21 #include "data/cinema.h"
22 #include "data/scheduleentry.h"
23 #include "data/movie.h"
24 #include "utils/timeutils.h"
25 #include "utils/assertedlocker.h"
26
27 #include <QXmlStreamReader>
28 #include <QRegExp>
29 #include <iostream>
30
31 TheaterScheduleSearchClient::TheaterScheduleSearchClient(CinemaSchedule *cinema_schedule, QObject *parent)
32     : AbstractSearchClient(parent),
33     _cinema_schedule(cinema_schedule)
34 {
35 }
36
37 void TheaterScheduleSearchClient::SearchSchedule(const CinemaKey &cinema_key, const QString &url)
38 {
39     setObjectName(QString("TheaterScheduleSearchClient:%1").arg(cinema_key.GetName()));
40     _semaphore.Activate(GetSearchTaskId());
41     _cinema_key = cinema_key;
42     _date = QDate::currentDate();
43     _url = QUrl::fromEncoded(QString("http://www.google.com" + url).toAscii(), QUrl::TolerantMode);
44     _dates_seen.clear();
45     _dates_seen.insert("0");
46     _date_urls.clear();
47     Search(0);
48 }
49
50 void TheaterScheduleSearchClient::CancelAllRunningSearchs()
51 {
52     _semaphore.CancelAll();
53 }
54
55 void TheaterScheduleSearchClient::Search(int start)
56 {
57     AbstractSearchClient::Search(_url, start);
58 }
59
60 void TheaterScheduleSearchClient::SearchNextDate()
61 {
62     if (_date_urls.isEmpty()) {
63         return;
64     }
65     QPair<QUrl, QDate> pair = _date_urls.dequeue();
66     _date = pair.second;
67     _url = pair.first;
68     Search(0);
69 }
70
// Parser states used by TheaterScheduleSearchClient::ReplyFinished() while
// walking through the reply's element stream. Any start element that does not
// match the expectation of the current state resets the parser to PARSE_HTML.
enum State {
    // Default state: looking for <a> elements whose href starts with "/m/movies".
    PARSE_HTML,
    // Inside a link carrying a "date" query item; left on the next end element.
    PARSE_DATE_LINK,

    // Reached when the third movie div is closed; the next end element
    // finishes the movie entry and stores its schedule.
    PARSE_MOVIE_DIV,
    // Reached when the movie link is closed; the next end element moves on to
    // PARSE_MOVIE_EXPECT_DIV2.
    PARSE_MOVIE_DIV1,
    // Inside a link carrying a "mid" query item; its text is the movie name.
    PARSE_MOVIE_LINK,

    // Waiting for the <div> that opens the second movie section.
    PARSE_MOVIE_EXPECT_DIV2,
    // Inside the second movie section; <span>, <a> and <img> children are handled.
    PARSE_MOVIE_DIV2,
    // Inside a <span> of the second section; its text is collected.
    PARSE_MOVIE_SPAN,
    // Inside an <a> of the second section (presumably the trailer link); its
    // content is ignored.
    PARSE_MOVIE_TRAILER_LINK,
    // After an <img> of the second section whose "src" provided the rating.
    PARSE_MOVIE_RATING,

    // Waiting for the <div> that opens the third movie section.
    PARSE_MOVIE_EXPECT_DIV3,
    // Inside the third movie section; its <span> children hold the show times.
    PARSE_MOVIE_DIV3,
    // Inside a <span> of the third section; its text is scanned for times.
    PARSE_MOVIE_TIME,

    // Inside a link carrying a "start" query item (paging link).
    PARSE_NEXT_PAGE_LINK
};
87
88 void TheaterScheduleSearchClient::ReplyFinished(QNetworkReply *reply)
89 {
90     State state = PARSE_HTML;
91     int found = 0;
92     QString movie_name;
93     QVector<QString> movie_spans;
94     QString theaters_url;
95     double rating = -1.0;
96     QList<QString> schedule;
97     QRegExp time_pattern("\\d+:\\d+([aApP][mM])*");
98     QRegExp duration_pattern("((\\d+)hr )?(\\d+)min");
99     QRegExp reviews_pattern("\\d+ review(s)?");
100     QXmlStreamReader xml(reply);
101     QString next_page_url;
102     int next_page_start;
103     while (!xml.atEnd()) {
104         QXmlStreamReader::TokenType token = xml.readNext();
105         if (token == QXmlStreamReader::StartElement) {
106             QString attr_href = xml.attributes().value("href").toString();
107             //std::cout << "name: " << qPrintable(xml.name().toString()) << ", href " << qPrintable(attr_href) << std::endl;
108             if (state == PARSE_HTML && xml.name() == "a" && attr_href.startsWith("/m/movies")) {
109                 QUrl url = QUrl::fromEncoded(QString("http://www.google.com" + attr_href).toAscii(), QUrl::TolerantMode);
110                 //std::cout << "LINK " << qPrintable(attr_href) << std::endl;
111                 if (url.hasQueryItem("date")) {
112                     QString v = url.queryItemValue("date");
113                     //std::cout << "FOUND Date Link " << qPrintable(v) << " from " << qPrintable(url.toString()) << std::endl;
114                     if (!_dates_seen.contains(v)) {
115                         // TODO replace location with user selected location (Google simplifies to much)
116                         _dates_seen.insert(v);
117                         _date_urls.append(qMakePair(url, QDate::currentDate().addDays(v.toInt())));
118                     }
119                     state = PARSE_DATE_LINK;
120                 } else if (url.hasQueryItem("mid")) {
121                     //std::cout << "FOUND Movie Link" << std::endl;
122                     movie_name = "";
123                     movie_spans.clear();
124                     theaters_url = attr_href;
125                     rating = -1.0;
126                     schedule.clear();
127                     state = PARSE_MOVIE_LINK;
128                 } else if (url.hasQueryItem("start")) {
129                     QString sort = url.queryItemValue("sort");
130                     QString start = url.queryItemValue("start");
131                     int istart = start.toInt();
132                     if (sort == "0" && istart > GetStartIndex()) {
133                         //std::cout << "next page LINK " << qPrintable(attr_href) << std::endl;
134                         next_page_url = attr_href;
135                         next_page_start = istart;
136                     }
137                     state = PARSE_NEXT_PAGE_LINK;
138                 } else {
139                     state = PARSE_HTML;
140                 }
141             } else if (state == PARSE_MOVIE_EXPECT_DIV2 && xml.name() == "div") {
142                 //std::cout << "PARSE_MOVIE_DIV2" << std::endl;
143                 state = PARSE_MOVIE_DIV2;
144             } else if (state == PARSE_MOVIE_DIV2 && xml.name() == "span") {
145                 movie_spans.append("");
146                 //std::cout << "PARSE_MOVIE_SPAN" << std::endl;
147                 state = PARSE_MOVIE_SPAN;
148             } else if (state == PARSE_MOVIE_DIV2 && xml.name() == "a") {
149                 state = PARSE_MOVIE_TRAILER_LINK;
150             } else if (state == PARSE_MOVIE_DIV2 && xml.name() == "img") {
151                 rating = (double) xml.attributes().value("src").at(41).digitValue() / 10.0;
152                 //std::cout << "rate: " << rate << std::endl;
153                 state = PARSE_MOVIE_RATING;
154             } else if (state == PARSE_MOVIE_EXPECT_DIV3 && xml.name() == "div") {
155                 //std::cout << "PARSE_MOVIE_DIV3" << std::endl;
156                 state = PARSE_MOVIE_DIV3;
157             } else if (state == PARSE_MOVIE_DIV3 && xml.name() == "span") {
158                 //std::cout << "PARSE_MOVIE_TIME" << std::endl;
159                 state = PARSE_MOVIE_TIME;
160             } else {
161                 //std::cout << "TAG " << qPrintable(xml.name().toString()) << std::endl;
162                 state = PARSE_HTML;
163             }
164         } else if (token == QXmlStreamReader::EndElement) {
165             if (state == PARSE_DATE_LINK) {
166                 state = PARSE_HTML;
167             } else if (state == PARSE_MOVIE_LINK) {
168                 state = PARSE_MOVIE_DIV1;
169             } else if (state == PARSE_MOVIE_DIV1) {
170                 state = PARSE_MOVIE_EXPECT_DIV2;
171             } else if (state == PARSE_MOVIE_SPAN) {
172                 state = PARSE_MOVIE_DIV2;
173             } else if (state == PARSE_MOVIE_TRAILER_LINK) {
174                 state = PARSE_MOVIE_DIV2;
175             } else if (state == PARSE_MOVIE_RATING) {
176                 state = PARSE_MOVIE_DIV2;
177             } else if (state == PARSE_MOVIE_DIV2) {
178                 state = PARSE_MOVIE_EXPECT_DIV3;
179             } else if (state == PARSE_MOVIE_TIME) {
180                 state = PARSE_MOVIE_DIV3;
181             } else if (state == PARSE_MOVIE_DIV3) {
182                 state = PARSE_MOVIE_DIV;
183             } else if (state == PARSE_MOVIE_DIV) {
184                 if (!movie_name.isEmpty()) {
185                     ++found;
186                     if (!schedule.isEmpty()) {
187                         AssertedWriteLocker locker(_cinema_schedule->GetLock());
188                         if (!_semaphore.IsActive(GetSearchTaskId())) {
189                             break;
190                         }
191                         const Cinema *cinema = _cinema_schedule->FindCinema(_cinema_key);
192                         if (cinema != 0) {
193                             //std::cout << "ADD SCHEDULE " << qPrintable(movie_name) << ", " << qPrintable(duration)
194                             //        << ", " << qPrintable(age) << ", " << rate << std::endl;
195                             MovieKey key(movie_name);
196                             Movie *movie = _cinema_schedule->FindMovie(key);
197                             if (movie == 0) {
198                                 movie = _cinema_schedule->AddMovie(key);
199                             }
200                             if (!theaters_url.isEmpty()) {
201                                 movie->SetTheatersUrl(theaters_url);
202                             }
203                             if (rating >= 0.0) {
204                                 movie->SetRate(rating);
205                             }
206                             Q_FOREACH (QString s, movie_spans) {
207                                 if (duration_pattern.exactMatch(s)) {
208                                     QString hours = duration_pattern.cap(2);
209                                     QString minutes = duration_pattern.cap(3);
210                                     //std::cout << "hours = " << qPrintable(hours) << ", minutes = " << qPrintable(minutes) << ",0: " << qPrintable(duration_pattern.cap(0)) << ", 1: " << qPrintable(duration_pattern.cap(1)) << std::endl;
211                                     movie->SetDuration(QTime(hours.toInt(), minutes.toInt()));
212                                 } else if (reviews_pattern.exactMatch(s)) {
213                                     // Ignore number of reviews
214                                 } else {
215                                     movie->SetComment(s);
216                                 }
217                             }
218                             QList<QTime> schedule_times = TimesFromString(schedule);
219                             Q_FOREACH(const QTime time, schedule_times) {
220                                 if (time.hour() < 3) {
221                                     // interpret very early times as shifted by 1 day (seems to be a Google logic)
222                                     _cinema_schedule->AddSchedule(cinema, movie, time, _date.addDays(1));
223                                 } else {
224                                     _cinema_schedule->AddSchedule(cinema, movie, time, _date);
225                                 }
226                             }
227                         }
228                     }
229                 }
230                 state = PARSE_HTML;
231             } else if (state == PARSE_NEXT_PAGE_LINK) {
232                 state = PARSE_HTML;
233             }
234         } else if (token == QXmlStreamReader::Characters) {
235             if (state == PARSE_MOVIE_LINK) {
236                 //std::cout << "MOVIE " << qPrintable(xml.text().toString()) << std::endl;
237                 movie_name = xml.text().toString();
238             } else if (state == PARSE_MOVIE_SPAN) {
239                 int i = movie_spans.size()-1;
240                 if (movie_spans[i].isEmpty()) {
241                     movie_spans[i] = xml.text().toString();
242                 } else if (!xml.text().isEmpty()) {
243                     movie_spans[i] += " ";
244                     movie_spans[i] += xml.text().toString();
245                 }
246                 //std::cout << " span: " << qPrintable(movie_spans[i]) << std::endl;
247             } else if (state == PARSE_MOVIE_TIME) {
248                 QString t = xml.text().toString();
249                 int i = 0;
250                 while ((i = time_pattern.indexIn(t, i)) != -1) {
251                     int length = time_pattern.matchedLength();
252                     if (length > 0) {
253                         schedule.append(t.mid(i, length));
254                     }
255                     i += length;
256                 }
257             }
258         }
259     }
260     if (xml.hasError()) {
261         emit SearchFinished(GetSearchTaskId(), false);
262         std::cout << "xml error (" << xml.lineNumber() << "/" << xml.columnNumber() << "): " << qPrintable(xml.errorString()) << std::endl;
263         emit Error(GetSearchTaskId());
264         deleteLater();
265     } else if (!_semaphore.IsActive(GetSearchTaskId())) {
266         emit Cancelled(GetSearchTaskId());
267         emit SearchFinished(GetSearchTaskId(), false);
268     } else {
269         if (!next_page_url.isEmpty()) {
270             emit Reply(GetSearchTaskId(), true);
271             SearchEncodedUrl(next_page_url, next_page_start);
272         } else {
273             if (!_date_urls.isEmpty()) {
274                 emit Reply(GetSearchTaskId(), true);
275                 SearchNextDate();
276             } else {
277                 emit Reply(GetSearchTaskId(), false);
278                 emit SearchFinished(GetSearchTaskId(), true);
279                 deleteLater();
280             }
281         }
282     }
283     reply->deleteLater();
284 }
285
286 SearchClientSemaphore TheaterScheduleSearchClient::_semaphore;