1 // Copyright 2010 Jochen Becher
3 // This file is part of MovieSchedule.
5 // MovieSchedule is free software: you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, either version 3 of the License, or
8 // (at your option) any later version.
10 // MovieSchedule is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with MovieSchedule. If not, see <http://www.gnu.org/licenses/>.
18 #include "theaterschedulesearchclient.h"
20 #include "data/cinemaschedule.h"
21 #include "data/cinema.h"
22 #include "data/scheduleentry.h"
23 #include "data/movie.h"
24 #include "utils/timeutils.h"
25 #include "utils/assertedlocker.h"
27 #include <QXmlStreamReader>
// Constructs a search client.
// `parent` is forwarded to the AbstractSearchClient base; `cinema_schedule`
// is stored as a raw pointer in _cinema_schedule (no ownership transfer is
// visible here).
31 TheaterScheduleSearchClient::TheaterScheduleSearchClient(CinemaSchedule *cinema_schedule, QObject *parent)
32 : AbstractSearchClient(parent),
33 _cinema_schedule(cinema_schedule)
// Prepares a schedule search for one cinema.
//   cinema_key - key of the cinema whose schedule is requested; remembered in _cinema_key.
//   url        - site-relative path; it is appended to "http://www.google.com".
37 void TheaterScheduleSearchClient::SearchSchedule(const CinemaKey &cinema_key, const QString &url)
// Name the object after the cinema.
39 setObjectName(QString("TheaterScheduleSearchClient:%1").arg(cinema_key.GetName()));
// Register this search task with the semaphore shared by all clients.
40 _semaphore.Activate(GetSearchTaskId());
41 _cinema_key = cinema_key;
// The first page requested is attributed to today's date.
42 _date = QDate::currentDate();
43 _url = QUrl::fromEncoded(QString("http://www.google.com" + url).toAscii(), QUrl::TolerantMode);
// ReplyFinished turns a "date" query value v into currentDate().addDays(v), so "0" is today.
// Marking it as seen keeps ReplyFinished from queueing a link for today again.
45 _dates_seen.insert("0");
// Cancels every search registered with _semaphore.
// _semaphore is a static member, so this is not limited to one client instance.
50 void TheaterScheduleSearchClient::CancelAllRunningSearchs()
52 _semaphore.CancelAll();
// Starts a request for the currently stored _url.
// `start` is passed through unchanged to AbstractSearchClient::Search.
55 void TheaterScheduleSearchClient::Search(int start)
57 AbstractSearchClient::Search(_url, start);
// Continues the search with the next queued date page.
// _date_urls holds (url, date) pairs that ReplyFinished appended for date links.
60 void TheaterScheduleSearchClient::SearchNextDate()
// Check whether any date page is left to fetch.
62 if (_date_urls.isEmpty()) {
// Take the oldest queued (url, date) pair off the queue.
65 QPair<QUrl, QDate> pair = _date_urls.dequeue();
// Parser states compared against the `State state` variable in ReplyFinished.
// Set on an end element while in PARSE_MOVIE_DIV1; left when the next <div> starts.
77 PARSE_MOVIE_EXPECT_DIV2,
// Set when an <a> starts while in PARSE_MOVIE_DIV2; its end element returns to PARSE_MOVIE_DIV2.
80 PARSE_MOVIE_TRAILER_LINK,
// Set on an end element while in PARSE_MOVIE_DIV2; left when the next <div> starts.
82 PARSE_MOVIE_EXPECT_DIV3,
// Handles a finished network reply for a theater schedule page.
//
// The reply body is read as an XML token stream and driven through a small
// state machine (variable `state`). While parsing it
//   - queues links carrying a not-yet-seen "date" query item in _date_urls,
//   - collects, per movie, the name, the text of info spans, a rating and the
//     show times,
//   - stores the collected show times in _cinema_schedule for the cinema
//     identified by _cinema_key at _date.
// After parsing it emits signals that depend on XML errors, cancellation and
// whether date pages remain, then schedules `reply` for deletion.
87 void TheaterScheduleSearchClient::ReplyFinished(QNetworkReply *reply)
89 State state = PARSE_HTML;
// One entry per <span> found in the movie info block.
92 QVector<QString> movie_spans;
// Show times collected for the movie currently being parsed.
95 QList<QTime> schedule;
// Digits, colon, digits, optionally followed by am/pm in any case.
96 QRegExp time_pattern("\\d+:\\d+([aApP][mM])*");
// Optional "<n>hr " followed by "<n>min"; capture 2 is the hours, capture 3 the minutes.
97 QRegExp duration_pattern("((\\d+)hr )?(\\d+)min");
98 QRegExp reviews_pattern("\\d+ review(s)?");
99 QXmlStreamReader xml(reply);
// Main token loop: start elements and end elements move the state machine,
// character data is collected according to the current state.
100 while (!xml.atEnd()) {
101 QXmlStreamReader::TokenType token = xml.readNext();
102 if (token == QXmlStreamReader::StartElement) {
103 QString attr_href = xml.attributes().value("href").toString();
104 //std::cout << "name: " << qPrintable(xml.name().toString()) << ", href " << qPrintable(attr_href) << std::endl;
// Only anchors pointing below /m/movies are of interest while in the idle state.
105 if (state == PARSE_HTML && xml.name() == "a" && attr_href.startsWith("/m/movies")) {
106 QUrl url = QUrl::fromEncoded(QString("http://www.google.com" + attr_href).toAscii(), QUrl::TolerantMode);
107 //std::cout << "LINK " << qPrintable(attr_href) << std::endl;
// A "date" query item marks a link to the schedule of another day.
108 if (url.hasQueryItem("date")) {
109 QString v = url.queryItemValue("date");
110 //std::cout << "FOUND Date Link " << qPrintable(v) << " from " << qPrintable(url.toString()) << std::endl;
// Queue each date value only once.
111 if (!_dates_seen.contains(v)) {
112 // TODO replace location with user selected location (Google simplifies too much)
113 _dates_seen.insert(v);
// The date value is used as a day offset relative to today.
114 _date_urls.append(qMakePair(url, QDate::currentDate().addDays(v.toInt())));
116 state = PARSE_DATE_LINK;
// A "mid" query item marks a link to a movie; its href is kept as the theaters URL.
117 } else if (url.hasQueryItem("mid")) {
118 //std::cout << "FOUND Movie Link" << std::endl;
121 theaters_url = attr_href;
124 state = PARSE_MOVIE_LINK;
128 } else if (state == PARSE_MOVIE_EXPECT_DIV2 && xml.name() == "div") {
129 //std::cout << "PARSE_MOVIE_DIV2" << std::endl;
130 state = PARSE_MOVIE_DIV2;
// Each <span> inside the second div opens a fresh, initially empty text slot.
131 } else if (state == PARSE_MOVIE_DIV2 && xml.name() == "span") {
132 movie_spans.append("");
133 //std::cout << "PARSE_MOVIE_SPAN" << std::endl;
134 state = PARSE_MOVIE_SPAN;
135 } else if (state == PARSE_MOVIE_DIV2 && xml.name() == "a") {
136 state = PARSE_MOVIE_TRAILER_LINK;
137 } else if (state == PARSE_MOVIE_DIV2 && xml.name() == "img") {
// The rating is the digit at index 41 of the image src, divided by 10.
// NOTE(review): this relies on a fixed src layout and no length check is visible here;
// digitValue() yields -1 for a non-digit character - confirm this is acceptable.
138 rating = (double) xml.attributes().value("src").at(41).digitValue() / 10.0;
139 //std::cout << "rate: " << rate << std::endl;
140 state = PARSE_MOVIE_RATING;
141 } else if (state == PARSE_MOVIE_EXPECT_DIV3 && xml.name() == "div") {
142 //std::cout << "PARSE_MOVIE_DIV3" << std::endl;
143 state = PARSE_MOVIE_DIV3;
144 } else if (state == PARSE_MOVIE_DIV3 && xml.name() == "span") {
145 //std::cout << "PARSE_MOVIE_TIME" << std::endl;
146 state = PARSE_MOVIE_TIME;
148 //std::cout << "TAG " << qPrintable(xml.name().toString()) << std::endl;
// End elements pop the state machine back one level at a time.
151 } else if (token == QXmlStreamReader::EndElement) {
152 if (state == PARSE_DATE_LINK) {
154 } else if (state == PARSE_MOVIE_LINK) {
155 state = PARSE_MOVIE_DIV1;
156 } else if (state == PARSE_MOVIE_DIV1) {
157 state = PARSE_MOVIE_EXPECT_DIV2;
158 } else if (state == PARSE_MOVIE_SPAN) {
159 state = PARSE_MOVIE_DIV2;
160 } else if (state == PARSE_MOVIE_TRAILER_LINK) {
161 state = PARSE_MOVIE_DIV2;
162 } else if (state == PARSE_MOVIE_RATING) {
163 state = PARSE_MOVIE_DIV2;
164 } else if (state == PARSE_MOVIE_DIV2) {
165 state = PARSE_MOVIE_EXPECT_DIV3;
166 } else if (state == PARSE_MOVIE_TIME) {
167 state = PARSE_MOVIE_DIV3;
168 } else if (state == PARSE_MOVIE_DIV3) {
169 state = PARSE_MOVIE_DIV;
// End of a movie entry: store what was collected, provided a name and show times exist.
170 } else if (state == PARSE_MOVIE_DIV) {
171 if (!movie_name.isEmpty()) {
173 if (!schedule.isEmpty()) {
// The schedule is modified under its lock (presumably a write lock - confirm
// against AssertedWriteLocker).
174 AssertedWriteLocker locker(_cinema_schedule->GetLock());
// Re-check for cancellation once the lock is held.
175 if (!_semaphore.IsActive(GetSearchTaskId())) {
178 const Cinema *cinema = _cinema_schedule->FindCinema(_cinema_key);
180 //std::cout << "ADD SCHEDULE " << qPrintable(movie_name) << ", " << qPrintable(duration)
181 // << ", " << qPrintable(age) << ", " << rate << std::endl;
// Look the movie up by name; AddMovie presumably runs only when the lookup fails - confirm.
182 MovieKey key(movie_name);
183 Movie *movie = _cinema_schedule->FindMovie(key);
185 movie = _cinema_schedule->AddMovie(key);
187 if (!theaters_url.isEmpty()) {
188 movie->SetTheatersUrl(theaters_url);
191 movie->SetRate(rating);
// Classify each collected span text: duration, review count, or (presumably) free comment.
193 Q_FOREACH (QString s, movie_spans) {
194 if (duration_pattern.exactMatch(s)) {
// cap(2) is empty when the optional hour part is absent; toInt() then yields 0.
195 QString hours = duration_pattern.cap(2);
196 QString minutes = duration_pattern.cap(3);
197 //std::cout << "hours = " << qPrintable(hours) << ", minutes = " << qPrintable(minutes) << ",0: " << qPrintable(duration_pattern.cap(0)) << ", 1: " << qPrintable(duration_pattern.cap(1)) << std::endl;
// NOTE(review): a minutes value of 60 or more would produce an invalid QTime - confirm inputs.
198 movie->SetDuration(QTime(hours.toInt(), minutes.toInt()));
199 } else if (reviews_pattern.exactMatch(s)) {
200 // Ignore number of reviews
202 movie->SetComment(s);
// Register every collected show time for this cinema and movie on the current date.
205 Q_FOREACH(const QTime time, schedule) {
206 _cinema_schedule->AddSchedule(cinema, movie, time, _date);
// Character data: meaning depends on the element currently open.
213 } else if (token == QXmlStreamReader::Characters) {
214 if (state == PARSE_MOVIE_LINK) {
215 //std::cout << "MOVIE " << qPrintable(xml.text().toString()) << std::endl;
216 movie_name = xml.text().toString();
217 } else if (state == PARSE_MOVIE_SPAN) {
// Text goes into the most recently opened span slot; later non-empty chunks
// are appended, separated by a single space.
218 int i = movie_spans.size()-1;
219 if (movie_spans[i].isEmpty()) {
220 movie_spans[i] = xml.text().toString();
221 } else if (!xml.text().isEmpty()) {
222 movie_spans[i] += " ";
223 movie_spans[i] += xml.text().toString();
225 //std::cout << " span: " << qPrintable(movie_spans[i]) << std::endl;
226 } else if (state == PARSE_MOVIE_TIME) {
227 QString t = xml.text().toString();
// Scan the text for every occurrence of the time pattern; only times that
// convert to a valid QTime are kept.
229 while ((i = time_pattern.indexIn(t, i)) != -1) {
230 int length = time_pattern.matchedLength();
231 QTime time = TimeUtils::FromTimeString(t.mid(i, length));
232 if (time.isValid()) {
233 schedule.append(time);
235 //std::cout << "ERROR: time " << qPrintable(t.mid(i, length)) << " is invalid." << std::endl;
// Outcome reporting.
// On a parse error: report failure, print position and message to stdout, emit Error.
// NOTE(review): SearchFinished is emitted before Error here - confirm listeners expect that order.
242 if (xml.hasError()) {
243 emit SearchFinished(GetSearchTaskId(), false);
244 std::cout << "xml error (" << xml.lineNumber() << "/" << xml.columnNumber() << "): " << qPrintable(xml.errorString()) << std::endl;
245 emit Error(GetSearchTaskId());
// The task was cancelled while the reply was in flight or being parsed.
247 } else if (!_semaphore.IsActive(GetSearchTaskId())) {
248 emit Cancelled(GetSearchTaskId());
249 emit SearchFinished(GetSearchTaskId(), false);
251 // all movies are listed on one page
252 // no repetition of search with start parameter
// Reply's second argument is true while further date pages are queued, false otherwise.
253 if (!_date_urls.isEmpty()) {
254 emit Reply(GetSearchTaskId(), true);
257 emit Reply(GetSearchTaskId(), false);
258 emit SearchFinished(GetSearchTaskId(), true);
// Hand the reply object to Qt for deferred deletion.
262 reply->deleteLater();
// Definition of the static semaphore member. It is shared by all
// TheaterScheduleSearchClient instances and is used to activate, query and
// cancel search tasks by task id.
265 SearchClientSemaphore TheaterScheduleSearchClient::_semaphore;