1 // Copyright 2010 Jochen Becher
3 // This file is part of MovieSchedule.
5 // MovieSchedule is free software: you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, either version 3 of the License, or
8 // (at your option) any later version.
10 // MovieSchedule is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with MovieSchedule. If not, see <http://www.gnu.org/licenses/>.
18 #include "theaterschedulesearchclient.h"
20 #include "data/cinemaschedule.h"
21 #include "data/cinema.h"
22 #include "data/scheduleentry.h"
23 #include "data/movie.h"
24 #include "utils/timeutils.h"
25 #include "utils/assertedlocker.h"
27 #include <QXmlStreamReader>
// Constructs a search client that works on the given cinema schedule.
//   cinema_schedule  stored as-is in _cinema_schedule (used by ReplyFinished to look up
//                    cinemas/movies and to add schedule entries)
//   parent           handed on to the AbstractSearchClient base class
31 TheaterScheduleSearchClient::TheaterScheduleSearchClient(CinemaSchedule *cinema_schedule, QObject *parent)
32 : AbstractSearchClient(parent),
33 _cinema_schedule(cinema_schedule)
// Prepares a schedule search for the cinema identified by cinema_key.
//   cinema_key  remembered in _cinema_key; its name is also used for the object name
//   url         appended verbatim to "http://www.google.com" to form the request URL
// NOTE(review): some statements of this function are not visible in this copy (presumably
// including the call that actually starts the request) - confirm against the complete source.
37 void TheaterScheduleSearchClient::SearchSchedule(const CinemaKey &cinema_key, const QString &url)
39 setObjectName(QString("TheaterScheduleSearchClient:%1").arg(cinema_key.GetName()));
// Mark this search task as active on the semaphore; ReplyFinished checks IsActive() on the
// same semaphore and emits Cancelled when the task is no longer reported as active.
40 _semaphore.Activate(GetSearchTaskId());
41 _cinema_key = cinema_key;
// Schedule entries parsed from the reply are stored under _date; start with today.
42 _date = QDate::currentDate();
// NOTE(review): QString::toAscii() is listed as obsolete in Qt 5 (toLatin1() is the
// documented replacement) - relevant if this code is ever built against a newer Qt.
43 _url = QUrl::fromEncoded(QString("http://www.google.com" + url).toAscii(), QUrl::TolerantMode);
// Date value "0" is treated as already seen, so ReplyFinished will not queue a date link
// carrying that value.
45 _dates_seen.insert("0");
// Calls CancelAll() on the semaphore that is a static member of this class.
// ReplyFinished consults the same semaphore via IsActive(GetSearchTaskId()) and emits
// Cancelled instead of continuing when the task is no longer active.
// NOTE(review): the exact effect of CancelAll() is defined by SearchClientSemaphore, which
// is not part of this file - presumably it deactivates every registered task.
50 void TheaterScheduleSearchClient::CancelAllRunningSearchs()
52 _semaphore.CancelAll();
// Forwards to AbstractSearchClient::Search using the URL stored in _url.
//   start  passed through unchanged as the second argument of the base class call
55 void TheaterScheduleSearchClient::Search(int start)
57 AbstractSearchClient::Search(_url, start);
// Takes the next queued (url, date) pair from _date_urls; the pairs are queued by
// ReplyFinished for every date link it has not seen before.
// NOTE(review): the handling of the empty-queue case and everything after the dequeue
// (presumably storing the pair in _url/_date and starting the request) is not visible in
// this copy - confirm against the complete source.
60 void TheaterScheduleSearchClient::SearchNextDate()
62 if (_date_urls.isEmpty()) {
65 QPair<QUrl, QDate> pair = _date_urls.dequeue();
// Parser states used by ReplyFinished (only part of the enumerator list is visible in
// this copy; the notes below describe how ReplyFinished uses each visible value).
// Entered on an end tag seen in PARSE_MOVIE_DIV1; left for PARSE_MOVIE_DIV2 on the next <div>.
77 PARSE_MOVIE_EXPECT_DIV2,
// Entered on an <a> start tag seen in PARSE_MOVIE_DIV2; the next end tag returns to PARSE_MOVIE_DIV2.
80 PARSE_MOVIE_TRAILER_LINK,
// Entered on an end tag seen in PARSE_MOVIE_DIV2; left for PARSE_MOVIE_DIV3 on the next <div>.
82 PARSE_MOVIE_EXPECT_DIV3,
// Processes a finished network reply for a cinema's schedule page.
//
// The reply is read as an XML token stream and driven through a small state machine
// (variable `state`). While parsing, the function
//   - queues links to the listing for other dates in _date_urls,
//   - collects name, theaters link, rating, span texts and show times per movie and stores
//     them in _cinema_schedule once the movie's markup is complete,
//   - remembers a link to the next result page.
// After parsing it emits Error / Cancelled / Reply / SearchFinished depending on the
// outcome and schedules the reply object for deletion.
//
//   reply  the finished reply; read through QXmlStreamReader and released with deleteLater()
//
// NOTE(review): several short lines of this function (braces, else branches, local
// declarations such as movie_name, rating, theaters_url and next_page_start) are not visible
// in this copy. Nesting described in the comments below is inferred from the visible
// statements - confirm against the complete source.
88 void TheaterScheduleSearchClient::ReplyFinished(QNetworkReply *reply)
// Every reply starts in PARSE_HTML, the state in which links are examined.
90 State state = PARSE_HTML;
// One entry per <span> start tag seen while in PARSE_MOVIE_DIV2; filled from character data.
93 QVector<QString> movie_spans;
// Show time strings matched by time_pattern for the current movie.
96 QList<QString> schedule;
// digits ':' digits, optionally followed by am/pm in any letter case.
97 QRegExp time_pattern("\\d+:\\d+([aApP][mM])*");
// Optional "<n>hr " followed by "<n>min"; capture 2 = hours, capture 3 = minutes.
98 QRegExp duration_pattern("((\\d+)hr )?(\\d+)min");
// "<n> review" / "<n> reviews"; spans matching this are ignored further down.
99 QRegExp reviews_pattern("\\d+ review(s)?");
100 QXmlStreamReader xml(reply);
// Stays empty unless a suitable paging link is found.
101 QString next_page_url;
103 while (!xml.atEnd()) {
104 QXmlStreamReader::TokenType token = xml.readNext();
105 if (token == QXmlStreamReader::StartElement) {
106 QString attr_href = xml.attributes().value("href").toString();
107 //std::cout << "name: " << qPrintable(xml.name().toString()) << ", href " << qPrintable(attr_href) << std::endl;
// Only <a> elements pointing below /m/movies are examined; the query items of the link
// decide whether it is a date link, a movie link or a paging link.
108 if (state == PARSE_HTML && xml.name() == "a" && attr_href.startsWith("/m/movies")) {
109 QUrl url = QUrl::fromEncoded(QString("http://www.google.com" + attr_href).toAscii(), QUrl::TolerantMode);
110 //std::cout << "LINK " << qPrintable(attr_href) << std::endl;
111 if (url.hasQueryItem("date")) {
112 QString v = url.queryItemValue("date");
113 //std::cout << "FOUND Date Link " << qPrintable(v) << " from " << qPrintable(url.toString()) << std::endl;
114 if (!_dates_seen.contains(v)) {
115 // TODO replace location with user selected location (Google simplifies too much)
116 _dates_seen.insert(v);
// The "date" value is used as a day offset relative to today.
117 _date_urls.append(qMakePair(url, QDate::currentDate().addDays(v.toInt())));
119 state = PARSE_DATE_LINK;
120 } else if (url.hasQueryItem("mid")) {
121 //std::cout << "FOUND Movie Link" << std::endl;
// The movie link itself is kept and later stored on the movie via SetTheatersUrl().
124 theaters_url = attr_href;
127 state = PARSE_MOVIE_LINK;
128 } else if (url.hasQueryItem("start")) {
129 QString sort = url.queryItemValue("sort");
130 QString start = url.queryItemValue("start");
131 int istart = start.toInt();
// Accept the paging link only for sort "0" and a start index beyond GetStartIndex().
132 if (sort == "0" && istart > GetStartIndex()) {
133 //std::cout << "next page LINK " << qPrintable(attr_href) << std::endl;
134 next_page_url = attr_href;
135 next_page_start = istart;
137 state = PARSE_NEXT_PAGE_LINK;
// Start tags inside a movie entry: the second <div> carries spans, a trailer link and the
// rating image; the third <div> carries spans with show times.
141 } else if (state == PARSE_MOVIE_EXPECT_DIV2 && xml.name() == "div") {
142 //std::cout << "PARSE_MOVIE_DIV2" << std::endl;
143 state = PARSE_MOVIE_DIV2;
144 } else if (state == PARSE_MOVIE_DIV2 && xml.name() == "span") {
// Open a new, still empty span entry; its text arrives as Characters tokens.
145 movie_spans.append("");
146 //std::cout << "PARSE_MOVIE_SPAN" << std::endl;
147 state = PARSE_MOVIE_SPAN;
148 } else if (state == PARSE_MOVIE_DIV2 && xml.name() == "a") {
149 state = PARSE_MOVIE_TRAILER_LINK;
150 } else if (state == PARSE_MOVIE_DIV2 && xml.name() == "img") {
// The rating is taken from the digit at index 41 of the image's src attribute, divided by 10.
// NOTE(review): no length check on the src value is visible before .at(41); a shorter or
// missing attribute would index out of range - confirm whether this needs a guard.
151 rating = (double) xml.attributes().value("src").at(41).digitValue() / 10.0;
152 //std::cout << "rate: " << rate << std::endl;
153 state = PARSE_MOVIE_RATING;
154 } else if (state == PARSE_MOVIE_EXPECT_DIV3 && xml.name() == "div") {
155 //std::cout << "PARSE_MOVIE_DIV3" << std::endl;
156 state = PARSE_MOVIE_DIV3;
157 } else if (state == PARSE_MOVIE_DIV3 && xml.name() == "span") {
158 //std::cout << "PARSE_MOVIE_TIME" << std::endl;
159 state = PARSE_MOVIE_TIME;
161 //std::cout << "TAG " << qPrintable(xml.name().toString()) << std::endl;
// End tags step the state machine back out of the element entered by the matching start tag.
164 } else if (token == QXmlStreamReader::EndElement) {
165 if (state == PARSE_DATE_LINK) {
167 } else if (state == PARSE_MOVIE_LINK) {
168 state = PARSE_MOVIE_DIV1;
169 } else if (state == PARSE_MOVIE_DIV1) {
170 state = PARSE_MOVIE_EXPECT_DIV2;
171 } else if (state == PARSE_MOVIE_SPAN) {
172 state = PARSE_MOVIE_DIV2;
173 } else if (state == PARSE_MOVIE_TRAILER_LINK) {
174 state = PARSE_MOVIE_DIV2;
175 } else if (state == PARSE_MOVIE_RATING) {
176 state = PARSE_MOVIE_DIV2;
177 } else if (state == PARSE_MOVIE_DIV2) {
178 state = PARSE_MOVIE_EXPECT_DIV3;
179 } else if (state == PARSE_MOVIE_TIME) {
180 state = PARSE_MOVIE_DIV3;
181 } else if (state == PARSE_MOVIE_DIV3) {
182 state = PARSE_MOVIE_DIV;
// An end tag seen in PARSE_MOVIE_DIV completes the movie entry. Data is stored only when
// both a movie name and at least one show time were collected.
183 } else if (state == PARSE_MOVIE_DIV) {
184 if (!movie_name.isEmpty()) {
186 if (!schedule.isEmpty()) {
// Locker object on the schedule's lock, created before the schedule is read or modified below.
187 AssertedWriteLocker locker(_cinema_schedule->GetLock());
// Re-check for cancellation now that the lock is held.
// NOTE(review): what happens when the task is no longer active is not visible in this copy.
188 if (!_semaphore.IsActive(GetSearchTaskId())) {
191 const Cinema *cinema = _cinema_schedule->FindCinema(_cinema_key);
193 //std::cout << "ADD SCHEDULE " << qPrintable(movie_name) << ", " << qPrintable(duration)
194 // << ", " << qPrintable(age) << ", " << rate << std::endl;
195 MovieKey key(movie_name);
// Look the movie up by key and add it if needed (the guarding condition is not visible
// here - presumably a null check on the FindMovie result).
196 Movie *movie = _cinema_schedule->FindMovie(key);
198 movie = _cinema_schedule->AddMovie(key);
200 if (!theaters_url.isEmpty()) {
201 movie->SetTheatersUrl(theaters_url);
204 movie->SetRate(rating);
// Classify each collected span text: duration, review count (ignored) or comment.
206 Q_FOREACH (QString s, movie_spans) {
207 if (duration_pattern.exactMatch(s)) {
208 QString hours = duration_pattern.cap(2);
209 QString minutes = duration_pattern.cap(3);
210 //std::cout << "hours = " << qPrintable(hours) << ", minutes = " << qPrintable(minutes) << ",0: " << qPrintable(duration_pattern.cap(0)) << ", 1: " << qPrintable(duration_pattern.cap(1)) << std::endl;
// hours is empty when the optional "<n>hr " part is absent; toInt() then yields 0.
// NOTE(review): the pattern accepts any number of minutes; a value of 60 or more would give
// an invalid QTime - confirm whether such input can occur.
211 movie->SetDuration(QTime(hours.toInt(), minutes.toInt()));
212 } else if (reviews_pattern.exactMatch(s)) {
213 // Ignore number of reviews
// Presumably the fallback branch for any other span text (the else line is not visible here).
215 movie->SetComment(s);
// Convert the collected time strings and add one schedule entry per show time.
218 QList<QTime> schedule_times = TimesFromString(schedule);
219 Q_FOREACH(const QTime time, schedule_times) {
220 if (time.hour() < 3) {
221 // interpret very early times as shifted by 1 day (seems to be a Google logic)
222 _cinema_schedule->AddSchedule(cinema, movie, time, _date.addDays(1));
224 _cinema_schedule->AddSchedule(cinema, movie, time, _date);
231 } else if (state == PARSE_NEXT_PAGE_LINK) {
// Character data: movie title, span text or show times, depending on the current state.
234 } else if (token == QXmlStreamReader::Characters) {
235 if (state == PARSE_MOVIE_LINK) {
236 //std::cout << "MOVIE " << qPrintable(xml.text().toString()) << std::endl;
237 movie_name = xml.text().toString();
238 } else if (state == PARSE_MOVIE_SPAN) {
// Text of one span may arrive in several chunks; later chunks are appended to the most
// recent span entry, separated by a single space.
239 int i = movie_spans.size()-1;
240 if (movie_spans[i].isEmpty()) {
241 movie_spans[i] = xml.text().toString();
242 } else if (!xml.text().isEmpty()) {
243 movie_spans[i] += " ";
244 movie_spans[i] += xml.text().toString();
246 //std::cout << " span: " << qPrintable(movie_spans[i]) << std::endl;
247 } else if (state == PARSE_MOVIE_TIME) {
248 QString t = xml.text().toString();
// Collect every time_pattern match found in the text.
// NOTE(review): the declaration and advancement of i are not visible in this copy.
250 while ((i = time_pattern.indexIn(t, i)) != -1) {
251 int length = time_pattern.matchedLength();
253 schedule.append(t.mid(i, length));
// Parsing is done: report the outcome.
// On an XML error SearchFinished(false) is emitted first, followed by Error.
// NOTE(review): std::cout is used here but no <iostream> include is visible among the
// includes of this copy - confirm it is included in the complete source.
260 if (xml.hasError()) {
261 emit SearchFinished(GetSearchTaskId(), false);
262 std::cout << "xml error (" << xml.lineNumber() << "/" << xml.columnNumber() << "): " << qPrintable(xml.errorString()) << std::endl;
263 emit Error(GetSearchTaskId());
// The task was cancelled while the reply was in flight or being parsed.
265 } else if (!_semaphore.IsActive(GetSearchTaskId())) {
266 emit Cancelled(GetSearchTaskId());
267 emit SearchFinished(GetSearchTaskId(), false);
// More result pages for the current date: report an intermediate reply and fetch the next page.
269 if (!next_page_url.isEmpty()) {
270 emit Reply(GetSearchTaskId(), true);
271 SearchEncodedUrl(next_page_url, next_page_start);
// Further dates are queued: report an intermediate reply (the statement that continues
// with the next date is not visible here - presumably a call to SearchNextDate()).
273 if (!_date_urls.isEmpty()) {
274 emit Reply(GetSearchTaskId(), true);
// Nothing left to fetch: report the final reply and the successful end of the search.
277 emit Reply(GetSearchTaskId(), false);
278 emit SearchFinished(GetSearchTaskId(), true);
// Hand the reply object to Qt for deferred deletion.
283 reply->deleteLater();
// Definition of the class's static semaphore member. SearchSchedule activates the current
// search task on it, CancelAllRunningSearchs calls CancelAll() on it, and ReplyFinished
// queries it with IsActive() to detect cancellation.
286 SearchClientSemaphore TheaterScheduleSearchClient::_semaphore;