7e1d0ce3e34d7f9504849d048a752d435d0ddd62
[movie-schedule] / src / searchclients / theaterschedulesearchclient.cpp
1 // Copyright 2010 Jochen Becher
2 //
3 // This file is part of MovieSchedule.
4 //
5 // MovieSchedule is free software: you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // MovieSchedule is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with MovieSchedule.  If not, see <http://www.gnu.org/licenses/>.
17
18 #include "theaterschedulesearchclient.h"
19
20 #include "data/cinemaschedule.h"
21 #include "data/cinema.h"
22 #include "data/scheduleentry.h"
23 #include "data/movie.h"
24 #include "utils/timeutils.h"
25 #include "utils/assertedlocker.h"
26
27 #include <QXmlStreamReader>
28 #include <QRegExp>
29 #include <iostream>
30
31 TheaterScheduleSearchClient::TheaterScheduleSearchClient(CinemaSchedule *cinema_schedule, QObject *parent)
32     : AbstractSearchClient(parent),
33     _cinema_schedule(cinema_schedule)
34 {
35 }
36
37 void TheaterScheduleSearchClient::SearchSchedule(const CinemaKey &cinema_key, const QString &url)
38 {
39     setObjectName(QString("TheaterScheduleSearchClient:%1").arg(cinema_key.GetName()));
40     _semaphore.Activate(GetSearchTaskId());
41     _cinema_key = cinema_key;
42     _date = QDate::currentDate();
43     _url = QUrl::fromEncoded(QString("http://www.google.com" + url).toAscii(), QUrl::TolerantMode);
44     _dates_seen.clear();
45     _dates_seen.insert("0");
46     _date_urls.clear();
47     Search(0);
48 }
49
50 void TheaterScheduleSearchClient::CancelAllRunningSearchs()
51 {
52     _semaphore.CancelAll();
53 }
54
55 void TheaterScheduleSearchClient::Search(int start)
56 {
57     AbstractSearchClient::Search(_url, start);
58 }
59
60 void TheaterScheduleSearchClient::SearchNextDate()
61 {
62     if (_date_urls.isEmpty()) {
63         return;
64     }
65     QPair<QUrl, QDate> pair = _date_urls.dequeue();
66     _date = pair.second;
67     _url = pair.first;
68     Search(0);
69 }
70
// States of the parser in TheaterScheduleSearchClient::ReplyFinished().
// The numeric order of the enumerators is kept as it was.
enum State {
    // Default state: scanning for links whose href starts with "/m/movies".
    PARSE_HTML,
    // Inside a link that carries a "date" query item.
    PARSE_DATE_LINK,
    // Entered when the element of PARSE_MOVIE_DIV3 ends; the next end element
    // stores the collected movie.
    PARSE_MOVIE_DIV,
    // Entered when the movie link element ends.
    PARSE_MOVIE_DIV1,
    // Inside a link that carries a "mid" query item; its text is the movie name.
    PARSE_MOVIE_LINK,
    // The next start element is expected to be the second "div".
    PARSE_MOVIE_EXPECT_DIV2,
    // Inside the second "div"; "span", "a" and "img" children are recognized.
    PARSE_MOVIE_DIV2,
    // Inside a "span" of the second "div"; its text is collected.
    PARSE_MOVIE_SPAN,
    // Inside an "a" of the second "div"; its content is skipped.
    PARSE_MOVIE_TRAILER_LINK,
    // Inside the "img" of the second "div" from which the rating is read.
    PARSE_MOVIE_RATING,
    // The next start element is expected to be the third "div".
    PARSE_MOVIE_EXPECT_DIV3,
    // Inside the third "div"; "span" children hold the show times.
    PARSE_MOVIE_DIV3,
    // Inside a "span" of the third "div"; its text is scanned for times.
    PARSE_MOVIE_TIME
};
86
87 void TheaterScheduleSearchClient::ReplyFinished(QNetworkReply *reply)
88 {
89     State state = PARSE_HTML;
90     int found = 0;
91     QString movie_name;
92     QVector<QString> movie_spans;
93     QString theaters_url;
94     double rating = -1.0;
95     QList<QTime> schedule;
96     QRegExp time_pattern("\\d+:\\d+([aApP][mM])*");
97     QRegExp duration_pattern("((\\d+)hr )?(\\d+)min");
98     QRegExp reviews_pattern("\\d+ review(s)?");
99     QXmlStreamReader xml(reply);
100     while (!xml.atEnd()) {
101         QXmlStreamReader::TokenType token = xml.readNext();
102         if (token == QXmlStreamReader::StartElement) {
103             QString attr_href = xml.attributes().value("href").toString();
104             //std::cout << "name: " << qPrintable(xml.name().toString()) << ", href " << qPrintable(attr_href) << std::endl;
105             if (state == PARSE_HTML && xml.name() == "a" && attr_href.startsWith("/m/movies")) {
106                 QUrl url = QUrl::fromEncoded(QString("http://www.google.com" + attr_href).toAscii(), QUrl::TolerantMode);
107                 //std::cout << "LINK " << qPrintable(attr_href) << std::endl;
108                 if (url.hasQueryItem("date")) {
109                     QString v = url.queryItemValue("date");
110                     //std::cout << "FOUND Date Link " << qPrintable(v) << " from " << qPrintable(url.toString()) << std::endl;
111                     if (!_dates_seen.contains(v)) {
112                         // TODO replace location with user selected location (Google simplifies to much)
113                         _dates_seen.insert(v);
114                         _date_urls.append(qMakePair(url, QDate::currentDate().addDays(v.toInt())));
115                     }
116                     state = PARSE_DATE_LINK;
117                 } else if (url.hasQueryItem("mid")) {
118                     //std::cout << "FOUND Movie Link" << std::endl;
119                     movie_name = "";
120                     movie_spans.clear();
121                     theaters_url = attr_href;
122                     rating = -1.0;
123                     schedule.clear();
124                     state = PARSE_MOVIE_LINK;
125                 } else {
126                     state = PARSE_HTML;
127                 }
128             } else if (state == PARSE_MOVIE_EXPECT_DIV2 && xml.name() == "div") {
129                 //std::cout << "PARSE_MOVIE_DIV2" << std::endl;
130                 state = PARSE_MOVIE_DIV2;
131             } else if (state == PARSE_MOVIE_DIV2 && xml.name() == "span") {
132                 movie_spans.append("");
133                 //std::cout << "PARSE_MOVIE_SPAN" << std::endl;
134                 state = PARSE_MOVIE_SPAN;
135             } else if (state == PARSE_MOVIE_DIV2 && xml.name() == "a") {
136                 state = PARSE_MOVIE_TRAILER_LINK;
137             } else if (state == PARSE_MOVIE_DIV2 && xml.name() == "img") {
138                 rating = (double) xml.attributes().value("src").at(41).digitValue() / 10.0;
139                 //std::cout << "rate: " << rate << std::endl;
140                 state = PARSE_MOVIE_RATING;
141             } else if (state == PARSE_MOVIE_EXPECT_DIV3 && xml.name() == "div") {
142                 //std::cout << "PARSE_MOVIE_DIV3" << std::endl;
143                 state = PARSE_MOVIE_DIV3;
144             } else if (state == PARSE_MOVIE_DIV3 && xml.name() == "span") {
145                 //std::cout << "PARSE_MOVIE_TIME" << std::endl;
146                 state = PARSE_MOVIE_TIME;
147             } else {
148                 //std::cout << "TAG " << qPrintable(xml.name().toString()) << std::endl;
149                 state = PARSE_HTML;
150             }
151         } else if (token == QXmlStreamReader::EndElement) {
152             if (state == PARSE_DATE_LINK) {
153                 state = PARSE_HTML;
154             } else if (state == PARSE_MOVIE_LINK) {
155                 state = PARSE_MOVIE_DIV1;
156             } else if (state == PARSE_MOVIE_DIV1) {
157                 state = PARSE_MOVIE_EXPECT_DIV2;
158             } else if (state == PARSE_MOVIE_SPAN) {
159                 state = PARSE_MOVIE_DIV2;
160             } else if (state == PARSE_MOVIE_TRAILER_LINK) {
161                 state = PARSE_MOVIE_DIV2;
162             } else if (state == PARSE_MOVIE_RATING) {
163                 state = PARSE_MOVIE_DIV2;
164             } else if (state == PARSE_MOVIE_DIV2) {
165                 state = PARSE_MOVIE_EXPECT_DIV3;
166             } else if (state == PARSE_MOVIE_TIME) {
167                 state = PARSE_MOVIE_DIV3;
168             } else if (state == PARSE_MOVIE_DIV3) {
169                 state = PARSE_MOVIE_DIV;
170             } else if (state == PARSE_MOVIE_DIV) {
171                 if (!movie_name.isEmpty()) {
172                     ++found;
173                     if (!schedule.isEmpty()) {
174                         AssertedWriteLocker locker(_cinema_schedule->GetLock());
175                         if (!_semaphore.IsActive(GetSearchTaskId())) {
176                             break;
177                         }
178                         const Cinema *cinema = _cinema_schedule->FindCinema(_cinema_key);
179                         if (cinema != 0) {
180                             //std::cout << "ADD SCHEDULE " << qPrintable(movie_name) << ", " << qPrintable(duration)
181                             //        << ", " << qPrintable(age) << ", " << rate << std::endl;
182                             MovieKey key(movie_name);
183                             Movie *movie = _cinema_schedule->FindMovie(key);
184                             if (movie == 0) {
185                                 movie = _cinema_schedule->AddMovie(key);
186                             }
187                             if (!theaters_url.isEmpty()) {
188                                 movie->SetTheatersUrl(theaters_url);
189                             }
190                             if (rating >= 0.0) {
191                                 movie->SetRate(rating);
192                             }
193                             Q_FOREACH (QString s, movie_spans) {
194                                 if (duration_pattern.exactMatch(s)) {
195                                     QString hours = duration_pattern.cap(2);
196                                     QString minutes = duration_pattern.cap(3);
197                                     //std::cout << "hours = " << qPrintable(hours) << ", minutes = " << qPrintable(minutes) << ",0: " << qPrintable(duration_pattern.cap(0)) << ", 1: " << qPrintable(duration_pattern.cap(1)) << std::endl;
198                                     movie->SetDuration(QTime(hours.toInt(), minutes.toInt()));
199                                 } else if (reviews_pattern.exactMatch(s)) {
200                                     // Ignore number of reviews
201                                 } else {
202                                     movie->SetComment(s);
203                                 }
204                             }
205                             Q_FOREACH(const QTime time, schedule) {
206                                 _cinema_schedule->AddSchedule(cinema, movie, time, _date);
207                             }
208                         }
209                     }
210                 }
211                 state = PARSE_HTML;
212             }
213         } else if (token == QXmlStreamReader::Characters) {
214             if (state == PARSE_MOVIE_LINK) {
215                 //std::cout << "MOVIE " << qPrintable(xml.text().toString()) << std::endl;
216                 movie_name = xml.text().toString();
217             } else if (state == PARSE_MOVIE_SPAN) {
218                 int i = movie_spans.size()-1;
219                 if (movie_spans[i].isEmpty()) {
220                     movie_spans[i] = xml.text().toString();
221                 } else if (!xml.text().isEmpty()) {
222                     movie_spans[i] += " ";
223                     movie_spans[i] += xml.text().toString();
224                 }
225                 //std::cout << " span: " << qPrintable(movie_spans[i]) << std::endl;
226             } else if (state == PARSE_MOVIE_TIME) {
227                 QString t = xml.text().toString();
228                 int i = 0;
229                 while ((i = time_pattern.indexIn(t, i)) != -1) {
230                     int length = time_pattern.matchedLength();
231                     QTime time = TimeUtils::FromTimeString(t.mid(i, length));
232                     if (time.isValid()) {
233                         schedule.append(time);
234                     } else {
235                         //std::cout << "ERROR: time " << qPrintable(t.mid(i, length)) << " is invalid." << std::endl;
236                     }
237                     i += length;
238                 }
239             }
240         }
241     }
242     if (xml.hasError()) {
243         emit SearchFinished(GetSearchTaskId(), false);
244         std::cout << "xml error (" << xml.lineNumber() << "/" << xml.columnNumber() << "): " << qPrintable(xml.errorString()) << std::endl;
245         emit Error(GetSearchTaskId());
246         deleteLater();
247     } else if (!_semaphore.IsActive(GetSearchTaskId())) {
248         emit Cancelled(GetSearchTaskId());
249         emit SearchFinished(GetSearchTaskId(), false);
250     } else {
251         // all movies are listed on one page
252         // no repetition of search with start parameter
253         if (!_date_urls.isEmpty()) {
254             emit Reply(GetSearchTaskId(), true);
255             SearchNextDate();
256         } else {
257             emit Reply(GetSearchTaskId(), false);
258             emit SearchFinished(GetSearchTaskId(), true);
259             deleteLater();
260         }
261     }
262     reply->deleteLater();
263 }
264
265 SearchClientSemaphore TheaterScheduleSearchClient::_semaphore;