ec642ec2d69e28c811675893c12b9134c77802f4
[movie-schedule] / src / searchclients / moviesearchclient.cpp
1 // Copyright 2010 Jochen Becher
2 //
3 // This file is part of MovieSchedule.
4 //
5 // MovieSchedule is free software: you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // MovieSchedule is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with MovieSchedule.  If not, see <http://www.gnu.org/licenses/>.
17
18 #include "moviesearchclient.h"
19
20 #include "data/cinemaschedule.h"
21 #include "data/movie.h"
22 #include "data/moviekey.h"
23 #include "utils/assertedlocker.h"
24
25 #include <QXmlStreamReader>
26 #include <QRegExp>
27 #include <iostream>
28
29 MovieSearchClient::MovieSearchClient(CinemaSchedule *cinema_schedule, QObject *parent)
30     : AbstractSearchClient(parent),
31     _cinema_schedule(cinema_schedule)
32 {
33 }
34
35 void MovieSearchClient::SearchMovie(const QString &town)
36 {
37     _semaphore.Activate(GetSearchTaskId());
38     setObjectName(QString("MovieSearchClient:%1").arg(town));
39     _town = town;
40     Search(0);
41 }
42
43 void MovieSearchClient::CancelAllRunningSearchs()
44 {
45     _semaphore.CancelAll();
46 }
47
48 void MovieSearchClient::Search(int start)
49 {
50     QUrl url("http://www.google.com/m/movies");
51     url.addQueryItem("action","chgloc");
52     url.addQueryItem("loc", _town);
53     url.addQueryItem("sort", QString::number(1));
54     AbstractSearchClient::Search(url, start);
55 }
56
// States of the parser in MovieSearchClient::ReplyFinished().
enum State {
    // Initial/idle state: scanning for <a> elements whose href starts with
    // "/m/movies".
    PARSE_HTML,
    // Inside a link that carries a "mid" query item; its character data is
    // taken as the movie name.
    PARSE_MOVIE_LINK,
    // The movie link has ended; one more end tag advances to
    // PARSE_EXPECT_DIV2.
    PARSE_EXPECT_DIV1,
    // A <div> start tag in this state enters PARSE_DIV2.
    PARSE_EXPECT_DIV2,
    // Inside the movie detail <div>; class-less <span>, <img> and <a>
    // children are recognized, and the end tag stores the movie.
    PARSE_DIV2,
    // Inside a class-less <span> of the detail <div>; text is collected.
    PARSE_SPAN,
    // Inside the <img> element the rating is read from.
    PARSE_RATING,
    // Inside an <a> element of the detail <div>; its content is ignored.
    PARSE_TRAILER,
    // Inside a link that carries a "start" query item (paging link).
    PARSE_NEXT_PAGE_LINK
};
65
66 void MovieSearchClient::ReplyFinished(QNetworkReply *reply)
67 {
68     //std::cout << qPrintable(QString(reply->readAll())) << std::endl;
69     QXmlStreamReader xml(reply);
70     State state = PARSE_HTML;
71     int found = 0;
72     QString movie_name;
73     QVector<QString> movie_spans;
74     double movie_rating = -1.0;
75     QString movie_theaters_url;
76     QRegExp duration_pattern("((\\d+)hr )?(\\d+)min");
77     QRegExp reviews_pattern("\\d+ review(s)?");
78     QString next_page_url;
79     int next_page_start;
80     while (!xml.atEnd()) {
81         QXmlStreamReader::TokenType token = xml.readNext();
82         if (token == QXmlStreamReader::StartElement) {
83             QString attr_class = xml.attributes().value("class").toString();
84             QString attr_href = xml.attributes().value("href").toString();
85             //std::cout << "tag = " << qPrintable(xml.name().toString()) << ", class = " << qPrintable(attr_class) << ", href = " << qPrintable(attr_href) << std::endl;
86             if (state == PARSE_HTML && xml.name() == "a" && attr_href.startsWith("/m/movies")) {
87                 QUrl url = QUrl::fromEncoded(QString("http://www.google.com" + attr_href).toAscii(), QUrl::TolerantMode);
88                 //std::cout << "LINK " << qPrintable(attr_href) << std::endl;
89                 if (url.hasQueryItem("mid")) {
90                     movie_name = "";
91                     movie_spans.clear();
92                     movie_rating = -1.0;
93                     movie_theaters_url = attr_href;
94                     state = PARSE_MOVIE_LINK;
95                 } else if (url.hasQueryItem("start")) {
96                     QString sort = url.queryItemValue("sort");
97                     QString start = url.queryItemValue("start");
98                     int istart = start.toInt();
99                     if (sort == "1" && istart > GetStartIndex()) {
100                         //std::cout << "next page LINK " << qPrintable(attr_href) << std::endl;
101                         next_page_url = attr_href;
102                         next_page_start = istart;
103                     }
104                     state = PARSE_NEXT_PAGE_LINK;
105                 } else {
106                     state = PARSE_HTML;
107                 }
108             } else if (state == PARSE_EXPECT_DIV2 && xml.name() == "div") {
109                 state = PARSE_DIV2;
110             } else if (state == PARSE_DIV2 && xml.name() == "span" && attr_class.isEmpty()) {
111                 movie_spans.append("");
112                 state = PARSE_SPAN;
113             } else if (state == PARSE_DIV2 && xml.name() == "img") {
114                 movie_rating = (double) xml.attributes().value("src").at(41).digitValue() / 10.0;
115                 //std::cout << "rating: " << movie_rating << std::endl;
116                 state = PARSE_RATING;
117             } else if (state == PARSE_DIV2 && xml.name() == "a") {
118                 state = PARSE_TRAILER;
119             } else if (state != PARSE_HTML) {
120                 state = PARSE_HTML;
121             }
122         } else if (token == QXmlStreamReader::EndElement) {
123             if (state == PARSE_MOVIE_LINK) {
124                 state = PARSE_EXPECT_DIV1;
125             } else if (state == PARSE_EXPECT_DIV1) {
126                 state = PARSE_EXPECT_DIV2;
127             } else if (state == PARSE_EXPECT_DIV2) {
128                 state = PARSE_HTML;
129             } else if (state == PARSE_SPAN) {
130                 state = PARSE_DIV2;
131             } else if (state == PARSE_RATING) {
132                 state = PARSE_DIV2;
133             } else if (state == PARSE_TRAILER) {
134                 state = PARSE_DIV2;
135             } else if (state == PARSE_DIV2) {
136                 if (!movie_name.isEmpty()) {
137                     AssertedWriteLocker locker(_cinema_schedule->GetLock());
138                     if (!_semaphore.IsActive(GetSearchTaskId())) {
139                         break;
140                     }
141                     //std::cout << "ADD MOVIE " << qPrintable(movie_name) << std::endl;
142                     ++found;
143                     MovieKey key(movie_name);
144                     Movie *movie = _cinema_schedule->FindMovie(key);
145                     if (movie == 0) {
146                         movie = _cinema_schedule->AddMovie(key);
147                     }
148                     if (!movie_theaters_url.isEmpty()) {
149                         movie->SetTheatersUrl(movie_theaters_url);
150                     }
151                     if (movie_rating >= 0.0) {
152                         movie->SetRate(movie_rating);
153                     }
154                     Q_FOREACH (QString s, movie_spans) {
155                         if (duration_pattern.exactMatch(s)) {
156                             QString hours = duration_pattern.cap(2);
157                             QString minutes = duration_pattern.cap(3);
158                             //std::cout << "hours = " << qPrintable(hours) << ", minutes = " << qPrintable(minutes) << ",0: " << qPrintable(duration_pattern.cap(0)) << ", 1: " << qPrintable(duration_pattern.cap(1)) << std::endl;
159                             movie->SetDuration(QTime(hours.toInt(), minutes.toInt()));
160                         } else if (reviews_pattern.exactMatch(s)) {
161                             // Ignore number of reviews
162                         } else {
163                             movie->SetComment(s);
164                         }
165                     }
166                 }
167                 state = PARSE_HTML;
168             } else if (state == PARSE_NEXT_PAGE_LINK) {
169                 state = PARSE_HTML;
170             }
171         } else if (token == QXmlStreamReader::Characters) {
172             if (state == PARSE_MOVIE_LINK) {
173                 //std::cout << "movie: " << qPrintable(xml.text().toString()) << std::endl;
174                 movie_name = xml.text().toString();
175             } else if (state == PARSE_SPAN) {
176                 int i = movie_spans.size()-1;
177                 if (movie_spans[i].isEmpty()) {
178                     movie_spans[i] = xml.text().toString();
179                 } else if (!xml.text().isEmpty()) {
180                     movie_spans[i] += " ";
181                     movie_spans[i] += xml.text().toString();
182                 }
183                 //std::cout << " span: " << qPrintable(movie_spans[i]) << std::endl;
184             }
185         }
186     }
187     if (xml.hasError()) {
188         std::cout << "xml error (" << xml.lineNumber() << "/" << xml.columnNumber() << "): " << qPrintable(xml.errorString()) << std::endl;
189         emit Error(GetSearchTaskId());
190         emit SearchFinished(GetSearchTaskId(), false);
191         deleteLater();
192     } else if (!_semaphore.IsActive(GetSearchTaskId())) {
193         emit Cancelled(GetSearchTaskId());
194         emit SearchFinished(GetSearchTaskId(), false);
195         deleteLater();
196     } else {
197         if (!next_page_url.isEmpty()) {
198             emit Reply(GetSearchTaskId(), true);
199             SearchEncodedUrl(next_page_url, next_page_start);
200         } else {
201             emit Reply(GetSearchTaskId(), false);
202             emit SearchFinished(GetSearchTaskId(), true);
203             deleteLater();
204         }
205     }
206     reply->deleteLater();
207 }
208
// Out-of-class definition of the static semaphore member. SearchMovie()
// activates the search task id on it, CancelAllRunningSearchs() calls
// CancelAll() on it, and ReplyFinished() queries IsActive() on it.
209 SearchClientSemaphore MovieSearchClient::_semaphore;