Initial commit (software version 0.2.0)
[movie-schedule] / src / searchclients / moviesearchclient.cpp
1 // Copyright 2010 Jochen Becher
2 //
3 // This file is part of MovieSchedule.
4 //
5 // MovieSchedule is free software: you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // MovieSchedule is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with MovieSchedule.  If not, see <http://www.gnu.org/licenses/>.
17
18 #include "moviesearchclient.h"
19
20 #include "data/cinemaschedule.h"
21 #include "data/movie.h"
22 #include "data/moviekey.h"
23 #include "utils/assertedlocker.h"
24
25 #include <QXmlStreamReader>
26 #include <QRegExp>
27 #include <iostream>
28
29 MovieSearchClient::MovieSearchClient(CinemaSchedule *cinema_schedule, QObject *parent)
30     : AbstractSearchClient(parent),
31     _cinema_schedule(cinema_schedule)
32 {
33 }
34
35 void MovieSearchClient::SearchMovie(const QString &town)
36 {
37     _semaphore.Activate(GetSearchTaskId());
38     setObjectName(QString("MovieSearchClient:%1").arg(town));
39     _town = town;
40     Search(0);
41 }
42
43 void MovieSearchClient::CancelAllRunningSearchs()
44 {
45     _semaphore.CancelAll();
46 }
47
48 void MovieSearchClient::Search(int start)
49 {
50     QUrl url("http://www.google.com/m/movies");
51     url.addQueryItem("loc", _town);
52     url.addQueryItem("sort", QString::number(1));
53     AbstractSearchClient::Search(url, start);
54 }
55
// Parser states used by MovieSearchClient::ReplyFinished().
//
// The type lives in an anonymous namespace because it is an implementation
// detail of this translation unit; a global type with a name as generic as
// "State" could otherwise clash with a different definition elsewhere.
namespace {

enum State {
    PARSE_HTML,         // scanning the page for a movie link
    PARSE_MOVIE_LINK,   // inside the <a> of a movie link; its text is the movie name
    PARSE_EXPECT_DIV1,  // movie link closed; waiting for the next end tag
    PARSE_EXPECT_DIV2,  // waiting for the <div> that carries the movie details
    PARSE_DIV2,         // inside the details <div>
    PARSE_SPAN,         // inside a class-less <span> of the details <div>
    PARSE_RATING,       // inside the rating <img> of the details <div>
    PARSE_TRAILER       // inside an <a> of the details <div>
};

} // namespace
63
64 void MovieSearchClient::ReplyFinished(QNetworkReply *reply)
65 {
66     //std::cout << qPrintable(QString(reply->readAll())) << std::endl;
67     QXmlStreamReader xml(reply);
68     State state = PARSE_HTML;
69     int found = 0;
70     QString movie_name;
71     QVector<QString> movie_spans;
72     double movie_rating = -1.0;
73     QString movie_theaters_url;
74     QRegExp duration_pattern("((\\d+)hr )?(\\d+)min");
75     QRegExp reviews_pattern("\\d+ review(s)?");
76     while (!xml.atEnd()) {
77         QXmlStreamReader::TokenType token = xml.readNext();
78         if (token == QXmlStreamReader::StartElement) {
79             QString attr_class = xml.attributes().value("class").toString();
80             QString attr_href = xml.attributes().value("href").toString();
81             //std::cout << "tag = " << qPrintable(xml.name().toString()) << ", class = " << qPrintable(attr_class) << ", href = " << qPrintable(attr_href) << std::endl;
82             if (state == PARSE_HTML && xml.name() == "a" && attr_href.startsWith("/m/movies")) {
83                 QUrl url = QUrl::fromEncoded(QString("http://www.google.com" + attr_href).toAscii(), QUrl::TolerantMode);
84                 //std::cout << "LINK " << qPrintable(attr_href) << std::endl;
85                 if (url.hasQueryItem("mid")) {
86                     movie_name = "";
87                     movie_spans.clear();
88                     movie_rating = -1.0;
89                     movie_theaters_url = attr_href;
90                     state = PARSE_MOVIE_LINK;
91                 } else {
92                     state = PARSE_HTML;
93                 }
94             } else if (state == PARSE_EXPECT_DIV2 && xml.name() == "div") {
95                 state = PARSE_DIV2;
96             } else if (state == PARSE_DIV2 && xml.name() == "span" && attr_class.isEmpty()) {
97                 movie_spans.append("");
98                 state = PARSE_SPAN;
99             } else if (state == PARSE_DIV2 && xml.name() == "img") {
100                 movie_rating = (double) xml.attributes().value("src").at(41).digitValue() / 10.0;
101                 //std::cout << "rating: " << movie_rating << std::endl;
102                 state = PARSE_RATING;
103             } else if (state == PARSE_DIV2 && xml.name() == "a") {
104                 state = PARSE_TRAILER;
105             } else if (state != PARSE_HTML) {
106                 state = PARSE_HTML;
107             }
108         } else if (token == QXmlStreamReader::EndElement) {
109             if (state == PARSE_MOVIE_LINK) {
110                 state = PARSE_EXPECT_DIV1;
111             } else if (state == PARSE_EXPECT_DIV1) {
112                 state = PARSE_EXPECT_DIV2;
113             } else if (state == PARSE_EXPECT_DIV2) {
114                 state = PARSE_HTML;
115             } else if (state == PARSE_SPAN) {
116                 state = PARSE_DIV2;
117             } else if (state == PARSE_RATING) {
118                 state = PARSE_DIV2;
119             } else if (state == PARSE_TRAILER) {
120                 state = PARSE_DIV2;
121             } else if (state == PARSE_DIV2) {
122                 if (!movie_name.isEmpty()) {
123                     AssertedWriteLocker locker(_cinema_schedule->GetLock());
124                     if (!_semaphore.IsActive(GetSearchTaskId())) {
125                         break;
126                     }
127                     //std::cout << "ADD MOVIE " << qPrintable(movie_name) << std::endl;
128                     ++found;
129                     MovieKey key(movie_name);
130                     Movie *movie = _cinema_schedule->FindMovie(key);
131                     if (movie == 0) {
132                         movie = _cinema_schedule->AddMovie(key);
133                     }
134                     if (!movie_theaters_url.isEmpty()) {
135                         movie->SetTheatersUrl(movie_theaters_url);
136                     }
137                     if (movie_rating >= 0.0) {
138                         movie->SetRate(movie_rating);
139                     }
140                     Q_FOREACH (QString s, movie_spans) {
141                         if (duration_pattern.exactMatch(s)) {
142                             QString hours = duration_pattern.cap(2);
143                             QString minutes = duration_pattern.cap(3);
144                             //std::cout << "hours = " << qPrintable(hours) << ", minutes = " << qPrintable(minutes) << ",0: " << qPrintable(duration_pattern.cap(0)) << ", 1: " << qPrintable(duration_pattern.cap(1)) << std::endl;
145                             movie->SetDuration(QTime(hours.toInt(), minutes.toInt()));
146                         } else if (reviews_pattern.exactMatch(s)) {
147                             // Ignore number of reviews
148                         } else {
149                             movie->SetComment(s);
150                         }
151                     }
152                 }
153                 state = PARSE_HTML;
154             }
155         } else if (token == QXmlStreamReader::Characters) {
156             if (state == PARSE_MOVIE_LINK) {
157                 //std::cout << "movie: " << qPrintable(xml.text().toString()) << std::endl;
158                 movie_name = xml.text().toString();
159             } else if (state == PARSE_SPAN) {
160                 int i = movie_spans.size()-1;
161                 if (movie_spans[i].isEmpty()) {
162                     movie_spans[i] = xml.text().toString();
163                 } else if (!xml.text().isEmpty()) {
164                     movie_spans[i] += " ";
165                     movie_spans[i] += xml.text().toString();
166                 }
167                 //std::cout << " span: " << qPrintable(movie_spans[i]) << std::endl;
168             }
169         }
170     }
171     if (xml.hasError()) {
172         std::cout << "xml error (" << xml.lineNumber() << "/" << xml.columnNumber() << "): " << qPrintable(xml.errorString()) << std::endl;
173         emit Error(GetSearchTaskId());
174         emit SearchFinished(GetSearchTaskId(), false);
175         deleteLater();
176     } else if (!_semaphore.IsActive(GetSearchTaskId())) {
177         emit Cancelled(GetSearchTaskId());
178         emit SearchFinished(GetSearchTaskId(), false);
179         deleteLater();
180     } else {
181         if (found > 0) {
182             emit Reply(GetSearchTaskId(), true);
183             Search(GetStartIndex() + found);
184         } else {
185             emit Reply(GetSearchTaskId(), false);
186             emit SearchFinished(GetSearchTaskId(), true);
187             deleteLater();
188         }
189     }
190     reply->deleteLater();
191 }
192
// Definition of the static semaphore member of MovieSearchClient.
// SearchMovie() activates the client's task id on it, ReplyFinished() polls
// IsActive() to notice cancellation, and CancelAllRunningSearchs() calls
// CancelAll() on it.
193 SearchClientSemaphore MovieSearchClient::_semaphore;