7041c5ec430ef0a9cb0ff8e09ce4dbc0b65815b1
[movie-schedule] / src / searchclients / movieschedulesearchclient.cpp
1 // Copyright 2010 Jochen Becher
2 //
3 // This file is part of MovieSchedule.
4 //
5 // MovieSchedule is free software: you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // MovieSchedule is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with MovieSchedule.  If not, see <http://www.gnu.org/licenses/>.
17
18 #include "movieschedulesearchclient.h"
19
20 #include "data/cinemaschedule.h"
21 #include "data/cinema.h"
22 #include "data/scheduleentry.h"
23 #include "data/movie.h"
24 #include "utils/timeutils.h"
25 #include "utils/assertedlocker.h"
26
27 #include <QXmlStreamReader>
28 #include <QRegExp>
29 #include <QUrl>
30 #include <iostream>
31
32 MovieScheduleSearchClient::MovieScheduleSearchClient(CinemaSchedule *cinema_schedule, QObject *parent)
33     : AbstractSearchClient(parent),
34     _cinema_schedule(cinema_schedule)
35 {
36 }
37
38 void MovieScheduleSearchClient::SearchSchedule(const MovieKey &movie_key, const QString &url)
39 {
40     setObjectName(QString("MovieScheduleSearchClient:%1").arg(movie_key.GetName()));
41     _semaphore.Activate(GetSearchTaskId());
42     _movie_key = movie_key;
43     _date = QDate::currentDate();
44     _url = QUrl::fromEncoded(QString("http://www.google.com" + url).toAscii(), QUrl::TolerantMode);
45     _dates_seen.clear();
46     _dates_seen.insert("0");
47     _date_urls.clear();
48     Search(0);
49 }
50
51 void MovieScheduleSearchClient::CancelAllRunningSearchs()
52 {
53     _semaphore.CancelAll();
54 }
55
56 void MovieScheduleSearchClient::Search(int start)
57 {
58     AbstractSearchClient::Search(_url, start);
59 }
60
61 void MovieScheduleSearchClient::SearchNextDate()
62 {
63     if (_date_urls.isEmpty()) {
64         return;
65     }
66     QPair<QUrl, QDate> pair = _date_urls.dequeue();
67     _date = pair.second;
68     _url = pair.first;
69     Search(0);
70 }
71
/// Parser states used by MovieScheduleSearchClient::ReplyFinished().
enum State {
    /// Default state: looking for <a> elements whose href starts with "/m/movies".
    PARSE_HTML,
    /// Inside a link whose URL carries a "date" query item.
    PARSE_DATE_LINK,
    /// Inside a theater section (entered when a theater link is closed).
    PARSE_THEATER_DIV,
    /// Inside a link whose URL carries a "tid" query item; its text is the theater name.
    PARSE_THEATER_LINK,
    /// Inside an <a> whose href starts with "wtai:"; its text is the phone number.
    PARSE_PHONE_LINK,
    /// Inside any other <a> of a theater section.
    PARSE_LINK,
    /// Inside a <br> of a theater section.
    PARSE_BR,
    /// Inside a <span> of a theater section; its text holds show times or the address.
    PARSE_SPAN
};
82
83 void MovieScheduleSearchClient::ReplyFinished(QNetworkReply *reply)
84 {
85     //std::cout << "REPLY" << std::endl;
86     //std::cout << reply->readAll().data() << std::endl;
87     QXmlStreamReader xml(reply);
88     State state = PARSE_HTML;
89     int found = 0;
90     QString theater_name;
91     QString theater_address;
92     QString theater_phone;
93     QList<QString> schedule;
94     QRegExp time_pattern("\\d+:\\d+([aApP][mM])*");
95     while (!xml.atEnd()) {
96         QXmlStreamReader::TokenType token = xml.readNext();
97         if (token == QXmlStreamReader::StartElement) {
98             QString attr_href = xml.attributes().value("href").toString();
99             //std::cout << qPrintable(xml.name().toString()) << ", class " << qPrintable(attr_class) << ", href " << qPrintable(attr_href) << std::endl;
100             if (state == PARSE_HTML && xml.name() == "a" && attr_href.startsWith("/m/movies")) {
101                 QUrl url = QUrl::fromEncoded(QString("http://www.google.com" + attr_href).toAscii(), QUrl::TolerantMode);
102                 //std::cout << "LINK " << qPrintable(attr_href) << std::endl;
103                 if (url.hasQueryItem("date")) {
104                     QString v = url.queryItemValue("date");
105                     std::cout << "FOUND Date Link " << qPrintable(v) << " from " << qPrintable(url.toString()) << std::endl;
106                     if (!_dates_seen.contains(v)) {
107                         // TODO replace location with user selected location (Google simplifies to much)
108                         _dates_seen.insert(v);
109                         _date_urls.append(qMakePair(url, QDate::currentDate().addDays(v.toInt())));
110                     }
111                     state = PARSE_DATE_LINK;
112                 } else if (url.hasQueryItem("tid")) {
113                     theater_name = "";
114                     theater_address = "";
115                     theater_phone = "";
116                     schedule.clear();
117                     state = PARSE_THEATER_LINK;
118                 } else {
119                     state = PARSE_HTML;
120                 }
121             } else if (state == PARSE_THEATER_DIV && xml.name() == "a") {
122                 if (attr_href.startsWith("wtai:")) {
123                     state = PARSE_PHONE_LINK;
124                 } else {
125                     state = PARSE_LINK;
126                 }
127             } else if (state == PARSE_THEATER_DIV && xml.name() == "br") {
128                 state = PARSE_BR;
129             } else if (state == PARSE_THEATER_DIV && xml.name() == "span") {
130                 state = PARSE_SPAN;
131             } else {
132                 state = PARSE_HTML;
133             }
134         } else if (token == QXmlStreamReader::EndElement) {
135             if (state == PARSE_DATE_LINK) {
136                 state = PARSE_HTML;
137             } else if (state == PARSE_THEATER_LINK) {
138                 state = PARSE_THEATER_DIV;
139             } else if (state == PARSE_BR) {
140                 state = PARSE_THEATER_DIV;
141             } else if (state == PARSE_SPAN) {
142                 state = PARSE_THEATER_DIV;
143             } else if (state == PARSE_LINK) {
144                 state = PARSE_THEATER_DIV;
145             } else if (state == PARSE_PHONE_LINK) {
146                 state = PARSE_THEATER_DIV;
147             } else if (state == PARSE_THEATER_DIV) {
148                 state = PARSE_HTML;
149                 if (!theater_name.isEmpty()) {
150                     ++found;
151                     if (!schedule.isEmpty()) {
152                         AssertedWriteLocker locker(_cinema_schedule->GetLock());
153                         if (!_semaphore.IsActive(GetSearchTaskId())) {
154                             break;
155                         }
156                         const Movie *movie = _cinema_schedule->FindMovie(_movie_key);
157                         if (movie != 0) {
158                             //std::cout << "ADD SCHEDULE " << qPrintable(theater_name) << ", " << qPrintable(theater_address) << std::endl;
159                             CinemaKey key(theater_name, theater_address);
160                             Cinema *cinema = _cinema_schedule->FindCinema(key);
161                             if (cinema == 0) {
162                                 cinema = _cinema_schedule->AddCinema(key);
163                             }
164                             if (!theater_phone.isEmpty()) {
165                                 cinema->SetTelephone(theater_phone);
166                             }
167                             QList<QTime> schedule_times = TimesFromString(schedule);
168                             Q_FOREACH(const QTime time, schedule_times) {
169                                 if (time.hour() < 3) {
170                                     // interpret very early times as shifted by 1 day (seems to be a Google logic)
171                                     _cinema_schedule->AddSchedule(cinema, movie, time, _date.addDays(1));
172                                 } else {
173                                     _cinema_schedule->AddSchedule(cinema, movie, time, _date);
174                                 }
175                             }
176                         }
177                     }
178                 }
179             }
180         } else if (token == QXmlStreamReader::Characters) {
181             if (state == PARSE_THEATER_LINK) {
182                 //std::cout << "name " << qPrintable(xml.text().toString()) << std::endl;
183                 theater_name = xml.text().toString();
184             } else if (state == PARSE_PHONE_LINK) {
185                 //std::cout << "phone " << qPrintable(xml.text().toString()) << std::endl;
186                 theater_phone = xml.text().toString();
187             } else if (state == PARSE_SPAN) {
188                 QString t = xml.text().toString();
189                 int i = 0;
190                 bool found = false;
191                 while ((i = time_pattern.indexIn(t, i)) != -1) {
192                     int length = time_pattern.matchedLength();
193                     //std::cout << "time " << qPrintable(t.mid(i, length)) << std::endl;
194                     if (length > 0) {
195                         schedule.append(t.mid(i, length));
196                     }
197                     i += length;
198                     found = true;
199                 }
200                 if (!found) {
201                     //std::cout << "address " << qPrintable(t) << std::endl;
202                     theater_address = t;
203                 }
204             }
205         }
206     }
207     if (xml.hasError()) {
208         emit SearchFinished(GetSearchTaskId(), false);
209         std::cout << "xml error (" << xml.lineNumber() << "/" << xml.columnNumber() << "): " << qPrintable(xml.errorString()) << std::endl;
210         emit Error(GetSearchTaskId());
211         deleteLater();
212     } else if (!_semaphore.IsActive(GetSearchTaskId())) {
213         emit Cancelled(GetSearchTaskId());
214         emit SearchFinished(GetSearchTaskId(), false);
215         deleteLater();
216     } else {
217         if (found > 0) {
218             emit Reply(GetSearchTaskId(), true);
219             Search(GetStartIndex() + found);
220         } else {
221             if (!_date_urls.isEmpty()) {
222                 SearchNextDate();
223             } else {
224                 emit Reply(GetSearchTaskId(), false);
225                 emit SearchFinished(GetSearchTaskId(), true);
226                 deleteLater();
227             }
228         }
229     }
230     reply->deleteLater();
231     //std::cout << "REPLY FINISHED" << std::endl;
232 }
233
234 SearchClientSemaphore MovieScheduleSearchClient::_semaphore;