// Copyright 2010 Jochen Becher
//
// This file is part of MovieSchedule.
//
// MovieSchedule is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// MovieSchedule is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with MovieSchedule. If not, see <http://www.gnu.org/licenses/>.
#include "theaterschedulesearchclient.h"
#include "data/cinemaschedule.h"
#include "data/cinema.h"
#include "data/scheduleentry.h"
#include "data/movie.h"
#include "utils/timeutils.h"
#include "utils/assertedlocker.h"
#include
#include
#include
// Creates a search client that stores the given schedule pointer for later
// use by ReplyFinished(); `parent` is forwarded to the base class.
TheaterScheduleSearchClient::TheaterScheduleSearchClient(CinemaSchedule *cinema_schedule,
                                                         QObject *parent)
    : AbstractSearchClient(parent)
    , _cinema_schedule(cinema_schedule)
{
    // Nothing else to set up here; per-search state is reset in SearchSchedule().
}
void TheaterScheduleSearchClient::SearchSchedule(const CinemaKey &cinema_key, const QString &url)
{
setObjectName(QString("TheaterScheduleSearchClient:%1").arg(cinema_key.GetName()));
_semaphore.Activate(GetSearchTaskId());
_cinema_key = cinema_key;
_date = QDate::currentDate();
_url = QUrl::fromEncoded(QString("http://www.google.com" + url).toAscii(), QUrl::TolerantMode);
_dates_seen.clear();
_dates_seen.insert("0");
_date_urls.clear();
Search(0);
}
// Cancels every search task registered on the class-wide semaphore.
// ReplyFinished() checks the semaphore and reports such tasks as cancelled.
void TheaterScheduleSearchClient::CancelAllRunningSearchs()
{
    _semaphore.CancelAll();
}
void TheaterScheduleSearchClient::Search(int start)
{
AbstractSearchClient::Search(_url, start);
}
void TheaterScheduleSearchClient::SearchNextDate()
{
if (_date_urls.isEmpty()) {
return;
}
QPair pair = _date_urls.dequeue();
_date = pair.second;
_url = pair.first;
Search(0);
}
// States of the hand-written parser in ReplyFinished(). Any start tag that
// does not match the expected structure resets the parser to PARSE_HTML.
enum State {
    // Default state: looking for <a> elements whose href starts with "/m/movies".
    PARSE_HTML = 0,
    // Inside a link that carries a "date" query item.
    PARSE_DATE_LINK,
    // Movie entry is complete; the next end tag stores the collected data.
    PARSE_MOVIE_DIV,
    // Movie link has been closed; the next end tag leads to PARSE_MOVIE_EXPECT_DIV2.
    PARSE_MOVIE_DIV1,
    // Inside a link that carries a "mid" query item; its text is the movie name.
    PARSE_MOVIE_LINK,
    // Waiting for the <div> that opens the movie details.
    PARSE_MOVIE_EXPECT_DIV2,
    // Inside the details <div>; <span>, <a> and <img> children are handled.
    PARSE_MOVIE_DIV2,
    // Inside a details <span>; its text is collected.
    PARSE_MOVIE_SPAN,
    // Inside an <a> within the details <div>; its content is ignored.
    PARSE_MOVIE_TRAILER_LINK,
    // Inside the <img> from whose "src" attribute the rating was read.
    PARSE_MOVIE_RATING,
    // Waiting for the <div> that holds the show times.
    PARSE_MOVIE_EXPECT_DIV3,
    // Inside the show times <div>; <span> children carry the times.
    PARSE_MOVIE_DIV3,
    // Inside a show times <span>; its text is scanned for times.
    PARSE_MOVIE_TIME,
    // Inside a link that carries a "start" query item (paging link).
    PARSE_NEXT_PAGE_LINK
};
// Parses a finished reply and stores the movies and show times it contains.
//
// The reply body is read with QXmlStreamReader and walked by a small state
// machine (see enum State):
//   - links with a "date" query item are queued as further date pages,
//   - links with a "mid" query item start a movie entry (name, detail spans,
//     rating image, show times),
//   - links with a "start" query item are remembered as the next result page.
// Each completed movie entry with at least one show time is written to the
// cinema schedule under its write lock.
//
// Afterwards exactly one of these happens:
//   - XML error:  SearchFinished(false) and Error are emitted, client is deleted,
//   - cancelled:  Cancelled and SearchFinished(false) are emitted, client is deleted,
//   - more pages: Reply(true) is emitted and the next page / date is requested,
//   - done:       Reply(false) and SearchFinished(true) are emitted, client is deleted.
//
// reply: the finished network reply; always scheduled for deletion on return.
void TheaterScheduleSearchClient::ReplyFinished(QNetworkReply *reply)
{
    // Position inside the "src" attribute of the rating image at which the
    // rating digit is read.
    const int rating_digit_index = 41;

    State state = PARSE_HTML;
    QString movie_name;
    QVector<QString> movie_spans;
    QString theaters_url;
    double rating = -1.0;
    QList<QString> schedule;
    QRegExp time_pattern("\\d+:\\d+([aApP][mM])*");
    QRegExp duration_pattern("((\\d+)hr )?(\\d+)min");
    QRegExp reviews_pattern("\\d+ review(s)?");
    QXmlStreamReader xml(reply);
    QString next_page_url;
    int next_page_start = 0;

    while (!xml.atEnd()) {
        QXmlStreamReader::TokenType token = xml.readNext();
        if (token == QXmlStreamReader::StartElement) {
            QString attr_href = xml.attributes().value("href").toString();
            if (state == PARSE_HTML && xml.name() == "a" && attr_href.startsWith("/m/movies")) {
                QUrl url = QUrl::fromEncoded(QString("http://www.google.com" + attr_href).toAscii(), QUrl::TolerantMode);
                if (url.hasQueryItem("date")) {
                    // The "date" value is used as a day offset relative to today.
                    QString v = url.queryItemValue("date");
                    if (!_dates_seen.contains(v)) {
                        // TODO replace location with user selected location (Google simplifies too much)
                        _dates_seen.insert(v);
                        _date_urls.append(qMakePair(url, QDate::currentDate().addDays(v.toInt())));
                    }
                    state = PARSE_DATE_LINK;
                } else if (url.hasQueryItem("mid")) {
                    // A new movie entry begins: reset everything collected so far.
                    movie_name = "";
                    movie_spans.clear();
                    theaters_url = attr_href;
                    rating = -1.0;
                    schedule.clear();
                    state = PARSE_MOVIE_LINK;
                } else if (url.hasQueryItem("start")) {
                    QString sort = url.queryItemValue("sort");
                    QString start = url.queryItemValue("start");
                    int istart = start.toInt();
                    // Only follow paging links that lead beyond the current page.
                    if (sort == "0" && istart > GetStartIndex()) {
                        next_page_url = attr_href;
                        next_page_start = istart;
                    }
                    state = PARSE_NEXT_PAGE_LINK;
                } else {
                    state = PARSE_HTML;
                }
            } else if (state == PARSE_MOVIE_EXPECT_DIV2 && xml.name() == "div") {
                state = PARSE_MOVIE_DIV2;
            } else if (state == PARSE_MOVIE_DIV2 && xml.name() == "span") {
                // Open a fresh slot; the Characters branch fills it.
                movie_spans.append("");
                state = PARSE_MOVIE_SPAN;
            } else if (state == PARSE_MOVIE_DIV2 && xml.name() == "a") {
                state = PARSE_MOVIE_TRAILER_LINK;
            } else if (state == PARSE_MOVIE_DIV2 && xml.name() == "img") {
                // Guard the fixed index: a shorter "src" must not be read out of
                // range. A negative rating is treated as "unknown" further below.
                const QString src = xml.attributes().value("src").toString();
                if (src.size() > rating_digit_index) {
                    rating = static_cast<double>(src.at(rating_digit_index).digitValue()) / 10.0;
                } else {
                    rating = -1.0;
                }
                state = PARSE_MOVIE_RATING;
            } else if (state == PARSE_MOVIE_EXPECT_DIV3 && xml.name() == "div") {
                state = PARSE_MOVIE_DIV3;
            } else if (state == PARSE_MOVIE_DIV3 && xml.name() == "span") {
                state = PARSE_MOVIE_TIME;
            } else {
                // Unexpected tag: abandon the current structure.
                state = PARSE_HTML;
            }
        } else if (token == QXmlStreamReader::EndElement) {
            if (state == PARSE_DATE_LINK) {
                state = PARSE_HTML;
            } else if (state == PARSE_MOVIE_LINK) {
                state = PARSE_MOVIE_DIV1;
            } else if (state == PARSE_MOVIE_DIV1) {
                state = PARSE_MOVIE_EXPECT_DIV2;
            } else if (state == PARSE_MOVIE_SPAN) {
                state = PARSE_MOVIE_DIV2;
            } else if (state == PARSE_MOVIE_TRAILER_LINK) {
                state = PARSE_MOVIE_DIV2;
            } else if (state == PARSE_MOVIE_RATING) {
                state = PARSE_MOVIE_DIV2;
            } else if (state == PARSE_MOVIE_DIV2) {
                state = PARSE_MOVIE_EXPECT_DIV3;
            } else if (state == PARSE_MOVIE_TIME) {
                state = PARSE_MOVIE_DIV3;
            } else if (state == PARSE_MOVIE_DIV3) {
                state = PARSE_MOVIE_DIV;
            } else if (state == PARSE_MOVIE_DIV) {
                // The movie entry is complete; store it if it has a name and times.
                if (!movie_name.isEmpty() && !schedule.isEmpty()) {
                    AssertedWriteLocker locker(_cinema_schedule->GetLock());
                    if (!_semaphore.IsActive(GetSearchTaskId())) {
                        // Cancelled in the meantime: stop parsing altogether.
                        break;
                    }
                    const Cinema *cinema = _cinema_schedule->FindCinema(_cinema_key);
                    if (cinema != 0) {
                        MovieKey key(movie_name);
                        Movie *movie = _cinema_schedule->FindMovie(key);
                        if (movie == 0) {
                            movie = _cinema_schedule->AddMovie(key);
                        }
                        if (!theaters_url.isEmpty()) {
                            movie->SetTheatersUrl(theaters_url);
                        }
                        if (rating >= 0.0) {
                            movie->SetRate(rating);
                        }
                        Q_FOREACH (const QString &s, movie_spans) {
                            if (duration_pattern.exactMatch(s)) {
                                // Normalise to hours/minutes so that an hour-less
                                // duration such as "95min" still gives a valid QTime.
                                const int total_minutes = duration_pattern.cap(2).toInt() * 60
                                                          + duration_pattern.cap(3).toInt();
                                movie->SetDuration(QTime(total_minutes / 60, total_minutes % 60));
                            } else if (reviews_pattern.exactMatch(s)) {
                                // Ignore number of reviews
                            } else {
                                movie->SetComment(s);
                            }
                        }
                        QList<QTime> schedule_times = TimesFromString(schedule);
                        Q_FOREACH (const QTime &time, schedule_times) {
                            // interpret very early times as shifted by 1 day (seems to be a Google logic)
                            const QDate show_date = (time.hour() < 3) ? _date.addDays(1) : _date;
                            _cinema_schedule->AddSchedule(cinema, movie, time, show_date);
                        }
                    }
                }
                state = PARSE_HTML;
            } else if (state == PARSE_NEXT_PAGE_LINK) {
                state = PARSE_HTML;
            }
        } else if (token == QXmlStreamReader::Characters) {
            if (state == PARSE_MOVIE_LINK) {
                movie_name = xml.text().toString();
            } else if (state == PARSE_MOVIE_SPAN) {
                // Text may arrive in several chunks; join them with a blank.
                int i = movie_spans.size() - 1;
                if (movie_spans[i].isEmpty()) {
                    movie_spans[i] = xml.text().toString();
                } else if (!xml.text().isEmpty()) {
                    movie_spans[i] += " ";
                    movie_spans[i] += xml.text().toString();
                }
            } else if (state == PARSE_MOVIE_TIME) {
                // Collect every time-like token contained in the text.
                QString t = xml.text().toString();
                int i = 0;
                while ((i = time_pattern.indexIn(t, i)) != -1) {
                    int length = time_pattern.matchedLength();
                    if (length > 0) {
                        schedule.append(t.mid(i, length));
                    }
                    i += length;
                }
            }
        }
    }

    if (xml.hasError()) {
        emit SearchFinished(GetSearchTaskId(), false);
        std::cout << "xml error (" << xml.lineNumber() << "/" << xml.columnNumber() << "): " << qPrintable(xml.errorString()) << std::endl;
        emit Error(GetSearchTaskId());
        deleteLater();
    } else if (!_semaphore.IsActive(GetSearchTaskId())) {
        emit Cancelled(GetSearchTaskId());
        emit SearchFinished(GetSearchTaskId(), false);
        // No further request will be issued, so release the client here as the
        // other terminal branches do.
        deleteLater();
    } else if (!next_page_url.isEmpty()) {
        emit Reply(GetSearchTaskId(), true);
        SearchEncodedUrl(next_page_url, next_page_start);
    } else if (!_date_urls.isEmpty()) {
        emit Reply(GetSearchTaskId(), true);
        SearchNextDate();
    } else {
        emit Reply(GetSearchTaskId(), false);
        emit SearchFinished(GetSearchTaskId(), true);
        deleteLater();
    }
    reply->deleteLater();
}
// Definition of the class-wide (static) semaphore. This file uses it to mark a
// search task active, to check whether a task is still active, and to cancel
// all tasks at once.
SearchClientSemaphore TheaterScheduleSearchClient::_semaphore;