// Copyright 2010 Jochen Becher
//
// This file is part of MovieSchedule.
//
// MovieSchedule is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// MovieSchedule is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with MovieSchedule. If not, see <http://www.gnu.org/licenses/>.
#include "movieschedulesearchclient.h"
#include "data/cinemaschedule.h"
#include "data/cinema.h"
#include "data/scheduleentry.h"
#include "data/movie.h"
#include "utils/timeutils.h"
#include "utils/assertedlocker.h"
#include
#include
#include
#include
// Creates a search client bound to one schedule model.
//
// cinema_schedule: the model that ReplyFinished() looks movies up in and adds
//                  cinemas and show times to. Only the pointer is stored here.
// parent:          forwarded unchanged to the AbstractSearchClient constructor.
MovieScheduleSearchClient::MovieScheduleSearchClient(CinemaSchedule *cinema_schedule, QObject *parent)
    : AbstractSearchClient(parent)
    , _cinema_schedule(cinema_schedule)
{
}
void MovieScheduleSearchClient::SearchSchedule(const MovieKey &movie_key, const QString &url)
{
setObjectName(QString("MovieScheduleSearchClient:%1").arg(movie_key.GetName()));
_semaphore.Activate(GetSearchTaskId());
_movie_key = movie_key;
_date = QDate::currentDate();
_url = QUrl::fromEncoded(QString("http://www.google.com" + url).toAscii(), QUrl::TolerantMode);
_dates_seen.clear();
_dates_seen.insert("0");
_date_urls.clear();
Search(0);
}
// Delegates to CancelAll() on the class-wide semaphore.
//
// ReplyFinished() polls that semaphore via IsActive() for its own task id and
// stops processing once it is no longer reported as active.
void MovieScheduleSearchClient::CancelAllRunningSearchs()
{
    _semaphore.CancelAll();
}
// Requests the currently selected _url through the base class.
//
// start: passed through unchanged as the start argument of
//        AbstractSearchClient::Search().
void MovieScheduleSearchClient::Search(int start)
{
    AbstractSearchClient::Search(_url, start);
}
void MovieScheduleSearchClient::SearchNextDate()
{
if (_date_urls.isEmpty()) {
return;
}
QPair pair = _date_urls.dequeue();
_date = pair.second;
_url = pair.first;
Search(0);
}
// Parser states used by MovieScheduleSearchClient::ReplyFinished().
enum State {
    // Initial/default state: looking for <a href="/m/movies..."> links.
    PARSE_HTML,
    // Inside a link whose URL carries a "date" query item.
    PARSE_DATE_LINK,
    // A theater link has been closed; following <a>, <br> and <span> elements
    // are read as details of that theater.
    PARSE_THEATER_DIV,
    // Inside a link whose URL carries a "tid" query item; its text is taken as
    // the theater name.
    PARSE_THEATER_LINK,
    // Inside a theater-section link whose href starts with "wtai:"; its text
    // is taken as the phone number.
    PARSE_PHONE_LINK,
    // Inside any other link of the theater section; its text is ignored.
    PARSE_LINK,
    // Inside a <br> element of the theater section.
    PARSE_BR,
    // Inside a <span> of the theater section; its text is either show times
    // or, when no time matches, the theater address.
    PARSE_SPAN,
    // Inside a link whose URL carries a "start" query item.
    PARSE_NEXT_PAGE_LINK
};
// Parses one reply page and decides how the search continues.
//
// The reply body is read with QXmlStreamReader and fed through a small state
// machine (see enum State):
//   * links to "/m/movies..." with a "date" query item are queued in
//     _date_urls (once per date value) for later requests,
//   * links with a "tid" query item start a new theater record,
//   * links with a "start" query item and sort == "0" that point beyond the
//     current start index are remembered as the next result page,
//   * inside a theater section, link/span text supplies phone number,
//     show times and address.
// When a theater section ends and show times were found, they are written to
// _cinema_schedule under its write lock.
//
// Afterwards exactly one of the following happens:
//   * XML error:  SearchFinished(id, false), a message on stdout, Error(id),
//                 and this object is scheduled for deletion.
//   * cancelled:  Cancelled(id), SearchFinished(id, false), scheduled for
//                 deletion.
//   * next page:  Reply(id, true) and the next page is requested.
//   * next date:  the next queued date URL is requested.
//   * otherwise:  Reply(id, false), SearchFinished(id, true), scheduled for
//                 deletion.
// The reply object is always scheduled for deletion.
//
// reply: the finished network reply to read the page from.
void MovieScheduleSearchClient::ReplyFinished(QNetworkReply *reply)
{
    QXmlStreamReader xml(reply);
    State state = PARSE_HTML;

    // Data collected for the theater currently being parsed.
    QString theater_name;
    QString theater_address;
    QString theater_phone;
    QList<QString> schedule;

    QRegExp time_pattern("\\d+:\\d+([aApP][mM])*");

    // Set together when a link to a later result page is seen.
    QString next_page_url;
    int next_page_start = 0;

    while (!xml.atEnd()) {
        QXmlStreamReader::TokenType token = xml.readNext();
        if (token == QXmlStreamReader::StartElement) {
            QString attr_href = xml.attributes().value("href").toString();
            if (state == PARSE_HTML && xml.name() == "a" && attr_href.startsWith("/m/movies")) {
                QUrl url = QUrl::fromEncoded(QString("http://www.google.com" + attr_href).toAscii(), QUrl::TolerantMode);
                if (url.hasQueryItem("date")) {
                    QString v = url.queryItemValue("date");
                    if (!_dates_seen.contains(v)) {
                        // TODO replace location with user selected location (Google simplifies too much)
                        _dates_seen.insert(v);
                        // The "date" value is used as a day offset from today.
                        _date_urls.append(qMakePair(url, QDate::currentDate().addDays(v.toInt())));
                    }
                    state = PARSE_DATE_LINK;
                } else if (url.hasQueryItem("tid")) {
                    // A new theater begins: forget what was collected before.
                    theater_name = "";
                    theater_address = "";
                    theater_phone = "";
                    schedule.clear();
                    state = PARSE_THEATER_LINK;
                } else if (url.hasQueryItem("start")) {
                    QString sort = url.queryItemValue("sort");
                    QString start = url.queryItemValue("start");
                    int istart = start.toInt();
                    // Only follow links that lead past the page just read.
                    if (sort == "0" && istart > GetStartIndex()) {
                        next_page_url = attr_href;
                        next_page_start = istart;
                    }
                    state = PARSE_NEXT_PAGE_LINK;
                } else {
                    state = PARSE_HTML;
                }
            } else if (state == PARSE_THEATER_DIV && xml.name() == "a") {
                if (attr_href.startsWith("wtai:")) {
                    state = PARSE_PHONE_LINK;
                } else {
                    state = PARSE_LINK;
                }
            } else if (state == PARSE_THEATER_DIV && xml.name() == "br") {
                state = PARSE_BR;
            } else if (state == PARSE_THEATER_DIV && xml.name() == "span") {
                state = PARSE_SPAN;
            } else {
                // Any other start element abandons the current context.
                state = PARSE_HTML;
            }
        } else if (token == QXmlStreamReader::EndElement) {
            // NOTE: this stays an if/else chain on purpose; the 'break' below
            // must leave the enclosing while loop.
            if (state == PARSE_DATE_LINK) {
                state = PARSE_HTML;
            } else if (state == PARSE_THEATER_LINK) {
                state = PARSE_THEATER_DIV;
            } else if (state == PARSE_BR) {
                state = PARSE_THEATER_DIV;
            } else if (state == PARSE_SPAN) {
                state = PARSE_THEATER_DIV;
            } else if (state == PARSE_LINK) {
                state = PARSE_THEATER_DIV;
            } else if (state == PARSE_PHONE_LINK) {
                state = PARSE_THEATER_DIV;
            } else if (state == PARSE_THEATER_DIV) {
                // End of the theater section: store what was collected.
                state = PARSE_HTML;
                if (!theater_name.isEmpty()) {
                    if (!schedule.isEmpty()) {
                        AssertedWriteLocker locker(_cinema_schedule->GetLock());
                        // Checked under the lock: stop parsing once this
                        // search is no longer active.
                        if (!_semaphore.IsActive(GetSearchTaskId())) {
                            break;
                        }
                        const Movie *movie = _cinema_schedule->FindMovie(_movie_key);
                        if (movie != 0) {
                            CinemaKey key(theater_name, theater_address);
                            Cinema *cinema = _cinema_schedule->FindCinema(key);
                            if (cinema == 0) {
                                cinema = _cinema_schedule->AddCinema(key);
                            }
                            if (!theater_phone.isEmpty()) {
                                cinema->SetTelephone(theater_phone);
                            }
                            Q_FOREACH (const QTime &show_time, TimesFromString(schedule)) {
                                // interpret very early times as shifted by 1 day (seems to be a Google logic)
                                const QDate show_date = (show_time.hour() < 3) ? _date.addDays(1) : _date;
                                _cinema_schedule->AddSchedule(cinema, movie, show_time, show_date);
                            }
                        }
                    }
                }
            } else if (state == PARSE_NEXT_PAGE_LINK) {
                state = PARSE_HTML;
            }
        } else if (token == QXmlStreamReader::Characters) {
            if (state == PARSE_THEATER_LINK) {
                theater_name = xml.text().toString();
            } else if (state == PARSE_PHONE_LINK) {
                theater_phone = xml.text().toString();
            } else if (state == PARSE_SPAN) {
                // A span holds either show times or, if no time matches,
                // the theater address.
                QString t = xml.text().toString();
                int i = 0;
                bool found_time = false;
                while ((i = time_pattern.indexIn(t, i)) != -1) {
                    int length = time_pattern.matchedLength();
                    if (length > 0) {
                        schedule.append(t.mid(i, length));
                    }
                    i += length;
                    found_time = true;
                }
                if (!found_time) {
                    theater_address = t;
                }
            }
        }
    }

    if (xml.hasError()) {
        emit SearchFinished(GetSearchTaskId(), false);
        std::cout << "xml error (" << xml.lineNumber() << "/" << xml.columnNumber() << "): " << qPrintable(xml.errorString()) << std::endl;
        emit Error(GetSearchTaskId());
        deleteLater();
    } else if (!_semaphore.IsActive(GetSearchTaskId())) {
        emit Cancelled(GetSearchTaskId());
        emit SearchFinished(GetSearchTaskId(), false);
        deleteLater();
    } else {
        if (!next_page_url.isEmpty()) {
            // More result pages for the current date.
            emit Reply(GetSearchTaskId(), true);
            SearchEncodedUrl(next_page_url, next_page_start);
        } else {
            if (!_date_urls.isEmpty()) {
                SearchNextDate();
            } else {
                // Nothing left to request: the search is complete.
                emit Reply(GetSearchTaskId(), false);
                emit SearchFinished(GetSearchTaskId(), true);
                deleteLater();
            }
        }
    }
    reply->deleteLater();
}
// Definition of the static semaphore member shared by all
// MovieScheduleSearchClient instances. SearchSchedule() activates a task id on
// it, ReplyFinished() queries it with IsActive(), and
// CancelAllRunningSearchs() calls CancelAll() on it.
SearchClientSemaphore MovieScheduleSearchClient::_semaphore;