From 233c098702e04ca9eae7e06f2ec229f6656df26e Mon Sep 17 00:00:00 2001 From: eshe Date: Sun, 18 Jul 2010 21:45:03 +0100 Subject: [PATCH] Small fix to Das Telefonbuch code. --- src/common/dasoertliche.cpp | 325 +++++++++++++++++++++++++++++++++++++++ src/common/dasoertliche.h | 43 ++++++ src/common/dastelefonbuch.cpp | 335 +++++++++++++++++++++++++++++++++++++++++ src/common/dastelefonbuch.h | 44 ++++++ www/index.html | 6 + 5 files changed, 753 insertions(+) create mode 100755 src/common/dasoertliche.cpp create mode 100755 src/common/dasoertliche.h create mode 100755 src/common/dastelefonbuch.cpp create mode 100755 src/common/dastelefonbuch.h diff --git a/src/common/dasoertliche.cpp b/src/common/dasoertliche.cpp new file mode 100755 index 0000000..bc4596f --- /dev/null +++ b/src/common/dasoertliche.cpp @@ -0,0 +1,325 @@ +/* + * This file is part of Jenirok. + * + * Jenirok is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Jenirok is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jenirok. If not, see . 
+ * + */ + +#include +#include "dasoertliche.h" + + +DasOertliche::DasOertliche(QObject* parent): Source(parent) +{ +} + +DasOertliche::~DasOertliche() +{ + abort(); +} + +void DasOertliche::abort() +{ + Source::abort(); + + for(int i = 0; i < pendingSearches_.size(); i++) + { + delete pendingSearches_[i]; + pendingSearches_[i] = 0; + } + + pendingSearches_.clear(); + +} + +void DasOertliche::search(Source::SearchDetails const& details) +{ + resetTimeout(); + + SearchData* newData = new SearchData; + newData->details = details; + newData->currentPage = 1; + newData->finishedSearches = 0; + + if(details.type == Source::BOTH) + { + newData->totalSearches = 2; + Source::SearchDetails tmpDetails = details; + tmpDetails.type = Source::PERSONS; + int id1 = sendQuery(tmpDetails, 1); + tmpDetails.type = Source::YELLOW_PAGES; + int id2 = sendQuery(tmpDetails, 1); + newData->searchIds.insert(id1); + newData->searchIds.insert(id2); + } + else + { + newData->totalSearches = 1; + int id = sendQuery(details, 1); + newData->searchIds.insert(id); + } + + pendingSearches_.push_back(newData); +} + +void DasOertliche::handleHttpData(int id, QByteArray const& data) +{ + QString decoded = QString::fromUtf8(data.data()); + + for(int i = 0; i < pendingSearches_.size(); i++) + { + if(pendingSearches_.at(i) && pendingSearches_.at(i)->searchIds.find(id) != + pendingSearches_.at(i)->searchIds.end()) + { + addNumbers(pendingSearches_.at(i), decoded, i); + break; + } + } +} + +void DasOertliche::handleHttpError(int id) +{ + for(int i = 0; i < pendingSearches_.size(); i++) + { + if(pendingSearches_.at(i) && pendingSearches_.at(i)->searchIds.find(id) != + pendingSearches_.at(i)->searchIds.end()) + { + + setError(Source::CONNECTION_FAILURE, http_.errorString()); + emitRequestFinished(pendingSearches_.at(i), true, i); + break; + } + } +} + +void DasOertliche::addNumbers(SearchData* searchData, + QString const& data, + int index) +{ + //qDebug() << "data: " << data; + + if(data.indexOf("
") == -1) + { + addOnlyNumber(searchData, data, index); + return; + } + + int pos = 0; + QRegExp rx("
(.*)
"); + QRegExp name("
*.*(.*)"); + QRegExp number("(.*)
"); + QRegExp address("
*(.*),(.*)
"); + + rx.setMinimal(true); + name.setMinimal(true); + address.setMinimal(true); + number.setMinimal(true); + + int maxResults = getMaxResults(); + + while((pos = rx.indexIn(data, pos)) != -1) + { + pos += rx.matchedLength(); + + if(searchData->results.size() >= maxResults) + { + break; + } + + QString part = rx.cap(1); + Source::Result result; + QString nameStr; + QString numberStr; + QString streetStr; + QString cityStr; + + if(name.indexIn(part) != -1) + { + nameStr = name.cap(1); + } + + if(address.indexIn(part) != -1) + { + streetStr = address.cap(1); + cityStr = address.cap(2); + } + + if(number.indexIn(part) != -1) + { + numberStr = number.cap(1); + } + + if(formatResult(nameStr, numberStr, streetStr, + cityStr, result)) + { + emit resultAvailable(result, searchData->details); + searchData->results.push_back(result); + } + + } + + searchData->finishedSearches++; + + if(searchData->results.size() >= maxResults) + { + emitRequestFinished(searchData, false, index); + } + else + { + /* TODO: Paging not implemented yet. sendQuery() ignores its page + * argument, so re-querying on a "next page" marker ("Neste", + * Norwegian for "next") would only fetch the same page again + * and duplicate every result. + */ + + if(searchData->finishedSearches >= searchData->totalSearches) + { + emitRequestFinished(searchData, false, index); + } + } + +} + +void DasOertliche::addOnlyNumber(SearchData* searchData, + QString const& data, + int index) +{ + QRegExp name("
.*(.*)"); + QRegExp number("(.*)
"); + QRegExp address("
(.*),(.*)
"); + name.setMinimal(true); + number.setMinimal(true); + address.setMinimal(true); + + Source::Result result; + + QString nameStr; + QString numberStr; + QString streetStr; + QString cityStr; + + if(name.indexIn(data) != -1) + { + nameStr = name.cap(1); + } + + if(number.indexIn(data) != -1) + { + numberStr = number.cap(1); + } + + if(address.indexIn(data) != -1) + { + streetStr = address.cap(1); + cityStr = address.cap(2); + } + + if(formatResult(nameStr, numberStr, streetStr, + cityStr, result)) + { + searchData->results.push_back(result); + emit resultAvailable(result, searchData->details); + } + + emitRequestFinished(searchData, false, index); +} + +bool DasOertliche::formatResult(QString& name, QString& number, + QString& street, QString& city, + Source::Result& result) +{ + name = stripTags(name); + name = htmlEntityDecode(name); + result.name = name.trimmed(); + number = stripTags(number); + number = cleanUpNumber(number); + result.number = number.trimmed(); + street = stripTags(street); + street = htmlEntityDecode(street); + city = stripTags(city); + city = htmlEntityDecode(city); + result.street = street.trimmed(); + result.city = city.trimmed(); + + if(!result.name.isEmpty() && (!getFindNumber() || !result.number.isEmpty())) + { + return true; + } + + return false; +} + +void DasOertliche::emitRequestFinished(SearchData* data, + bool error, int index) +{ + QVector results = data->results; + Source::SearchDetails details = data->details; + + emit requestFinished(results, details, error); + delete pendingSearches_[index]; + pendingSearches_[index] = 0; + pendingSearches_.removeAt(index); +} + +int DasOertliche::sendQuery(Source::SearchDetails const& details, + int page) +{ + QUrl url("http://www.dasoertliche.de/"); + QString query = details.query; + + if(isPhoneNumber(query)) + { + url.addQueryItem("form_name","search_inv"); + url.addQueryItem("ph",query); + } + else + { + url.addQueryItem("form_name","search_reg"); + url.addQueryItem("kw",query); + //More 
details? + if(!details.location.isEmpty()) + { + url.addQueryItem("ci",details.location); + } + } + + /* TODO No differentiation between personal and professional entries yet + if(details.type == Source::YELLOW_PAGES) + { + url.addQueryItem("t", "c"); + } + else + { + url.addQueryItem("t", "p"); + } + */ + + /* TODO No multi-page results yet. + if(page > 1) + { + url.addQueryItem("p", QString::number(page)); + } + */ + + fixUrl(url); + + //They want us to have a name, therefore setting User-Agent here... + QHttpRequestHeader header("GET", url.encodedPath()+ '?' + url.encodedQuery()); + header.setValue("User-Agent","Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.0.10) Gecko/2009042316 Firefox/3.0.10"); + header.setValue("Host", url.encodedHost()); + + http_.setHost(url.host(), url.port(80)); + + return http_.request(header); +} diff --git a/src/common/dasoertliche.h b/src/common/dasoertliche.h new file mode 100755 index 0000000..159d609 --- /dev/null +++ b/src/common/dasoertliche.h @@ -0,0 +1,43 @@ +#ifndef DASOERTLICHE_H +#define DASOERTLICHE_H /* must stay distinct from the DASTELEFONBUCH_H guard used by dastelefonbuch.h */ + +#include +#include +#include +#include +#include "source.h" + +class DasOertliche : public Source +{ +public: + DasOertliche(QObject* parent = 0); + ~DasOertliche(); + virtual void abort(); + virtual void search(Source::SearchDetails const& details); + +private: + + struct SearchData + { + QSet searchIds; + Source::SearchDetails details; + QVector results; + int currentPage; + int totalSearches; + int finishedSearches; + }; + + void addNumbers(SearchData* searchData, QString const& data, int index); + void addOnlyNumber(SearchData* searchData, QString const& data, int index); + bool formatResult(QString& name, QString& number, QString& street, QString& city, Source::Result& result); + void emitRequestFinished(SearchData* data, bool error, int index); + int sendQuery(Source::SearchDetails const& details, int page = 1); + + QList pendingSearches_; + + Q_DISABLE_COPY(DasOertliche); + virtual void handleHttpData(int
id, QByteArray const& data); + virtual void handleHttpError(int id); +}; + +#endif // DASOERTLICHE_H diff --git a/src/common/dastelefonbuch.cpp b/src/common/dastelefonbuch.cpp new file mode 100755 index 0000000..da890f8 --- /dev/null +++ b/src/common/dastelefonbuch.cpp @@ -0,0 +1,335 @@ +/* + * This file is part of Jenirok. + * + * Jenirok is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Jenirok is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jenirok. If not, see . + * + */ + +#include +#include "dastelefonbuch.h" + + +DasTelefonbuch::DasTelefonbuch(QObject* parent): Source(parent) +{ +} + +DasTelefonbuch::~DasTelefonbuch() +{ + abort(); +} + +void DasTelefonbuch::abort() +{ + Source::abort(); + + for(int i = 0; i < pendingSearches_.size(); i++) + { + delete pendingSearches_[i]; + pendingSearches_[i] = 0; + } + + pendingSearches_.clear(); +} + +void DasTelefonbuch::search(Source::SearchDetails const& details) +{ + resetTimeout(); + + SearchData* newData = new SearchData; + newData->details = details; + newData->currentPage = 1; + newData->finishedSearches = 0; + + if(details.type == Source::BOTH) + { + newData->totalSearches = 2; + Source::SearchDetails tmpDetails = details; + tmpDetails.type = Source::PERSONS; + int id1 = sendQuery(tmpDetails, 1); + tmpDetails.type = Source::YELLOW_PAGES; + int id2 = sendQuery(tmpDetails, 1); + newData->searchIds.insert(id1); + newData->searchIds.insert(id2); + } + else + { + newData->totalSearches = 1; + int id = sendQuery(details, 1); +
newData->searchIds.insert(id); + } + + pendingSearches_.push_back(newData); +} + +void DasTelefonbuch::handleHttpData(int id, QByteArray const& data) +{ + QString strData(data); + + for(int i = 0; i < pendingSearches_.size(); i++) + { + if(pendingSearches_.at(i) && pendingSearches_.at(i)->searchIds.find(id) != + pendingSearches_.at(i)->searchIds.end()) + { + addNumbers(pendingSearches_.at(i), strData, i); + break; + } + } +} + +void DasTelefonbuch::handleHttpError(int id) +{ + for(int i = 0; i < pendingSearches_.size(); i++) + { + if(pendingSearches_.at(i) && pendingSearches_.at(i)->searchIds.find(id) != + pendingSearches_.at(i)->searchIds.end()) + { + + setError(Source::CONNECTION_FAILURE, http_.errorString()); + emitRequestFinished(pendingSearches_.at(i), true, i); + break; + } + } +} + +void DasTelefonbuch::addNumbers(SearchData* searchData, + QString const& data, + int index) +{ + if(data.indexOf("1 Gesamttreffer") > 0) + { + addOnlyNumber(searchData, data, index); + return; + } + + int pos = 0; + QRegExp rx(""); + QRegExp name("
(.*)"); + QRegExp number("(.*)"); + QRegExp address("(.*).*(.*)"); + + rx.setMinimal(true); + name.setMinimal(true); + address.setMinimal(true); + number.setMinimal(true); + + int maxResults = getMaxResults(); + + while((pos = rx.indexIn(data, pos)) != -1) + { + pos += rx.matchedLength(); + + if(searchData->results.size() >= maxResults) + { + break; + } + + QString part = rx.cap(1); + Source::Result result; + QString nameStr; + QString numberStr; + QString streetStr; + QString cityStr; + + if(name.indexIn(part) != -1) + { + nameStr = name.cap(1); + } + + if(address.indexIn(part) != -1) + { + streetStr = address.cap(1); + cityStr = address.cap(2); + } + + if(number.indexIn(part) != -1) + { + numberStr = number.cap(1); + } + + if(formatResult(nameStr, numberStr, streetStr, + cityStr, result)) + { + emit resultAvailable(result, searchData->details); + searchData->results.push_back(result); + } + + } + + searchData->finishedSearches++; + + if(searchData->results.size() >= maxResults) + { + emitRequestFinished(searchData, false, index); + } + else + { + /* TODO: Paging not implemented yet + if(data.indexOf("Neste") > 0) + { + searchData->currentPage++; + int id = sendQuery(searchData->details, searchData->currentPage); + searchData->searchIds.insert(id); + } + */ + if (false) + { + } + else if(searchData->finishedSearches >= searchData->totalSearches) + { + emitRequestFinished(searchData, false, index); + } + } + +} + +void DasTelefonbuch::addOnlyNumber(SearchData* searchData, + QString const& data, + int index) +{ + QRegExp name("
(.*) (.*)"); + QRegExp number("(.*)"); + QRegExp address("(.*).*(.*)"); + name.setMinimal(true); + number.setMinimal(true); + address.setMinimal(true); + + Source::Result result; + + QString nameStr; + QString numberStr; + QString streetStr; + QString cityStr; + + if(name.indexIn(data) != -1) + { + nameStr = name.cap(2).simplified() + name.cap(1).simplified(); + nameStr.replace("\n",""); + } + + if(number.indexIn(data) != -1) + { + numberStr = number.cap(1); + } + + if(address.indexIn(data) != -1) + { + streetStr = address.cap(1); + cityStr = address.cap(2); + } + + if(formatResult(nameStr, numberStr, streetStr, + cityStr, result)) + { + searchData->results.push_back(result); + emit resultAvailable(result, searchData->details); + } + + emitRequestFinished(searchData, false, index); +} + +bool DasTelefonbuch::formatResult(QString& name, QString& number, + QString& street, QString& city, + Source::Result& result) +{ + name = stripTags(name); + name = htmlEntityDecode(name); + result.name = name.trimmed(); + number = stripTags(number); + number = cleanUpNumber(number); + result.number = number.trimmed(); + street = stripTags(street); + street = htmlEntityDecode(street); + city = stripTags(city); + city = htmlEntityDecode(city); + result.street = street.trimmed(); + result.city = city.trimmed(); + + if(!result.name.isEmpty() && (!getFindNumber() || !result.number.isEmpty())) + { + return true; + } + return false; +} + +void DasTelefonbuch::emitRequestFinished(SearchData* data, + bool error, int index) +{ + QVector results = data->results; + Source::SearchDetails details = data->details; + + emit requestFinished(results, details, error); + delete pendingSearches_[index]; + pendingSearches_[index] = 0; + pendingSearches_.removeAt(index); +} + +int DasTelefonbuch::sendQuery(Source::SearchDetails const& details, + int page) +{ + Q_UNUSED(page); + + QUrl url("http://www.dastelefonbuch.de/"); + + //Pretending we are a Firefox-Plugin allows a simpler query-String + 
url.addQueryItem("sourceid","Mozilla-search"); + //But we will need to give a proper User-Agent-String...see below + + url.addQueryItem("cmd","search"); + + QString query = details.query; + + if(!details.location.isEmpty()) + { + query += "+" + details.location; + } + + //Query is added as "kw" + url.addQueryItem("kw", query); + + /* TODO No differentiation between personal and professional entries yet + if(details.type == Source::YELLOW_PAGES) + { + url.addQueryItem("t", "c"); + } + else + { + url.addQueryItem("t", "p"); + } + */ + + /* TODO No multi-page results yet. + if(page > 1) + { + url.addQueryItem("p", QString::number(page)); + } + */ + + fixUrl(url); + + //Remember, we are firefox, therefore setting User-Agent here... + QHttpRequestHeader header("GET", url.encodedPath()+ '?' + url.encodedQuery()); + + header.setValue("User-Agent","Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.0.10) Gecko/2009042316 Firefox/3.0.10"); + header.setValue("Host", url.encodedHost()); + + http_.setHost(url.host(), url.port(80)); + + return http_.request(header); +} + +void DasTelefonbuch::getSearchTypes(QList& types) const +{ + types.clear(); + types.push_back(BOTH); +} diff --git a/src/common/dastelefonbuch.h b/src/common/dastelefonbuch.h new file mode 100755 index 0000000..8215305 --- /dev/null +++ b/src/common/dastelefonbuch.h @@ -0,0 +1,44 @@ +#ifndef DASTELEFONBUCH_H +#define DASTELEFONBUCH_H + +#include +#include +#include +#include +#include "source.h" + +class DasTelefonbuch : public Source +{ +public: + DasTelefonbuch(QObject* parent = 0); + ~DasTelefonbuch(); + virtual void abort(); + virtual void search(Source::SearchDetails const& details); + +private: + + struct SearchData + { + QSet searchIds; + Source::SearchDetails details; + QVector results; + int currentPage; + int totalSearches; + int finishedSearches; + }; + + void addNumbers(SearchData* searchData, QString const& data, int index); + void addOnlyNumber(SearchData* searchData, QString const& data, int 
index); + bool formatResult(QString& name, QString& number, QString& street, QString& city, Source::Result& result); + void emitRequestFinished(SearchData* data, bool error, int index); + int sendQuery(Source::SearchDetails const& details, int page = 1); + virtual void getSearchTypes(QList& types) const; + + QList pendingSearches_; + + Q_DISABLE_COPY(DasTelefonbuch); + virtual void handleHttpData(int id, QByteArray const& data); + virtual void handleHttpError(int id); +}; + +#endif // DASTELEFONBUCH_H diff --git a/www/index.html b/www/index.html index 8a888e2..670c2bd 100644 --- a/www/index.html +++ b/www/index.html @@ -61,6 +61,12 @@ make fakeroot make install +

Thanks

+

Konrad Feldmeier - Das Telefonbuch implementation

+

Eirik Lindem and Jørgen Sogn - Norwegian translation

+

Torbjörn Svangård - Swedish translation

+ +

Copyright (C) 2010 Jesse Hakanen

-- 1.7.9.5