Finnish Eniro search fixed. Changelog updated. Some modifications to control file.
[jenirok] / src / common / eniro.cpp
index 56f27fe..85d9fc3 100644 (file)
 
 namespace
 {
-    static const QString SITE_URLS[] =
+    static const QString SITE_URLS[Eniro::SITE_COUNT] =
     {
-        "http://wap.eniro.fi/",
-        "http://wap.eniro.se/",
-        "http://wap.eniro.dk/"
+            "http://wap.eniro.fi/",
+            "http://wap.eniro.se/",
+            "http://wap.eniro.dk/"
     };
 
-    const QString INVALID_LOGIN_STRING = "Invalid login details";
-    const QString PERSON_REGEXP = "<td class=\"hTd2\">(.*)<b>(.*)</td>";
-    const QString YELLOW_REGEXP = "<td class=\"hTd2\">(.*)<span class=\"gray\"\\}>(.*)</td>";
-    const QString NUMBER_REGEXP = "<div class=\"callRow\">(.*)</div>";
-    const QString LOGIN_CHECK = "<input class=\"inpTxt\" id=\"loginformUsername\"";
-}
+    static const QString SITE_NAMES[Eniro::SITE_COUNT] =
+    {
+         "finnish",
+         "swedish",
+         "danish"
+    };
 
-// Regexp used to remove numbers from string
-QRegExp Eniro::numberCleaner_ = QRegExp("([^0-9]+)");
+    static const QString SITE_IDS[Eniro::SITE_COUNT] =
+    {
+         "fi",
+         "se",
+         "dk"
+    };
 
-// Removes html tags from string
-QRegExp Eniro::tagStripper_ = QRegExp("<([^>]+)>");
+    static const QString INVALID_LOGIN_STRING = "Invalid login details";
+    static const QString TIMEOUT_STRING = "Request timed out";
+    static const QString PERSON_REGEXP = "<td class=\"hTd2\">(.*)<b>(.*)</td>";
+    static const QString YELLOW_REGEXP = "<td class=\"hTd2\">(.*)<span class=\"gray\">(.*)</td>|<td class=\"hTd2\">(.*)<span class=\"bold\"\\}>(.*)</td>";
+    static const QString SINGLE_REGEXP = "<div class=\"header\">(.*)</div>(.*)<div class=\"callRow\">(.*)(<div class=\"block\">|</p>(.*)<br/>|</p>(.*)<br />)";
+    static const QString NUMBER_REGEXP = "<div class=\"callRow\">(.*)</div>";
+    static const QString LOGIN_CHECK = "<input class=\"inpTxt\" id=\"loginformUsername\"";
+}
 
-Eniro::Eniro(Site site, QObject *parent): QObject(parent), site_(site),
-username_(""), password_(""), loggedIn_(false), error_(NO_ERROR),
-errorString_(""), maxResults_(10), findNumber_(true),
-pendingSearches_(), pendingNumberRequests_()
+// Creates a logged-out source that targets the Finnish site with empty
+// credentials and no pending requests. Use setSite() to pick another site.
+Eniro::Eniro(QObject *parent):
+    Source(parent),
+    site_(Eniro::FI),
+    loggedIn_(false),
+    username_(""),
+    password_(""),
+    timerId_(0),
+    pendingSearches_(),
+    pendingNumberRequests_()
 {
-    connect(&http_, SIGNAL(requestFinished(int, bool)), this, SLOT(httpReady(int, bool)));
 }
 
 Eniro::~Eniro()
 {
-    abort();
 }
 
 void Eniro::abort()
 {
-    http_.abort();
+    Source::abort();
 
     for(searchMap::iterator sit = pendingSearches_.begin();
-        sit != pendingSearches_.end(); sit++)
+    sit != pendingSearches_.end(); sit++)
     {
         if(sit.value() != 0)
         {
@@ -71,7 +78,7 @@ void Eniro::abort()
     pendingSearches_.clear();
 
     for(numberMap::iterator nit = pendingNumberRequests_.begin();
-        nit != pendingNumberRequests_.end(); nit++)
+    nit != pendingNumberRequests_.end(); nit++)
     {
         if(nit.value() != 0)
         {
@@ -84,19 +91,33 @@ void Eniro::abort()
     pendingLoginRequests_.clear();
 }
 
-void Eniro::setMaxResults(unsigned int value)
+// Selects the Eniro site (see Eniro::Site) that is stored in site_ and
+// consulted when searches are built and their results are parsed.
+void Eniro::setSite(Eniro::Site site)
 {
-    maxResults_ = value;
+    this->site_ = site;
 }
 
-void Eniro::setFindNumber(bool value)
+// Timer handler: if the http request currently in progress belongs to a
+// pending search, aborts everything and reports the search as finished
+// with a TIMEOUT error. Does nothing otherwise.
+void Eniro::timerEvent(QTimerEvent* t)
 {
-       findNumber_ = value;
-}
+    Q_UNUSED(t);
 
-void Eniro::setSite(Eniro::Site site)
-{
-    site_ = site;
+    const int activeId = http_.currentId();
+
+    // No request is in progress
+    if(!activeId)
+    {
+        return;
+    }
+
+    searchMap::const_iterator pending = pendingSearches_.find(activeId);
+
+    // The active request is not a search
+    if(pending == pendingSearches_.end())
+    {
+        return;
+    }
+
+    // Copy what the signal needs first: abort() clears pendingSearches_,
+    // so the iterator must not be used after it
+    QVector <Eniro::Result> results = pending.value()->results;
+    SearchDetails details = pending.value()->details;
+
+    abort();
+    setError(TIMEOUT, TIMEOUT_STRING);
+
+    emit requestFinished(results, details, true);
 }
 
 void Eniro::login(QString const& username,
@@ -114,23 +135,14 @@ void Eniro::logout()
     loggedIn_ = false;
 }
 
-void Eniro::testLogin()
+void Eniro::search(SearchDetails const& details)
 {
-    QUrl url = createUrl("", "");
+    resetTimeout();
 
-    url.addQueryItem("what", "mobwp");
-    http_.setHost(url.host(), url.port(80));
-    int id = http_.get(url.encodedPath() + '?' + url.encodedQuery());
-
-    pendingLoginRequests_.insert(id);
-}
-
-bool Eniro::search(SearchDetails const& details)
-{
     SearchType type = details.type;
 
     // Only logged in users can use other than person search
-    if(!loggedIn_)
+    if(!loggedIn_ && site_ == FI)
     {
         type = PERSONS;
     }
@@ -138,7 +150,13 @@ bool Eniro::search(SearchDetails const& details)
     QUrl url = createUrl(details.query, details.location);
     QString what;
 
-    if(loggedIn_)
+    // We must use full search instead of wap page because wap search is currently not
+    // working for persons
+    if(loggedIn_ && type == PERSONS && site_ == FI && getMaxResults() > 1)
+    {
+        what = "wp";
+    }
+    else if(loggedIn_ || site_ != FI)
     {
         switch(type)
         {
@@ -152,6 +170,7 @@ bool Eniro::search(SearchDetails const& details)
 
         default:
             what = "moball";
+            break;
         }
 
     }
@@ -165,7 +184,9 @@ bool Eniro::search(SearchDetails const& details)
     http_.setHost(url.host(), url.port(80));
     int id = http_.get(url.encodedPath() + '?' + url.encodedQuery());
 
-    QVector <Result> results;
+    //qDebug() << "Url: " << url.host() << url.encodedPath() << "?" << url.encodedQuery();
+
+    QVector <Source::Result> results;
 
     // Store search data for later identification
     SearchData* newData = new SearchData;
@@ -177,111 +198,103 @@ bool Eniro::search(SearchDetails const& details)
     // Store request id so that it can be identified later
     pendingSearches_[id] = newData;
 
-    return true;
-}
-
-Eniro::Error Eniro::error() const
-{
-    return error_;
-}
-
-const QString& Eniro::errorString() const
-{
-    return errorString_;
 }
 
-void Eniro::httpReady(int id, bool error)
+// Routes the body of a completed http response to the handler of the
+// pending request identified by id: a search, a phone number lookup or a
+// login check. Ids that match no pending request are ignored.
+//
+// id   - id of the http request the data belongs to
+// data - response body; an empty body is treated as a connection failure
+//        for searches and number lookups
+void Eniro::handleHttpData(int id, QByteArray const& data)
 {
-    if(error)
-    {
-        qDebug() << "Error: " << http_.errorString();
-    }
-
     searchMap::const_iterator searchIt;
     numberMap::const_iterator numberIt;
 
     // Check if request is pending search request
     if((searchIt = pendingSearches_.find(id)) !=
-       pendingSearches_.end())
+        pendingSearches_.end())
     {
-        if(error)
+        if(data.isEmpty())
         {
-            error_ = CONNECTION_FAILURE;
-            errorString_ = http_.errorString();
+            setError(CONNECTION_FAILURE, "Server returned empty data");
             emitRequestFinished(id, searchIt.value(), true);
             return;
         }
 
-        QString result(http_.readAll());
-
         // Load results from html data
-        loadResults(id, result);
+        loadResults(id, data);
     }
 
     // Check if request is pending number requests
     else if((numberIt = pendingNumberRequests_.find(id)) !=
-            pendingNumberRequests_.end())
+        pendingNumberRequests_.end())
     {
-        if(error)
+        if(data.isEmpty())
         {
-            error_ = CONNECTION_FAILURE;
-            errorString_ = http_.errorString();
+            setError(CONNECTION_FAILURE, "Server returned empty data");
+
+            // searchIt equals pendingSearches_.end() in this branch, so it
+            // must not be dereferenced. Discard the failed number request
+            // the same way handleHttpError() does.
+            // NOTE(review): the search this number belongs to is not
+            // finished here - confirm whether it should be.
             delete pendingNumberRequests_[id];
             pendingNumberRequests_.remove(id);
             return;
         }
 
-        QString result(http_.readAll());
-
         // Load number from html data
-        loadNumber(id, result);
+        loadNumber(id, data);
     }
 
     // Check for login request
     else if(pendingLoginRequests_.find(id) !=
-            pendingLoginRequests_.end())
+        pendingLoginRequests_.end())
     {
-       bool success = true;
-
-       if(!error)
-       {
-               QString result(http_.readAll());
-
-               // If html source contains LOGIN_CHECK, login failed
-               if(result.indexOf(LOGIN_CHECK) != -1)
-               {
-                       success = false;
-               }
-       }
-       else
-       {
-               success = false;
-       }
+        bool success = true;
+
+        // If html source contains LOGIN_CHECK, login failed
+        if(data.indexOf(LOGIN_CHECK) != -1)
+        {
+            success = false;
+        }
 
         emit loginStatus(success);
     }
 
 }
 
+// Handles a failed http request. Depending on which kind of pending
+// request id belongs to, the failure is reported as a finished search
+// with an error, recorded and the number request dropped, or signalled
+// as a failed login. Unknown ids are ignored.
+void Eniro::handleHttpError(int id)
+{
+    // Failed search: report the connection error to the listener
+    searchMap::const_iterator failedSearch = pendingSearches_.find(id);
+
+    if(failedSearch != pendingSearches_.end())
+    {
+        setError(CONNECTION_FAILURE, http_.errorString());
+        emitRequestFinished(id, failedSearch.value(), true);
+        return;
+    }
+
+    // Failed number lookup: record the error and discard the request
+    if(pendingNumberRequests_.find(id) != pendingNumberRequests_.end())
+    {
+        setError(CONNECTION_FAILURE, http_.errorString());
+        delete pendingNumberRequests_[id];
+        pendingNumberRequests_.remove(id);
+        return;
+    }
+
+    // Failed login check
+    if(pendingLoginRequests_.find(id) != pendingLoginRequests_.end())
+    {
+        emit loginStatus(false);
+    }
+}
+
 // Loads results from html source code
 void Eniro::loadResults(int id, QString const& httpData)
 {
     searchMap::iterator it = pendingSearches_.find(id);
-    QString expr;
 
-    switch(it.value()->details.type)
+    // Finnish person search is not working in wap mode so we have to use different type of loading
+    if(getMaxResults() > 1 && loggedIn_ && site_ == FI && it.value()->details.type == PERSONS)
     {
-      case YELLOW_PAGES:
-        expr = YELLOW_REGEXP;
-        break;
-      case PERSONS:
-        expr = PERSON_REGEXP;
-        break;
-      default:
+        loadFinnishPersonResults(id, httpData);
         return;
     }
 
-    QRegExp rx(expr);
+    QRegExp rx("((" + YELLOW_REGEXP + ")|(" + PERSON_REGEXP + ")|(" + SINGLE_REGEXP + "))");
     rx.setMinimal(true);
 
     bool requestsPending = false;
@@ -291,15 +304,17 @@ void Eniro::loadResults(int id, QString const& httpData)
     // Find all matches
     while((pos = rx.indexIn(httpData, pos)) != -1)
     {
-       pos += rx.matchedLength();
+        pos += rx.matchedLength();
 
-       data = rx.cap(2);
-       data = stripTags(data);
-       QStringList rows = data.split('\n');
+        data = rx.cap(1);
 
-       for(int i = 0; i < rows.size(); i++)
-       {
-           // Remove white spaces
+        data = stripTags(data);
+
+        QStringList rows = data.split('\n');
+
+        for(int i = 0; i < rows.size(); i++)
+        {
+            // Remove white spaces
             QString trimmed = rows.at(i).trimmed().toLower();
 
             // Remove empty strings
@@ -310,71 +325,124 @@ void Eniro::loadResults(int id, QString const& httpData)
             }
             else
             {
-               // Convert words to uppercase
+                // Convert words to uppercase
                 rows[i] = ucFirst(trimmed);
             }
-       }
+        }
+
+        Result result;
+
+        switch(site_)
+        {
+        case FI:
+            result.country = "Finland";
+            break;
+        case SE:
+            result.country = "Sweden";
+            break;
+        case DK:
+            result.country = "Denmark";
+            break;
+        }
 
-       Result result;
+        int size = rows.size();
+
+        switch(size)
+        {
+        case 1:
+            result.name = rows[0];
+            break;
+
+        case 2:
+            result.name = rows[0];
+            result.city = rows[1];
+            break;
+
+        case 3:
+            if(isPhoneNumber(rows[1]))
+            {
+                result.name = rows[0];
+                result.number = cleanUpNumber(rows[1]);
+                result.city = rows[2];
+            }
+            else
+            {
+                result.name = rows[0];
+                result.street = rows[1];
+                result.city = rows[2];
+            }
+            break;
 
-       int size = rows.size();
+        case 4:
+            result.name = rows[0];
+            // Remove slashes and spaces from number
+            result.number = cleanUpNumber(rows[1]);
+            result.street = rows[2];
+            result.city = rows[3];
+            break;
 
-       switch(size)
-       {
-         case 1:
-           result.name = rows[0];
-           break;
+        default:
+            bool ok = false;
 
-         case 2:
-           result.name = rows[0];
-           result.city = rows[1];
-           break;
+            for(int a = 0; a < size && a < 8; a++)
+            {
+                if(isPhoneNumber(rows[a]))
+                {
+                    result.name = rows[0];
+                    result.number = cleanUpNumber(rows[a]);
+
+                    for(int i = a + 1; i < size && i < 8; i++)
+                    {
+                        if(!isPhoneNumber(rows[i]) && size > i + 1 && isStreet(rows[i]))
+                        {
+                            result.street = rows[i];
+                            result.city = rows[i+1];
+                            ok = true;
+                            break;
+                        }
+                    }
+
+                }
 
-         case 3:
-           result.name = rows[0];
-           result.street = rows[1];
-           result.city = rows[2];
-           break;
+            }
 
-         case 4:
-           result.name = rows[0];
-           // Remove slashes and spaces from number
-           result.number = cleanUpNumber(rows[1]);
-           result.street = rows[2];
-           result.city = rows[3];
-           break;
+            if(ok)
+            {
+                break;
+            }
 
-         default:
-           continue;
+            continue;
 
-       }
+        }
 
-       it.value()->results.push_back(result);
+        it.value()->results.push_back(result);
 
-       unsigned int foundResults = ++(it.value()->numbersTotal);
+        unsigned int foundResults = ++(it.value()->numbersTotal);
 
-       // If phone number searh is enabled, we have to make another
-       // request to find it out
-       if(findNumber_ && size < 4 && loggedIn_ &&
-          it.value()->details.type != YELLOW_PAGES)
-       {
+        // If phone number search is enabled, we have to make another
+        // request to find it out
+        if(getFindNumber() && size < 4 && (loggedIn_ || site_ != FI) &&
+                it.value()->details.type != YELLOW_PAGES)
+        {
             requestsPending = true;
             getNumberForResult(id, it.value()->results.size() - 1, it.value()->details);
-       }
-       // Otherwise result is ready
-       else
-       {
+        }
+        // Otherwise result is ready
+        else
+        {
             emit resultAvailable(result, it.value()->details);
-       }
+        }
 
-       // Stop searching if max results is reached
-       if(maxResults_ && (foundResults >= maxResults_))
-       {
-           break;
-       }
+        unsigned int maxResults = getMaxResults();
+
+        // Stop searching if max results is reached
+        if(maxResults && (foundResults >= maxResults))
+        {
+            break;
+        }
     }
 
-    // If number there were no results or no phone numbers needed to
+    // If there were no results or no phone numbers needed to
     // be fetched, the whole request is ready
     if(it.value()->numbersTotal == 0 || !requestsPending)
     {
@@ -382,8 +450,7 @@ void Eniro::loadResults(int id, QString const& httpData)
 
         if(httpData.indexOf(LOGIN_CHECK) != -1)
         {
-            error_ = INVALID_LOGIN;
-            errorString_ = INVALID_LOGIN_STRING;
+            setError(INVALID_LOGIN, INVALID_LOGIN_STRING),
             error = true;
         }
 
@@ -391,6 +458,102 @@ void Eniro::loadResults(int id, QString const& httpData)
     }
 }
 
+// Parses the html of a Finnish person search done with the full
+// (non-wap) page. Every hit block that contains a name becomes one
+// Result: it is stored in the pending search, announced with
+// resultAvailable(), and finally the whole search is reported as
+// finished without error.
+//
+// id       - id of the pending search the data belongs to
+// httpData - html source of the result page
+void Eniro::loadFinnishPersonResults(int id, QString const& httpData)
+{
+    searchMap::iterator it = pendingSearches_.find(id);
+
+    // Nothing to fill in if the search is not pending any more
+    if(it == pendingSearches_.end())
+    {
+        return;
+    }
+
+    static QRegExp rx("<div id=\"hit_(.*)<p class=\"adLinks\">");
+    static QRegExp name("<a class=\"fn expand\" href=\"#\">(.*)</a>");
+    static QRegExp number("<!-- sphoneid(.*)-->(.*)<!--");
+    static QRegExp street("<span class=\"street-address\">(.*)</span>");
+    static QRegExp zipCode("<span class=\"postal-code\">(.*)</span>");
+    static QRegExp city("<span class=\"locality\">(.*)</span>");
+
+    // Non-greedy matching keeps every capture inside a single hit block
+    rx.setMinimal(true);
+    name.setMinimal(true);
+    number.setMinimal(true);
+    street.setMinimal(true);
+    zipCode.setMinimal(true);
+    city.setMinimal(true);
+
+    int pos = 0;
+
+    unsigned int maxResults = getMaxResults();
+    unsigned int foundResults = 0;
+
+    while((pos = rx.indexIn(httpData, pos)) != -1)
+    {
+        pos += rx.matchedLength();
+
+        // Whole hit block, searched below for the individual fields
+        QString data = rx.cap(0);
+
+        Result result;
+
+        // A hit without a name is skipped
+        if(name.indexIn(data) != -1)
+        {
+            result.name = name.cap(1);
+        }
+        else
+        {
+            continue;
+        }
+
+        if(number.indexIn(data) != -1)
+        {
+            result.number = number.cap(2);
+        }
+
+        if(street.indexIn(data) != -1)
+        {
+            result.street = street.cap(1);
+        }
+
+        // City is reported as "<postal code> <locality>"
+        QString cityStr;
+
+        if(zipCode.indexIn(data) != -1)
+        {
+            cityStr = zipCode.cap(1) + " ";
+        }
+
+        if(city.indexIn(data) != -1)
+        {
+            cityStr += city.cap(1);
+        }
+
+        result.city = cityStr;
+
+        result.name = cleanUpString(result.name);
+        result.street = cleanUpString(result.street);
+        result.number = cleanUpNumber(result.number);
+        result.city = cleanUpString(result.city);
+        result.country = "Finland";
+
+        it.value()->results.push_back(result);
+        emit resultAvailable(result, it.value()->details);
+
+        foundResults++;
+
+        // Stop when the limit is reached. A limit of 0 means "no limit",
+        // as in loadResults() and createUrl().
+        if(maxResults && (foundResults >= maxResults))
+        {
+            break;
+        }
+
+    }
+
+    emitRequestFinished(it.key(), it.value(), false);
+
+}
+
+// Normalizes a scraped text field in place: html entities are decoded,
+// the text is lower-cased and trimmed, each run of line breaks, tabs and
+// spaces becomes one space, and the result is passed through ucFirst().
+// Returns a reference to the modified argument.
+QString& Eniro::cleanUpString(QString& str)
+{
+    static QRegExp whitespaceRun("(\r\n|\n|\t| )+");
+
+    QString cleaned = htmlEntityDecode(str);
+    cleaned = cleaned.toLower().trimmed();
+    cleaned.replace(whitespaceRun, " ");
+
+    str = ucFirst(cleaned);
+    return str;
+}
+
 // Loads phone number from html source
 void Eniro::loadNumber(int id, QString const& result)
 {
@@ -424,7 +587,7 @@ void Eniro::loadNumber(int id, QString const& result)
 
         if(!trimmed.isEmpty())
         {
-               // Remove whitespaces from number
+            // Remove whitespaces from number
             searchIt.value()->results[numberIt.value()->index].number = cleanUpNumber(trimmed);
 
             emit resultAvailable(searchIt.value()->results[numberIt.value()->index], searchIt.value()->details);
@@ -434,7 +597,7 @@ void Eniro::loadNumber(int id, QString const& result)
             // Check if all numbers have been found
             if(found >= searchIt.value()->numbersTotal)
             {
-               emitRequestFinished(searchIt.key(), searchIt.value(), false);
+                emitRequestFinished(searchIt.key(), searchIt.value(), false);
             }
 
             // If number was found, there was no error
@@ -444,8 +607,7 @@ void Eniro::loadNumber(int id, QString const& result)
 
     if(error)
     {
-        error_ = INVALID_LOGIN;
-        errorString_ = INVALID_LOGIN;
+        setError(INVALID_LOGIN, INVALID_LOGIN_STRING);
         emitRequestFinished(searchIt.key(), searchIt.value(), true);
     }
 
@@ -472,18 +634,19 @@ QUrl Eniro::createUrl(QString const& query, QString const& location)
         url.addQueryItem("geo_area", location);
     }
 
-    if(maxResults_)
+    unsigned int maxResults = getMaxResults();
+
+    if(maxResults)
     {
-        url.addQueryItem("hpp", QString::number(maxResults_));
+        url.addQueryItem("hpp", QString::number(maxResults));
     }
-    if(loggedIn_)
+    if(loggedIn_ && site_ == FI)
     {
         url.addQueryItem("login_name", username_);
         url.addQueryItem("login_password", password_);
     }
 
-    QByteArray path = url.encodedQuery().replace('+', "%2B");
-    url.setEncodedQuery(path);
+    fixUrl(url);
 
     return url;
 }
@@ -506,87 +669,55 @@ void Eniro::getNumberForResult(int id, int index, SearchDetails const& details)
 
 void Eniro::emitRequestFinished(int key, SearchData* data, bool error)
 {
-
-       // Do not emit "Request aborted" error
-       if(!(error && (http_.error() == QHttp::Aborted)))
-       {
-               emit requestFinished(data->results, data->details, error);
-       }
-
+    emit requestFinished(data->results, data->details, error);
     delete pendingSearches_[key];
     pendingSearches_[key] = 0;
     pendingSearches_.remove(key);
 }
 
-QString Eniro::ucFirst(QString& str)
-{
-       if (str.size() < 1) {
-               return "";
-       }
-
-       QStringList tokens = str.split(" ");
-       QList<QString>::iterator tokItr;
-
-       for (tokItr = tokens.begin(); tokItr != tokens.end(); ++tokItr)
-       {
-               (*tokItr) = (*tokItr).at(0).toUpper() + (*tokItr).mid(1);
-       }
-
-       return tokens.join(" ");
-}
-
-QString& Eniro::cleanUpNumber(QString& number)
-{
-    return number.replace(numberCleaner_, "");
-}
-
-QString& Eniro::stripTags(QString& string)
-{
-    return string.replace(tagStripper_, "");
-}
 
 QMap <Eniro::Site, Eniro::SiteDetails> Eniro::getSites()
 {
-       QMap <Site, SiteDetails> sites;
-       SiteDetails details;
-       details.name = tr("Finnish");
-       details.id = "fi";
-       sites[FI] = details;
+    QMap <Site, SiteDetails> sites;
 
-       details.name = tr("Swedish");
-       details.id = "se";
-       sites[SE] = details;
-
-       details.name = tr("Danish");
-       details.id = "dk";
-       sites[DK] = details;
+    for(int i = 0; i < SITE_COUNT; i++)
+    {
+        SiteDetails details;
+        details.name = SITE_NAMES[i];
+        details.id = SITE_IDS[i];
+        sites[static_cast<Site>(i)] = details;
+    }
 
-       return sites;
+    return sites;
 }
 
 Eniro::Site Eniro::stringToSite(QString const& str)
 {
-       Site site = FI;
-
-       QString lower = str.toLower();
+    Site site = FI;
+    QString lower = str.toLower();
 
-       if(lower == "se" || lower == "swedish")
-       {
-               site = SE;
-       }
-       else if(lower == "dk" || lower == "danish")
-       {
-               site = DK;
-       }
+    for(int i = 0; i < SITE_COUNT; i++)
+    {
+        if(lower == SITE_NAMES[i] || lower == SITE_IDS[i])
+        {
+            site = static_cast <Site> (i);
+            break;
+        }
+    }
 
-       return site;
+    return site;
 }
 
-Eniro::SearchDetails::SearchDetails(QString const& q,
-                                    QString const& loc,
-                                    SearchType t)
+// Heuristic used when parsing result rows: a string counts as a street
+// when it contains both a digit and a space, or neither of them.
+bool Eniro::isStreet(QString const& str)
 {
-    query = q;
-    location = loc;
-    type = t;
+    static QRegExp digits("([0-9]+)");
+
+    const bool hasDigit = digits.indexIn(str) != -1;
+    const bool hasSpace = str.indexOf(" ") != -1;
+
+    // True exactly when both are present or both are absent
+    return hasDigit == hasSpace;
 }