+/*
+ * Copyright (C) 2011, Jamie Thompson
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#include "CSV.h"
+
+#include <QDebug>
+
+#include <QFile>
+#include <QHash>
+#include <QString>
+#include <QTextStream>
+
+/**
+ * Ordering functor for (character, occurrence count) pairs.
+ *
+ * One pair orders before another when its count is strictly greater, so a
+ * sort using this functor lists the most frequent characters first.
+ */
+class SortByValueDesc
+{
+public:
+    inline bool operator()(const QPair<QChar, uint> &lhs, const QPair<QChar, uint> &rhs) const
+    {
+        return lhs.second > rhs.second;
+    }
+};
+
+/**
+ * Default constructor: no file or stream attached, line and record counters
+ * at zero, and the valid flag cleared.
+ */
+CSV::CSV()
+    : m_IsValid(false)
+    , m_File(NULL)
+    , m_Stream(NULL)
+    , m_LineNumber(0)
+    , m_RecordNumber(0)
+{
+}
+
+/**
+ * Constructs a reader whose layout is supplied up front.
+ *
+ * @param delimiter            Character separating the values on a line.
+ * @param numColumnsPerRecord  Number of values that make up one record.
+ * @param headingIndices       Map of heading name to column index; handed to
+ *                             UpdateHeadings(), which lower-cases the names.
+ *
+ * No file or stream is attached yet. The valid flag is set once the layout
+ * has been stored.
+ */
+CSV::CSV(QChar delimiter, int numColumnsPerRecord, const ColumnIndicesHash &headingIndices)
+    : m_IsValid(false)
+    , m_File(NULL)
+    , m_Stream(NULL)
+    , m_LineNumber(0)
+    , m_RecordNumber(0)
+{
+    // Store the layout first...
+    Delimiter(delimiter);
+    NumColumnsPerRecord(numColumnsPerRecord);
+    UpdateHeadings(headingIndices);
+
+    // ...then flag it as known
+    IsValid(true);
+}
+
+/**
+ * Destructor. Performs no explicit clean-up.
+ */
+CSV::~CSV()
+{
+    // NOTE(review): Open() allocates a QTextStream with new and nothing in
+    // this file deletes it. Confirm that the Stream() setter or the type of
+    // m_Stream (declared in CSV.h) takes ownership, otherwise it leaks.
+}
+
+/**
+ * Attaches the reader to a file and positions it on the first data record.
+ *
+ * @param file  File to read from. It is rewound to offset 0 and wrapped in a
+ *              new QTextStream. Its address is stored, so it must outlive
+ *              the reader's use of it.
+ *
+ * If the layout is not yet flagged valid, the first line is read and used to
+ * work out the delimiter and the headings. Otherwise the layout is trusted
+ * and the first line is skipped by reading it as a record.
+ */
+void CSV::Open(QFile &file)
+{
+    // Ready the file, dropping any state left over from a previous file
+    // (including values that were buffered but never returned).
+    LineNumber(0);
+    RecordNumber(0);
+    LineValues().clear();
+    File(&file);
+    File()->seek(0);
+
+    // NOTE(review): a stream created by an earlier Open() is replaced here;
+    // confirm in CSV.h that the Stream() setter releases the old one.
+    Stream(new QTextStream(&file));
+
+    // Set up the properties...
+    if(!IsValid())
+    {
+        // Read the first line; it counts as a consumed line, exactly as it
+        // does when ReadRecord() consumes it in the branch below.
+        QString firstLineContent(Stream()->readLine());
+        ++LineNumber();
+
+        DetermineDelimiter(firstLineContent);
+        GetHeadings(firstLineContent);
+    }
+    // We accept we've already done the hard work, so advance to the first
+    // actual record (i.e. the 2nd row)
+    else
+        ReadRecord();
+}
+
+/**
+ * Attaches the reader to a file using a caller-supplied layout.
+ *
+ * @param file                 File to read from (rewound to offset 0).
+ * @param delimiter            Character separating the values on a line.
+ * @param numColumnsPerRecord  Number of values that make up one record.
+ * @param headingIndices       Map of heading name to column index; handed to
+ *                             UpdateHeadings(), which lower-cases the names.
+ *
+ * On return the first line of the file has been consumed, so the next
+ * ReadRecord() yields the first data record.
+ */
+void CSV::Open(QFile &file, QChar delimiter, int numColumnsPerRecord, const ColumnIndicesHash &headingIndices)
+{
+    // Set up the properties...
+    Delimiter(delimiter);
+    NumColumnsPerRecord(numColumnsPerRecord);
+    UpdateHeadings(headingIndices);
+    IsValid(true);
+
+    // ...then let the single-argument overload ready the file. With the
+    // layout flagged valid it resets the counters, rewinds the file, creates
+    // the text stream and skips the first line. Previously no stream was
+    // created on this path, so the ReadRecord() call used a NULL stream, or
+    // one left over from an earlier file.
+    Open(file);
+}
+
+/**
+ * Detaches the reader from its file: clears the valid flag and resets the
+ * stored file pointer to NULL.
+ */
+void CSV::Close()
+{
+    // Clearing the flag makes the next Open(QFile &) re-detect the layout
+    IsValid(false);
+
+    // NOTE(review): the text stream is left as it is; confirm whether it
+    // should be released here as well.
+    File(NULL);
+}
+
+/**
+ * Reports whether there is nothing further to read from the stream.
+ *
+ * @return true when no stream has been set up yet, or when the stream has
+ *         reached the end of its data; false otherwise.
+ */
+bool CSV::AtEnd() const
+{
+    // A reader that was never opened has nothing to offer; report the end
+    // rather than dereference a NULL stream.
+    return !Stream() || Stream()->atEnd();
+}
+
+/**
+ * Reads the next record from the stream.
+ *
+ * Lines are read until enough values are buffered to fill every column. A
+ * line that leaves the record short is taken to end inside a value that
+ * contains a newline, so the next line's first value is joined on to it.
+ *
+ * @return Hash of heading name to value for one record. Columns for which no
+ *         value was available (e.g. at the end of the data) map to an empty
+ *         string.
+ *
+ * NOTE(review): lines are split on every occurrence of the delimiter, so a
+ * delimiter inside a quoted value still separates two values.
+ */
+QHash<QString, QString> CSV::ReadRecord()
+{
+    // Keep reading while the record is incomplete. Re-checking for the end
+    // of the stream on every pass stops a truncated final record from
+    // looping forever.
+    while(LineValues().count() < NumColumnsPerRecord() && !Stream()->atEnd())
+    {
+        QStringList nextValues(Stream()->readLine().split(Delimiter()));
+        if(LineValues().count() > 0)
+        {
+            // The previous line ended mid-value: restore the newline and
+            // join the first value of this line on to it. split() always
+            // yields at least one entry, so takeFirst() is safe.
+            QString &unfinishedValue = LineValues().last();
+            unfinishedValue.append('\n');
+            unfinishedValue.append(nextValues.takeFirst());
+        }
+        LineValues().append(nextValues);
+        ++LineNumber();
+    }
+
+    // Then extract enough values to complete a record. Working from the last
+    // column backwards means removing a value never shifts the ones still to
+    // be taken.
+    QHash<QString, QString> recordValues;
+    for(int i(NumColumnsPerRecord() - 1); i >= 0; --i)
+    {
+        recordValues.insert(HeadingNames().value(i), LineValues().value(i));
+        if(i < LineValues().count())
+            LineValues().removeAt(i);
+    }
+    return recordValues;
+}
+
+/**
+ * Derives the column layout from the heading line of the file.
+ *
+ * @param firstLineContent  The heading line. It is split on the current
+ *                          delimiter, keeping empty parts.
+ *
+ * The number of parts becomes the column count per record. Each part is
+ * cleaned with ExtractString() and mapped to its position, then the mapping
+ * is installed through UpdateHeadings(). If two parts clean up to the same
+ * text, the later position replaces the earlier one.
+ */
+void CSV::GetHeadings(const QString &firstLineContent)
+{
+    const QStringList rawHeadings(firstLineContent.split(Delimiter(), QString::KeepEmptyParts, Qt::CaseSensitive));
+
+    // One column per heading
+    NumColumnsPerRecord(rawHeadings.count());
+
+    // Map each tidied heading to the position it was found at
+    ColumnIndicesHash positions;
+    positions.reserve(rawHeadings.count());
+    for(QStringList::size_type column(0); column < rawHeadings.count(); ++column)
+    {
+        const QString &rawHeading = rawHeadings.at(column);
+        QString tidyHeading(ExtractString(rawHeading));
+        qDebug() << rawHeading << " : " << tidyHeading;
+
+        positions[tidyHeading] = column;
+    }
+
+    UpdateHeadings(positions);
+}
+
+/**
+ * Checks a list of headings against the known headings.
+ *
+ * @param requiredHeadings  Headings that must be present. They are compared
+ *                          after lower-casing.
+ * @return The entries of requiredHeadings that are not known, in their
+ *         original order and spelling. An empty list means none are missing.
+ */
+const QStringList CSV::HasRequiredHeadings(const QStringList &requiredHeadings)
+{
+    QStringList absentHeadings;
+
+    // Collect every required heading we have no column for
+    for(int n(0); n < requiredHeadings.count(); ++n)
+    {
+        const QString &wanted = requiredHeadings.at(n);
+        if(!HeadingIndices().contains(wanted.toLower()))
+            absentHeadings.append(wanted);
+    }
+
+    return absentHeadings;
+}
+
+/**
+ * Guesses the delimiter from the heading line and stores it.
+ *
+ * @param firstLineContent  The heading line of the file.
+ *
+ * Every character is counted, and the non-alphanumeric ones are ranked by
+ * frequency. The most frequent one is taken as the delimiter, with one
+ * exception: if it is a quotation mark or apostrophe and the runner-up
+ * occurs exactly (quote count / 2) - 1 times, the runner-up is used.
+ *
+ * NOTE(review): spaces inside headings are candidates too, and a line with
+ * no candidate at all stores a default-constructed QChar.
+ */
+void CSV::DetermineDelimiter(const QString &firstLineContent)
+{
+    // Q_FOREACH cannot take a type containing a comma, hence the alias
+    typedef QPair<QChar, uint> CharCount;
+
+    // Tally every character on the line
+    QHash<QChar, uint> tally;
+    for(int pos(0); pos < firstLineContent.size(); ++pos)
+        ++tally[firstLineContent.at(pos)];
+
+    // Only the non-alphanumeric characters are candidates
+    QList<CharCount> candidates;
+    candidates.reserve(tally.size());
+    for(QHash<QChar, uint>::const_iterator it(tally.constBegin()); it != tally.constEnd(); ++it)
+    {
+        if(!it.key().isLetterOrNumber())
+            candidates.append(CharCount(it.key(), it.value()));
+    }
+
+    // Most frequent first
+    qSort(candidates.begin(), candidates.end(), SortByValueDesc());
+
+    foreach(const CharCount &candidate, candidates)
+        qDebug() << candidate.first << " = " << candidate.second;
+
+    // value() yields a default-constructed pair when there are fewer than
+    // two candidates
+    const CharCount mostFrequent(candidates.value(0));
+    const CharCount runnerUp(candidates.value(1));
+
+    // No-one would be mad enough to use quotation marks or apostrophes as
+    // their delimiter, but just in case, only prefer the runner-up when it
+    // occurs the right number of times for every heading to be quoted (two
+    // quotes per heading, less one as delimiters are separators).
+    const bool quoteOnTop(mostFrequent.first == '"' || mostFrequent.first == '\'');
+    if(quoteOnTop && (mostFrequent.second / 2) - 1 == runnerUp.second)
+        Delimiter(runnerUp.first);
+    else
+        Delimiter(mostFrequent.first);
+}
+
+/**
+ * Tidies a raw value: trims surrounding whitespace, then strips at most one
+ * leading and one trailing quotation mark or apostrophe.
+ *
+ * @param originalString  The raw text of a value, e.g. a column heading.
+ * @return The tidied text; `"Name"` becomes `Name`.
+ *
+ * The two ends are handled independently, as the optional quotes in the
+ * former regular expression were. That expression's greedy capture group
+ * swallowed the closing quote, so the quote was left on the result.
+ */
+const QString CSV::ExtractString(const QString &originalString)
+{
+    const QChar doubleQuote(QLatin1Char('"'));
+    const QChar singleQuote(QLatin1Char('\''));
+
+    QString content(originalString.trimmed());
+    if(content.startsWith(doubleQuote) || content.startsWith(singleQuote))
+        content.remove(0, 1);
+    if(content.endsWith(doubleQuote) || content.endsWith(singleQuote))
+        content.chop(1);
+    return content;
+}
+
+/**
+ * Replaces both heading lookups with the supplied layout.
+ *
+ * @param headingIndices  Map of heading name to column index.
+ *
+ * HeadingIndices() ends up mapping each lower-cased name to its index, and
+ * HeadingNames() mapping each index back to the lower-cased name.
+ */
+void CSV::UpdateHeadings(const ColumnIndicesHash &headingIndices)
+{
+    // Forward lookup: lower-cased name -> index
+    HeadingIndices().clear();
+    HeadingIndices().reserve(headingIndices.count());
+    const QList<QString> suppliedNames(headingIndices.keys());
+    for(int n(0); n < suppliedNames.count(); ++n)
+    {
+        const QString &suppliedName = suppliedNames.at(n);
+        HeadingIndices().insert(suppliedName.toLower(), headingIndices.value(suppliedName));
+    }
+
+    // Reverse lookup: index -> name. It is built from the forward lookup,
+    // so the names are already lower-cased.
+    HeadingNames().clear();
+    HeadingNames().reserve(headingIndices.count());
+    const QList<QString> storedNames(HeadingIndices().keys());
+    for(int n(0); n < storedNames.count(); ++n)
+    {
+        const QString &storedName = storedNames.at(n);
+        HeadingNames().insert(HeadingIndices().value(storedName), storedName);
+    }
+}