/*
* Copyright (C) 2011, Jamie Thompson
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; If not, see
* <http://www.gnu.org/licenses/>.
*/
#include "CSV.h"
#include
#include
#include
#include
#include
// Comparison functor that orders pairs by their `second` member, largest
// first. It is handed to qSort() by CSV::DetermineDelimiter() to rank
// characters by how often they occur.
//
// operator() is a member template, so it accepts any pair-like type that
// exposes a `second` member comparable with operator< (QPair, std::pair, ...).
class SortByValueDesc
{
public:
    // Returns true when `a` must be placed before `b`, i.e. when a.second is
    // strictly greater than b.second.
    template <typename PairType>
    inline bool operator()(const PairType &a, const PairType &b) const
    {
        return b.second < a.second;
    }
};
// Default constructor.
// Starts with the validity flag cleared, no file or stream attached, and
// both the line and record counters at zero.
CSV::CSV()
    : m_IsValid(false),
      m_File(NULL),
      m_Stream(NULL),
      m_LineNumber(0),
      m_RecordNumber(0)
{
}
// Constructs a reader whose layout is already known.
//
// delimiter           - character separating the values on a line
// numColumnsPerRecord - number of values that make up one record
// headingIndices      - column heading -> column index lookup
//
// No file or stream is attached yet and the counters start at zero. The
// object is flagged as valid, so Open(QFile &) will skip delimiter and
// heading detection.
CSV::CSV(QChar delimiter, int numColumnsPerRecord, const ColumnIndicesHash &headingIndices)
    : m_IsValid(false),
      m_File(NULL),
      m_Stream(NULL),
      m_LineNumber(0),
      m_RecordNumber(0)
{
    // Store the caller-supplied layout...
    Delimiter(delimiter);
    NumColumnsPerRecord(numColumnsPerRecord);
    UpdateHeadings(headingIndices);

    // ...and record that nothing needs to be detected.
    IsValid(true);
}
// Destructor.
// Releases the text stream that Open() allocates with `new`; nothing else in
// this file deletes it, so without this it is leaked. Deleting a NULL pointer
// is a no-op, so a reader that was never opened is fine.
// NOTE(review): assumes this object is the sole owner of the stream (i.e.
// CSV instances are not copied while a stream is attached) - confirm against
// the class declaration.
CSV::~CSV()
{
    delete Stream();
}
// Attaches the reader to `file` and positions it on the first data record.
//
// If the reader is not yet flagged as valid, the first line is used to work
// out the delimiter and the column headings. Otherwise the layout is taken as
// already known and the heading row is simply read and discarded.
//
// `file` is referenced, not copied: it must outlive the reader's use of it.
// NOTE(review): a stream left over from an earlier Open() is replaced here
// without being deleted.
void CSV::Open(QFile &file)
{
    // Start from a clean slate: counters at zero and no values buffered from
    // whatever was being read before.
    LineNumber(0);
    RecordNumber(0);
    LineValues().clear();

    // Ready the file...
    File(&file);
    File()->seek(0);
    Stream(new QTextStream(&file));

    if(IsValid())
    {
        // We accept we've already done the hard work, so advance to the
        // first actual record (i.e. the 2nd row)
        ReadRecord();
    }
    else
    {
        // Set up the properties from the heading row...
        const QString firstLineContent(Stream()->readLine());

        // ...counting that row as a consumed line, exactly as ReadRecord()
        // does on the other path, so line numbers agree either way.
        ++LineNumber();

        DetermineDelimiter(firstLineContent);
        GetHeadings(firstLineContent);
    }
}
// Attaches the reader to `file` using a caller-supplied layout and positions
// it on the first data record.
//
// file                - file to read; referenced, not copied
// delimiter           - character separating the values on a line
// numColumnsPerRecord - number of values that make up one record
// headingIndices      - column heading -> column index lookup
void CSV::Open(QFile &file, QChar delimiter, int numColumnsPerRecord, const ColumnIndicesHash &headingIndices)
{
    // Set up the properties...
    Delimiter(delimiter);
    NumColumnsPerRecord(numColumnsPerRecord);
    UpdateHeadings(headingIndices);
    IsValid(true);

    // ...then let the single-argument overload do the rest. Because the
    // reader is now flagged as valid it skips detection, and it takes care of
    // resetting the counters, creating the text stream and discarding the
    // heading row. (Previously no stream was created on this path, so the
    // heading-row read went through a NULL or stale stream pointer.)
    Open(file);
}
void CSV::Close()
{
IsValid(false);
File(NULL);
}
// Reports whether there is nothing further to read from the attached stream.
// A reader with no stream attached (never opened) is reported as being at the
// end rather than dereferencing a NULL pointer.
bool CSV::AtEnd() const
{
    return !Stream() || Stream()->atEnd();
}
// Reads the next record and returns it as a heading name -> value lookup.
//
// Lines are pulled from the stream until enough values are buffered to fill
// one record. When a record spans several lines (a newline inside a value),
// the first value of each following line is joined onto the last buffered
// value with a '\n' between them. Any values beyond one record's worth stay
// buffered for the next call.
//
// If the input runs out first, the columns that could not be filled are
// returned as empty strings.
//
// Precondition: a stream has been attached via Open().
QHash<QString, QString> CSV::ReadRecord()
{
    const int columnCount(NumColumnsPerRecord());

    // Top up the buffer. The end-of-stream test is made on every pass so a
    // truncated final record cannot make this loop spin forever.
    while(LineValues().count() < columnCount && !Stream()->atEnd())
    {
        QStringList nextValues(Stream()->readLine().split(Delimiter()));

        if(!LineValues().isEmpty())
        {
            // Continuation of the previous line: merge the first value of
            // this line into the last value we already hold. split() always
            // yields at least one element, so takeFirst() is safe.
            LineValues().last().append('\n').append(nextValues.takeFirst());
        }

        LineValues().append(nextValues);
        ++LineNumber();
    }

    // Then extract enough values to complete a record. Walking from the last
    // column down means removing an entry never shifts one still to be read;
    // value() and removeAt() both tolerate indices past the end of a short
    // buffer.
    QHash<QString, QString> recordValues;
    for(int i(columnCount - 1); i >= 0; --i)
    {
        recordValues.insert(HeadingNames().value(i), LineValues().value(i));
        LineValues().removeAt(i);
    }

    return recordValues;
}
void CSV::GetHeadings(const QString &firstLineContent)
{
QStringList headingsRaw(QString(firstLineContent).split(Delimiter(), QString::KeepEmptyParts, Qt::CaseSensitive));
// We have this many fields per record
NumColumnsPerRecord(headingsRaw.count());
// Grab each column heading, and tidy it up.
ColumnIndicesHash indices;
indices.reserve(headingsRaw.count());
for(QStringList::size_type i(0); i < headingsRaw.count(); ++i)
{
QString heading(ExtractString(headingsRaw.value(i)));
qDebug() << headingsRaw.value(i) << " : " << heading;
indices[heading] = i;
}
UpdateHeadings(indices);
}
// Checks the known column headings against a list of required ones.
//
// requiredHeadings - headings that must be present; compared in lower case
//
// Returns the entries of `requiredHeadings` that are NOT present, in their
// original order and spelling. An empty list therefore means every required
// heading was found.
const QStringList CSV::HasRequiredHeadings(const QStringList &requiredHeadings)
{
    QStringList absentHeadings;

    for(QStringList::const_iterator heading(requiredHeadings.constBegin());
        heading != requiredHeadings.constEnd(); ++heading)
    {
        // Only headings we cannot find are reported back
        if(!HeadingIndices().contains(heading->toLower()))
            absentHeadings.append(*heading);
    }

    return absentHeadings;
}
void CSV::DetermineDelimiter(const QString &firstLineContent)
{
// Count the non-alphanumeric characters used
QHash counts;
foreach(const QChar c, firstLineContent)
++counts[c];
QList > orderedCounts;
orderedCounts.reserve(counts.size());
foreach(const QChar c, counts.keys())
if(!QChar(c).isLetterOrNumber())
orderedCounts.append(QPair(c, counts.value(c)));
qSort(orderedCounts.begin(), orderedCounts.end(), SortByValueDesc());
// Work around Q_FOREACH macro limitation when dealing with
// multi-typed templates (comma issue)
typedef QPair bodge;
foreach(bodge count, orderedCounts)
qDebug() << count.first << " = " << count.second;
// No-one would be mad enough to use quotation marks or apostrophes
// as their delimiter,but just in case, check the second most
// frequent character is present the right number of times for
// the quotation marks to be present on every column heading (two
// per heading, less one as they're seperators)
if((orderedCounts.value(0).first == '"' || orderedCounts.value(0).first == '\'')
&& ((orderedCounts.value(0).second / 2) - 1 == orderedCounts.value(1).second ))
{
// We're good.
Delimiter(orderedCounts.value(1).first);
}
else
Delimiter(orderedCounts.value(0).first);
}
// Tidies up a raw value: surrounding whitespace is trimmed, then at most one
// leading and one trailing quotation mark or apostrophe is removed.
//
// originalString - the value as it appeared between delimiters
//
// Returns the tidied text; whitespace inside the quotes is preserved.
//
// This is done with plain string operations because the previous pattern,
// ^["']?(.*)?["']?$, let the greedy (.*) swallow the closing quote, so
// "foo" came back as foo" instead of foo.
const QString CSV::ExtractString(const QString &originalString)
{
    QString content(originalString.trimmed());

    // Opening quote, if any
    if(!content.isEmpty()
        && (content.at(0) == '"' || content.at(0) == '\''))
    {
        content.remove(0, 1);
    }

    // Closing quote, if any
    if(!content.isEmpty()
        && (content.at(content.length() - 1) == '"' || content.at(content.length() - 1) == '\''))
    {
        content.chop(1);
    }

    return content;
}
void CSV::UpdateHeadings(const ColumnIndicesHash &headingIndices)
{
HeadingIndices().clear();
HeadingIndices().reserve(headingIndices.count());
foreach(QString columnName, headingIndices.keys())
HeadingIndices().insert(columnName.toLower(), headingIndices.value(columnName));
// ..and prepare the bidirectional hash (toLower not needed as above
// value reused)
HeadingNames().clear();
HeadingNames().reserve(headingIndices.count());
foreach(QString columnName, HeadingIndices().keys())
HeadingNames().insert(HeadingIndices().value(columnName), columnName);
}