2 * Copyright (C) 2011, Jamie Thompson
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License as published by the Free Software Foundation; either
7 * version 3 of the License, or (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public
15 * License along with this program; If not, see
16 * <http://www.gnu.org/licenses/>.
26 #include <QTextStream>
31 inline bool operator()(const QPair<QChar, uint> &a, const QPair<QChar, uint> &b) const
33 return b.second < a.second;
// Member-initialiser list: the object starts out flagged invalid, with no file
// or stream attached and with the line and record counters at zero.
// NOTE(review): the constructor signature this list belongs to is not visible
// in this view (presumably the default CSV::CSV()) - confirm.
38 : m_IsValid(false), m_File(NULL), m_Stream(NULL), m_LineNumber(0), m_RecordNumber(0)
// Constructs a CSV object from a caller-supplied layout.
//   delimiter           - field separator character.
//                         NOTE(review): no visible statement uses this
//                         parameter; the embedded numbering skips lines here,
//                         so a Delimiter(delimiter) call may simply be missing
//                         from this view - confirm.
//   numColumnsPerRecord - forwarded to NumColumnsPerRecord().
//   headingIndices      - heading name -> column index hash, forwarded to
//                         UpdateHeadings().
42 CSV::CSV(QChar delimiter, int numColumnsPerRecord, const ColumnIndicesHash &headingIndices)
// Start out invalid, with no file or stream attached and zeroed counters.
43 : m_IsValid(false), m_File(NULL), m_Stream(NULL), m_LineNumber(0), m_RecordNumber(0)
46 NumColumnsPerRecord(numColumnsPerRecord);
48 UpdateHeadings(headingIndices);
// Attaches a text stream to `file` and derives the CSV layout from its first
// line: the delimiter via DetermineDelimiter() and the column headings via
// GetHeadings().
//   file - the file to read from.
// NOTE(review): the embedded numbering skips several lines in this function
// (e.g. between the signature and the first comment, and after the final
// comment), so statements such as opening the file or updating m_IsValid may
// exist but are not visible here - confirm against the full file.
57 void CSV::Open(QFile &file)
65 // Read the first line
// NOTE(review): the QTextStream is heap-allocated; what deletes it (and any
// stream set by an earlier Open()) is not visible in this view - confirm.
66 Stream(new QTextStream(&file));
68 // Set up the properties...
// The same first line feeds both the delimiter detection and the heading
// extraction; the delimiter must be determined first because GetHeadings()
// splits on Delimiter().
71 QString firstLineContent(Stream()->readLine());
72 DetermineDelimiter(firstLineContent);
73 GetHeadings(firstLineContent);
75 // We accept we've already done the hard work, so advance to the first
76 // actual record (i.e. the 2nd row)
// Overload of Open() that takes the CSV layout from the caller instead of
// deriving it from the first line.
//   file                - the file to read from.
//                         NOTE(review): no visible statement touches `file`;
//                         the numbering skips lines 87-92, where the stream
//                         setup presumably lives - confirm.
//   delimiter           - field separator character.
//                         NOTE(review): no visible use; line 84 is missing
//                         from this view - confirm it is applied.
//   numColumnsPerRecord - forwarded to NumColumnsPerRecord().
//   headingIndices      - heading name -> column index hash, forwarded to
//                         UpdateHeadings().
81 void CSV::Open(QFile &file, QChar delimiter, int numColumnsPerRecord, const ColumnIndicesHash &headingIndices)
83 // Set up the properties...
85 NumColumnsPerRecord(numColumnsPerRecord);
86 UpdateHeadings(headingIndices);
93 // Advance to the first actual record (i.e. the 2nd row)
103 bool CSV::AtEnd() const
105 return Stream()->atEnd();
// Reads one record and collects it into a hash keyed by heading name.
//
// Values are buffered in LineValues(). When the buffer holds fewer values than
// NumColumnsPerRecord() and the stream is not at its end, further lines are
// read and split on Delimiter() until enough values are buffered. The first
// NumColumnsPerRecord() buffered values are then moved into the result hash.
//
// NOTE(review): lines are split with a plain QString::split(), so a delimiter
// inside a quoted value would be treated as a field boundary - confirm whether
// that matters for the expected input.
// NOTE(review): no return statement is visible in this view (the embedded
// numbering skips the lines after 133); presumably recordValues is returned -
// confirm.
108 QHash<QString, QString> CSV::ReadRecord()
110 // If we have something more to read...
111 if(LineValues().count() < NumColumnsPerRecord() && !Stream()->atEnd())
113 // ...read a line's worth but make sure we have enough columns (i.e. handle newlines in values)
// NOTE(review): this loop condition only looks at the buffered value count,
// not at Stream()->atEnd(). If the stream runs out part-way through a record,
// confirm the loop can still terminate.
114 while(LineValues().count() < NumColumnsPerRecord())
116 QStringList nextValues(QString(Stream()->readLine()).split(Delimiter()));
// A non-empty buffer means the previous line ended inside a value, so this
// line is treated as its continuation.
117 if(LineValues().count() > 0)
119 // Merge the first value of the next line with the last of the previous...
// NOTE(review): only a newline is appended to the previous value; the first
// value of the new line is then removed without being appended, which does
// not match the comment above - confirm whether that text is meant to be kept.
120 LineValues().last().append('\n');
121 nextValues.removeAt(0);
123 LineValues().append(nextValues);
128 // The extract enough values to complete a record
129 QHash<QString, QString> recordValues;
// Walk the columns from last to first so that removeAt(i) never shifts an
// index that is still to be visited.
// NOTE(review): `LineValues().count() >= 0` can never be false (a count is
// not negative), so it does not guard against a short buffer; value(i) then
// yields a default-constructed QString for missing columns.
130 for(int i(NumColumnsPerRecord() - 1); i >= 0 && LineValues().count() >= 0; --i)
// HeadingNames() is looked up by column index to obtain the key for the value.
132 recordValues.insert(HeadingNames().value(i), LineValues().value(i));
133 LineValues().removeAt(i);
138 void CSV::GetHeadings(const QString &firstLineContent)
140 QStringList headingsRaw(QString(firstLineContent).split(Delimiter(), QString::KeepEmptyParts, Qt::CaseSensitive));
142 // We have this many fields per record
143 NumColumnsPerRecord(headingsRaw.count());
145 // Grab each column heading, and tidy it up.
146 ColumnIndicesHash indices;
147 indices.reserve(headingsRaw.count());
148 for(QStringList::size_type i(0); i < headingsRaw.count(); ++i)
150 QString heading(ExtractString(headingsRaw.value(i)));
151 qDebug() << headingsRaw.value(i) << " : " << heading;
153 indices[heading] = i;
156 UpdateHeadings(indices);
159 const QStringList CSV::HasRequiredHeadings(const QStringList &requiredHeadings)
161 QStringList missingRequiredHeadings(requiredHeadings);
163 // Check over the required headings
164 foreach(const QString requiredHeading, requiredHeadings)
166 if(HeadingIndices().contains(requiredHeading.toLower()))
167 missingRequiredHeadings.removeOne(requiredHeading);
170 return missingRequiredHeadings;
// Guesses the field delimiter from the heading row and stores it through
// Delimiter(): normally the most frequent non-alphanumeric character, unless
// that character is a quotation mark or apostrophe whose count fits "two per
// heading", in which case the second most frequent one is used.
//   firstLineContent - the raw text of the heading row.
173 void CSV::DetermineDelimiter(const QString &firstLineContent)
175 // Count the non-alphanumeric characters used
176 QHash<QChar, uint> counts;
// NOTE(review): the body of this loop is not visible in this view (the
// embedded numbering skips line 178); presumably it increments counts[c] -
// confirm.
177 foreach(const QChar c, firstLineContent)
// Copy the tallies of the non-alphanumeric characters into a list so they can
// be ordered by frequency (letters and digits are filtered out here).
180 QList<QPair<QChar, uint> > orderedCounts;
181 orderedCounts.reserve(counts.size());
182 foreach(const QChar c, counts.keys())
183 if(!QChar(c).isLetterOrNumber())
184 orderedCounts.append(QPair<QChar, uint>(c, counts.value(c)));
// Most frequent character first (SortByValueDesc compares the counts).
186 qSort(orderedCounts.begin(), orderedCounts.end(), SortByValueDesc());
188 // Work around Q_FOREACH macro limitation when dealing with
189 // multi-typed templates (comma issue)
190 typedef QPair<QChar, uint> bodge;
// Debug output only: dump every candidate with its count.
191 foreach(bodge count, orderedCounts)
192 qDebug() << count.first << " = " << count.second;
194 // No-one would be mad enough to use quotation marks or apostrophes
195 // as their delimiter,but just in case, check the second most
196 // frequent character is present the right number of times for
197 // the quotation marks to be present on every column heading (two
198 // per heading, less one as they're seperators)
// NOTE(review): QList::value() returns a default-constructed pair for an
// index past the end, so a heading row with fewer than two distinct
// non-alphanumeric characters compares against (null QChar, 0) here, and a
// row with none ends up with a null QChar as delimiter - confirm this is
// acceptable. The counts are unsigned, so `(n / 2) - 1` wraps when n < 2.
199 if((orderedCounts.value(0).first == '"' || orderedCounts.value(0).first == '\'')
200 && ((orderedCounts.value(0).second / 2) - 1 == orderedCounts.value(1).second ))
// Quote characters dominate: take the runner-up as the delimiter.
203 Delimiter(orderedCounts.value(1).first);
// NOTE(review): the lines separating these two calls are not visible in this
// view; presumably an `else` makes the call below the fallback - confirm.
206 Delimiter(orderedCounts.value(0).first);
209 const QString CSV::ExtractString(const QString &originalString)
211 QRegExp content("^[\"\']?(.*)?[\"\']?$");
212 content.indexIn(originalString.trimmed());
213 return content.cap(1);
216 void CSV::UpdateHeadings(const ColumnIndicesHash &headingIndices)
218 HeadingIndices().clear();
219 HeadingIndices().reserve(headingIndices.count());
220 foreach(QString columnName, headingIndices.keys())
221 HeadingIndices().insert(columnName.toLower(), headingIndices.value(columnName));
223 // ..and prepare the bidirectional hash (toLower not needed as above
225 HeadingNames().clear();
226 HeadingNames().reserve(headingIndices.count());
227 foreach(QString columnName, HeadingIndices().keys())
228 HeadingNames().insert(HeadingIndices().value(columnName), columnName);