Introduction
Parsing CSV files may sound like an easy task, but in reality it is not that trivial. Below is a CsvParser
class implementation that I use in my own projects. It supports the following features that I find critical:
- Custom Delimiter and Qualifier characters
- Supports quoting notation (allows delimiter character to be part of a value)
- Supports quote escaping (allows quote character to be part of a value)
- Supports both '\n' and '\r\n' line endings
- Designed to return IEnumerable via yield return (no memory buffers)
- Designed to return the header and the rest of the lines separately (using Tuple)
Source Code
/// <summary>
/// Streaming CSV parser with a configurable delimiter and qualifier (quote) character.
/// Records are produced lazily, one list of field values per line of input.
/// </summary>
public static class CsvParser
{
    /// <summary>
    /// Splits a sequence into its first element and a lazy sequence of the remaining elements.
    /// </summary>
    /// <param name="source">Sequence to split.</param>
    /// <returns>
    /// A tuple of the first element (or <c>default(T)</c> when the sequence is empty) and the
    /// remaining elements. The tail reads from the same underlying enumerator, so it can only
    /// be consumed once, moving forward.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    private static Tuple<T, IEnumerable<T>> HeadAndTail<T>(this IEnumerable<T> source)
    {
        if (source == null)
            throw new ArgumentNullException("source");

        var en = source.GetEnumerator();

        // Only read Current when MoveNext succeeded; Current is not defined otherwise.
        var head = en.MoveNext() ? en.Current : default(T);
        return Tuple.Create(head, EnumerateTail(en));
    }

    /// <summary>
    /// Yields whatever elements are left in an already started enumerator.
    /// </summary>
    private static IEnumerable<T> EnumerateTail<T>(IEnumerator<T> en)
    {
        while (en.MoveNext())
            yield return en.Current;
    }

    /// <summary>
    /// Parses CSV text held in a string.
    /// </summary>
    /// <param name="content">CSV text to parse.</param>
    /// <param name="delimiter">Character separating fields.</param>
    /// <param name="qualifier">Character used to quote fields.</param>
    /// <returns>A lazy sequence of records, each a list of field values.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="content"/> is null.</exception>
    public static IEnumerable<IList<string>>
        Parse(string content, char delimiter, char qualifier)
    {
        if (content == null)
            throw new ArgumentNullException("content");

        return ParseContent(content, delimiter, qualifier);
    }

    /// <summary>
    /// Iterator behind <see cref="Parse(string, char, char)"/>. The reader is created inside
    /// the iterator so that it stays open for as long as the records are being enumerated;
    /// returning a lazy sequence from within a using block would dispose the reader before
    /// the first record is read.
    /// </summary>
    private static IEnumerable<IList<string>>
        ParseContent(string content, char delimiter, char qualifier)
    {
        using (var reader = new StringReader(content))
        {
            foreach (var record in ParseRecords(reader, delimiter, qualifier))
                yield return record;
        }
    }

    /// <summary>
    /// Parses CSV text from a reader and returns the first record separately from the rest.
    /// </summary>
    /// <param name="reader">Reader supplying the CSV text. It is not disposed by this method.</param>
    /// <param name="delimiter">Character separating fields.</param>
    /// <param name="qualifier">Character used to quote fields.</param>
    /// <returns>
    /// A tuple of the first record (null when the input has no records) and a lazy,
    /// forward-only sequence of the remaining records.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
    public static Tuple<IList<string>, IEnumerable<IList<string>>>
        ParseHeadAndTail(TextReader reader, char delimiter, char qualifier)
    {
        return HeadAndTail(Parse(reader, delimiter, qualifier));
    }

    /// <summary>
    /// Parses CSV text from a reader.
    /// </summary>
    /// <param name="reader">
    /// Reader supplying the CSV text. It is not disposed by this method and must stay open
    /// until the returned sequence has been enumerated.
    /// </param>
    /// <param name="delimiter">Character separating fields.</param>
    /// <param name="qualifier">Character used to quote fields.</param>
    /// <returns>A lazy sequence of records, each a list of field values.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
    public static IEnumerable<IList<string>>
        Parse(TextReader reader, char delimiter, char qualifier)
    {
        // Validate here rather than in the iterator so the error surfaces at the call site
        // instead of on first enumeration.
        if (reader == null)
            throw new ArgumentNullException("reader");

        return ParseRecords(reader, delimiter, qualifier);
    }

    /// <summary>
    /// Character-by-character state machine that turns the reader's text into records.
    /// Lines that produce no fields (blank lines) yield no record. Whitespace met while the
    /// current field is still empty and not quoted is skipped. Inside a quoted section,
    /// delimiters and line breaks are kept as part of the value and a doubled qualifier
    /// stands for one literal qualifier character.
    /// </summary>
    private static IEnumerable<IList<string>>
        ParseRecords(TextReader reader, char delimiter, char qualifier)
    {
        var inQuote = false;
        var record = new List<string>();
        var sb = new StringBuilder();

        // Drive the loop with Read() rather than Peek(): Peek() is allowed to return -1
        // even when more characters can still be read.
        int next;
        while ((next = reader.Read()) != -1)
        {
            var readChar = (char) next;

            // Peek() results are compared as int so that -1 (no more data) is never cast to char.
            if (readChar == '\n' || (readChar == '\r' && reader.Peek() == '\n'))
            {
                // Consume the '\n' of a "\r\n" pair.
                if (readChar == '\r')
                    reader.Read();

                if (inQuote)
                {
                    // A line break inside quotes belongs to the value; keep it as written.
                    if (readChar == '\r')
                        sb.Append('\r');
                    sb.Append('\n');
                }
                else
                {
                    // End of record: flush the pending field, then emit the record if non-empty.
                    if (record.Count > 0 || sb.Length > 0)
                    {
                        record.Add(sb.ToString());
                        sb.Clear();
                    }
                    if (record.Count > 0)
                        yield return record;

                    // Start a fresh list (the previous one was handed to the caller),
                    // presized to the field count just seen.
                    record = new List<string>(record.Count);
                }
            }
            else if (sb.Length == 0 && !inQuote)
            {
                // Nothing collected for the current field yet.
                if (readChar == qualifier)
                {
                    inQuote = true;
                }
                else if (readChar == delimiter)
                {
                    // Empty field.
                    record.Add(sb.ToString());
                    sb.Clear();
                }
                else if (char.IsWhiteSpace(readChar))
                {
                    // Skip whitespace before the field's first character.
                }
                else
                {
                    sb.Append(readChar);
                }
            }
            else if (readChar == delimiter)
            {
                if (inQuote)
                {
                    sb.Append(delimiter);
                }
                else
                {
                    record.Add(sb.ToString());
                    sb.Clear();
                }
            }
            else if (readChar == qualifier)
            {
                if (inQuote)
                {
                    if (reader.Peek() == qualifier)
                    {
                        // Doubled qualifier: an escaped literal qualifier character.
                        reader.Read();
                        sb.Append(qualifier);
                    }
                    else
                    {
                        // Closing qualifier.
                        inQuote = false;
                    }
                }
                else
                {
                    // Qualifier in the middle of an unquoted value is kept verbatim.
                    sb.Append(readChar);
                }
            }
            else
            {
                sb.Append(readChar);
            }
        }

        // Input ended without a trailing line break: flush whatever is pending.
        if (record.Count > 0 || sb.Length > 0)
            record.Add(sb.ToString());
        if (record.Count > 0)
            yield return record;
    }
}
Using the Code
Here is an example of reading a CSV file. The following code snippet parses the first 5 records and prints them to the Console
in the form of key/value pairs:
// Reads a CSV file and prints the first 5 data records as "header=value" pairs,
// skipping empty values and separating records with a blank line.
const string fileName = @"C:\Temp\file.csv";
using (var stream = File.OpenRead(fileName))
using (var reader = new StreamReader(stream))
{
    var data = CsvParser.ParseHeadAndTail(reader, ',', '"');
    var header = data.Item1;
    var lines = data.Item2;
    foreach (var line in lines.Take(5))
    {
        // A record may have fewer fields than the header; only index positions both have.
        var fieldCount = Math.Min(header.Count, line.Count);
        for (var i = 0; i < fieldCount; i++)
        {
            if (!string.IsNullOrEmpty(line[i]))
                Console.WriteLine("{0}={1}", header[i], line[i]);
        }
        Console.WriteLine();
    }
}
// Keep the console window open until Enter is pressed.
Console.ReadLine();
History
- 27th September, 2014: Initial version