The article describes performance profiles of JSON and CSV serialization of classes.
Introduction
This article includes source code using JSON and CSV serialization, weighing the merits of each.
Using the Code
The code consists of two classes and a test runner program.
For the primary class, MyFileInfo, I chose file system information as it's easy to come by and of decent size. You can see that I've built the CSV handling directly into this class for simplicity. The "pack" class at the end provides a larger thing to serialize: a list of file info objects.
using System;
using System.Collections.Generic;
using System.IO;
namespace JsonVsCsv
{
/// <summary>
/// Basic file-system metadata for a single file, together with a hand-rolled,
/// single-line CSV representation (<see cref="ToString"/> / <see cref="FromCsv"/>).
/// </summary>
/// <remarks>
/// The CSV form is positional: DirectoryName, Filename, Ext, Length, LastModified.
/// Commas inside text fields are replaced with '-' rather than quoted, so the
/// conversion is lossy for values that contain commas. The timestamp is written
/// with the "u" format, which has one-second resolution.
/// </remarks>
public class MyFileInfo
{
    /// <summary>Number of comma-separated fields in one CSV record.</summary>
    private const int CsvFieldCount = 5;

    /// <summary>
    /// Builds an instance from the file at <paramref name="filePath"/>.
    /// </summary>
    /// <param name="filePath">Path of an existing file.</param>
    /// <returns>A populated <see cref="MyFileInfo"/>; LastModified is the UTC last-write time.</returns>
    public static MyFileInfo FromPath(string filePath)
    {
        MyFileInfo myInfo = new MyFileInfo();
        myInfo.DirectoryName = Path.GetDirectoryName(filePath);
        myInfo.Filename = Path.GetFileNameWithoutExtension(filePath);
        myInfo.Ext = Path.GetExtension(filePath);

        var info = new FileInfo(filePath);
        myInfo.Length = info.Length;
        myInfo.LastModified = info.LastWriteTimeUtc;
        return myInfo;
    }

    /// <summary>
    /// Parses one CSV record as produced by <see cref="ToString"/>.
    /// </summary>
    /// <param name="csv">A single record: DirectoryName,Filename,Ext,Length,LastModified.</param>
    /// <returns>The parsed instance; LastModified is returned as a UTC value.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="csv"/> is null.</exception>
    /// <exception cref="FormatException">The record has too few fields or a field cannot be parsed.</exception>
    public static MyFileInfo FromCsv(string csv)
    {
        if (csv == null)
            throw new ArgumentNullException(nameof(csv));

        string[] parts = csv.Split(',');
        if (parts.Length < CsvFieldCount)
        {
            throw new FormatException(
                "Expected " + CsvFieldCount + " CSV fields but found " + parts.Length + ".");
        }

        // The record is machine-readable data, so parse it culture-independently.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        MyFileInfo myInfo = new MyFileInfo();
        myInfo.DirectoryName = parts[0];
        myInfo.Filename = parts[1];
        myInfo.Ext = parts[2];
        myInfo.Length = long.Parse(parts[3], invariant);

        // Without these styles DateTime.Parse converts the "...Z" value to local time,
        // so a UTC timestamp would not survive a write/read round trip.
        myInfo.LastModified = DateTime.Parse(
            parts[4],
            invariant,
            System.Globalization.DateTimeStyles.AdjustToUniversal |
            System.Globalization.DateTimeStyles.AssumeUniversal);
        return myInfo;
    }

    /// <summary>
    /// Returns the single-line CSV record for this instance (no trailing newline).
    /// </summary>
    public override string ToString()
    {
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        // Built with Join so the record cannot pick up a stray line break between fields.
        return string.Join(",",
            CleanseCsv(DirectoryName),
            CleanseCsv(Filename),
            CleanseCsv(Ext),
            Length.ToString(invariant),
            LastModified.ToString("u", invariant));
    }

    /// <summary>
    /// Makes a text value safe to embed as one CSV field by replacing commas with '-'.
    /// A null value is written as an empty field.
    /// </summary>
    private static string CleanseCsv(string str)
    {
        if (str == null)
            return string.Empty;

        return str.Replace(',', '-');
    }

    /// <summary>Directory portion of the file path.</summary>
    public string DirectoryName { get; set; }

    /// <summary>File name without its extension.</summary>
    public string Filename { get; set; }

    /// <summary>File extension as returned by <see cref="Path.GetExtension(string)"/>.</summary>
    public string Ext { get; set; }

    /// <summary>File size in bytes.</summary>
    public long Length { get; set; }

    /// <summary>Last write time of the file (UTC when created by FromPath or FromCsv).</summary>
    public DateTime LastModified { get; set; }
}
/// <summary>
/// Wrapper around a list of <see cref="MyFileInfo"/> records so that a whole
/// batch can be handled as a single object.
/// </summary>
public class MyFileInfoPack
{
    /// <summary>
    /// The file records in this pack. The class does not initialize it,
    /// so it is null until a list is assigned.
    /// </summary>
    public List<MyFileInfo> FileInfos
    {
        get;
        set;
    }
}
}
The test runner loads the file system information, then serializes and deserializes the data a few times to work out the kinks:
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Diagnostics;
using Newtonsoft.Json;
namespace JsonVsCsv
{
/// <summary>
/// Console benchmark that collects file metadata from a directory tree and then
/// times serializing / deserializing it as JSON (Json.NET) and as CSV.
/// </summary>
class Program
{
    /// <summary>How many times each format's write/read cycle is repeated.</summary>
    private const int Iterations = 7;

    /// <summary>Serializer instance shared by all JSON iterations.</summary>
    static readonly JsonSerializer m_jsonSerializer = new JsonSerializer();

    /// <summary>
    /// Entry point. Expects exactly one argument: the directory to scan recursively.
    /// Prints per-iteration timings followed by the preparation time and the
    /// median read/write times for each format.
    /// </summary>
    /// <param name="args">Command-line arguments; args[0] is the directory path.</param>
    static void Main(string[] args)
    {
        if (args.Length != 1)
        {
            Console.WriteLine("Usage: JsonVsCsv <dir path to process>");
            return;
        }

        string dirPath = args[0];
        if (!Directory.Exists(dirPath))
        {
            Console.WriteLine("ERROR: Directory does not exist: {0}", dirPath);
            return;
        }

        Stopwatch sw = new Stopwatch();

        // swPrep spans the whole data-gathering phase (listing + populating).
        Stopwatch swPrep = Stopwatch.StartNew();

        Console.Write("Getting file paths...");
        sw.Restart();
        var filePaths = Directory.GetFiles(dirPath, "*", SearchOption.AllDirectories);
        Console.WriteLine("{0} files in {1} ms",
                          filePaths.Length, sw.ElapsedMilliseconds);

        Console.Write("Populating file info pack...");
        sw.Restart();
        var pack = new MyFileInfoPack();
        pack.FileInfos = new List<MyFileInfo>(filePaths.Length);
        foreach (string filePath in filePaths)
        {
            pack.FileInfos.Add(MyFileInfo.FromPath(filePath));
        }
        Console.WriteLine("done in {0} ms", sw.ElapsedMilliseconds);
        swPrep.Stop();

        List<long> jsonConvertTimes = new List<long>(Iterations);
        List<long> jsonReadTimes = new List<long>(Iterations);
        for (int j = 1; j <= Iterations; ++j)
        {
            Console.Write("Converting to JSON...");
            sw.Restart();
            StringBuilder sb = new StringBuilder();
            using (TextWriter tw = new StringWriter(sb))
            {
                m_jsonSerializer.Serialize(tw, pack);
            }
            // Read the clock once so the recorded and printed values agree.
            long writeMs = sw.ElapsedMilliseconds;
            jsonConvertTimes.Add(writeMs);
            Console.WriteLine("{0} KB in {1} ms", sb.Length / 1024, writeMs);

            Console.Write("Reading back from JSON...");
            sw.Restart();
            MyFileInfoPack readBack;
            using (TextReader tr = new StringReader(sb.ToString()))
            using (JsonReader jr = new JsonTextReader(tr))
            {
                readBack = m_jsonSerializer.Deserialize<MyFileInfoPack>(jr);
            }
            long readMs = sw.ElapsedMilliseconds;
            jsonReadTimes.Add(readMs);
            Console.WriteLine("{0} back in {1} ms", readBack.FileInfos.Count, readMs);
        }

        List<long> csvConvertTimes = new List<long>(Iterations);
        List<long> csvReadTimes = new List<long>(Iterations);
        for (int c = 1; c <= Iterations; ++c)
        {
            Console.Write("Converting to CSV...");
            sw.Restart();
            StringBuilder sb = new StringBuilder();
            foreach (var info in pack.FileInfos)
            {
                // One record per line; '\n' is the record separator used when reading back.
                sb.Append(info.ToString()).Append('\n');
            }
            long writeMs = sw.ElapsedMilliseconds;
            csvConvertTimes.Add(writeMs);
            Console.WriteLine("{0} KB in {1} ms", sb.Length / 1024, writeMs);

            Console.Write("Reading back from CSV...");
            sw.Restart();
            MyFileInfoPack readBack = new MyFileInfoPack();
            readBack.FileInfos = new List<MyFileInfo>();
            foreach (string line in sb.ToString().Split('\n'))
            {
                // Skip the empty entry produced by the trailing separator.
                if (line.Length > 0)
                {
                    readBack.FileInfos.Add(MyFileInfo.FromCsv(line));
                }
            }
            long readMs = sw.ElapsedMilliseconds;
            csvReadTimes.Add(readMs);
            Console.WriteLine("{0} back in {1} ms", readBack.FileInfos.Count, readMs);
        }

        // The original call passed no argument, so the literal "{0}" was printed.
        Console.WriteLine("Prep: {0}", swPrep.ElapsedMilliseconds);

        long jsonRead = GetMedian(jsonReadTimes);
        long jsonWrite = GetMedian(jsonConvertTimes);
        Console.WriteLine("JSON: Total: {0} - Read {1} - Write {2}",
                          jsonRead + jsonWrite, jsonRead, jsonWrite);

        long csvRead = GetMedian(csvReadTimes);
        long csvWrite = GetMedian(csvConvertTimes);
        Console.WriteLine("CSV: Total: {0} - Read {1} - Write {2}",
                          csvRead + csvWrite, csvRead, csvWrite);
    }

    /// <summary>
    /// Returns the median of <paramref name="lst"/> without modifying it.
    /// For an even number of entries the upper of the two middle values is returned.
    /// </summary>
    /// <param name="lst">Values to examine; must contain at least one entry.</param>
    /// <returns>The element at index Count / 2 of the sorted values.</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="lst"/> is empty.</exception>
    static long GetMedian(List<long> lst)
    {
        // Sort a copy so the caller's measurements keep their original order.
        List<long> sorted = new List<long>(lst);
        sorted.Sort();
        return sorted[sorted.Count / 2];
    }
}
}
Points of Interest
Here are the results from some files on my laptop:
Converting to JSON...1877 KB in 25 ms
Reading back from JSON...9064 back in 32 ms
Converting to JSON...1877 KB in 26 ms
Reading back from JSON...9064 back in 33 ms
Converting to JSON...1877 KB in 19 ms
Reading back from JSON...9064 back in 25 ms
Converting to JSON...1877 KB in 23 ms
Reading back from JSON...9064 back in 21 ms
Converting to CSV...1189 KB in 12 ms
Reading back from CSV...9064 back in 17 ms
Converting to CSV...1189 KB in 11 ms
Reading back from CSV...9064 back in 18 ms
Converting to CSV...1189 KB in 12 ms
Reading back from CSV...9064 back in 12 ms
Converting to CSV...1189 KB in 15 ms
Reading back from CSV...9064 back in 12 ms
Converting to CSV...1189 KB in 11 ms
Reading back from CSV...9064 back in 21 ms
Converting to CSV...1189 KB in 10 ms
Reading back from CSV...9064 back in 20 ms
Converting to CSV...1189 KB in 8 ms
Reading back from CSV...9064 back in 17 ms
Prep: {0}
JSON: Total: 58 - Read 33 - Write 25
CSV: Total: 28 - Read 17 - Write 11
You can see at the bottom that CSV performs about 2X faster than JSON with this workload. If you are willing to live with ordinal addressing, CSV would seem to be the way to go. Looking at it the other way, for the price of a 2X slowdown, you get easier-to-maintain serialization and compatibility with the rest of the world.
History
- 4th May, 2020: Initial version