This is the first part of a two part series about how to perform bulk inserts in your .NET applications, using a SQL Server database.
In this article, I’ll show how to create a wrapper object for SqlBulkCopy
that can do a bulk insert for a collection of objects. In the second article, I’ll show how that wrapper can be easily integrated with Entity Framework (creating extension methods for DbContext
and ObjectContext
).
Table of Contents
The Problem
I am working on an application that parses Excel files and creates an object for each line. After some validations and properties mapping, the objects are then inserted in a database table, one by one. At the beginning, this wasn’t a big problem because the number of objects to insert in the database was small, but now there can be files with thousands of rows, so using Entity Framework isn’t the best way to do it (currently Entity Framework has no support for bulk insert operations).
The Solution
I decided to use the object SqlBulkCopy because it seemed the best and easiest option for performing a bulk insert in a SQL Server database. I created a wrapper for SqlBulkCopy
– BulkCopy.cs. This class works the same way as SqlBulkCopy
but has some extra features.
The following properties are available:
DestinationTableName
: Name of the destination table on the server BatchSize
(optional): Number of rows in each batch. At the end of each batch, the rows in the batch are sent to the server ConnectionString
: Database connection string ExpressionFilter
: Filters the properties to be included
And these are the methods available (see examples below):
public void WriteToServer<T>(IEnumerable<T> items) where T : class;
public void WriteToServer<T>(IEnumerable<T> items, SqlBulkCopyOptions options) where T : class;
public void WriteToServer<T>(IEnumerable<T> items,
SqlBulkCopyOptions options, IEnumerable<SqlBulkCopyColumnMapping> columnMappings) where T : class;
BulkCopy.cs source code:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data.SqlClient;
using System.Data;
using System.Linq;
using System.Linq.Expressions;
using System.Text;
using BulkCopy.Extensions;
namespace BulkCopy
{
public class BulkCopy
{
public string DestinationTableName { get; set; }
public int? BatchSize { get; set; }
public string ConnectionString { get; set; }
public Func<PropertyDescriptor, bool> ExpressionFilter { get; set; }
public BulkCopy()
{
}
public BulkCopy(string connectionString)
{
this.ConnectionString = connectionString;
}
public virtual void WriteToServer<T>(IEnumerable<T> items) where T : class
{
WriteToServer(items, SqlBulkCopyOptions.Default);
}
public virtual void WriteToServer<T>
(IEnumerable<T> items, SqlBulkCopyOptions options) where T : class
{
DataTable dataTable = (this.ExpressionFilter == null) ?
items.ToDataTable() :
items.ToDataTable(this.ExpressionFilter);
WriteToServer(dataTable, options);
}
public virtual void WriteToServer<T>(IEnumerable<T> items,
SqlBulkCopyOptions options,
IEnumerable<SqlBulkCopyColumnMapping> columnMappings)
where T : class
{
DataTable dataTable = (this.ExpressionFilter == null) ?
items.ToDataTable() :
items.ToDataTable(this.ExpressionFilter);
WriteToServer(dataTable, options, columnMappings);
}
private void WriteToServer(DataTable dataTable)
{
WriteToServer(dataTable, SqlBulkCopyOptions.Default);
}
private void WriteToServer(DataTable dataTable, SqlBulkCopyOptions options)
{
var columnMappings = from x in dataTable.Columns.Cast<DataColumn>()
select new SqlBulkCopyColumnMapping(x.ColumnName, x.ColumnName);
WriteToServer(dataTable, options, columnMappings);
}
private void WriteToServer(DataTable dataTable,
SqlBulkCopyOptions options,
IEnumerable<SqlBulkCopyColumnMapping> columnMappings)
{
string destinationTableName =
(string.IsNullOrWhiteSpace(DestinationTableName) ? null : DestinationTableName)
?? (string.IsNullOrWhiteSpace(dataTable.TableName) ? null : dataTable.TableName);
if(string.IsNullOrWhiteSpace(destinationTableName))
throw new ArgumentException("destinationTableName cannot be null or empty");
using(var bulkCopy = new SqlBulkCopy(this.ConnectionString, options))
{
bulkCopy.DestinationTableName = destinationTableName;
if(this.BatchSize.HasValue)
bulkCopy.BatchSize = this.BatchSize.Value;
foreach(var mapping in columnMappings)
bulkCopy.ColumnMappings.Add(mapping);
bulkCopy.WriteToServer(dataTable);
}
}
}
}
The BulkCopy
object is internally using some extension methods that convert a collection of objects to a DataTable
(taken from SO’s post Generic List to DataTable, with some small modifications).
Click to expand the source code:
using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.Linq;
using System.Linq.Expressions;
using System.Text;
using System.Data;
using System.ComponentModel;
namespace BulkCopy.Extensions
{
public static class DataExtensions
{
private static Type[] dataTypes = new[] {
typeof(byte)
,typeof(sbyte)
,typeof(short)
,typeof(ushort)
,typeof(int)
,typeof(uint)
,typeof(long)
,typeof(ulong)
,typeof(float)
,typeof(double)
,typeof(decimal)
,typeof(bool)
,typeof(char)
,typeof(Guid)
,typeof(DateTime)
,typeof(DateTimeOffset)
,typeof(byte[])
,typeof(string)
};
public static DataTable ToDataTable<T>(this IList<T> data)
{
IEnumerable<PropertyDescriptor> properties =
from x in TypeDescriptor.GetProperties(typeof(T))
.Cast<PropertyDescriptor>()
where IsBasicType(x.PropertyType)
select x;
DataTable table = GetDataTable(data, properties);
return table;
}
public static DataTable ToDataTable<T>(this IList<T> data,
Func<PropertyDescriptor, bool> expression)
{
var properties = TypeDescriptor.GetProperties(typeof(T))
.Cast<PropertyDescriptor>()
.Where(expression);
DataTable table = GetDataTable(data, properties);
return table;
}
public static DataTable ToDataTable<T>(this IEnumerable<T> data)
{
return data.ToList().ToDataTable();
}
public static DataTable ToDataTable<T>(this IEnumerable<T> data,
Func<PropertyDescriptor, bool> expression)
{
return data.ToList().ToDataTable(expression);
}
#region Private methods
private static bool IsBasicType(Type type)
{
type = Nullable.GetUnderlyingType(type) ?? type;
return type.IsEnum || dataTypes.Contains(type);
}
private static DataTable GetDataTable<T>(this IList<T> data,
IEnumerable<PropertyDescriptor> mappedProperties)
{
DataTable table = new DataTable();
foreach(PropertyDescriptor prop in mappedProperties)
{
table.Columns.Add(prop.Name,
Nullable.GetUnderlyingType(prop.PropertyType) ??
prop.PropertyType);
}
foreach(T item in data)
{
DataRow row = table.NewRow();
foreach(PropertyDescriptor prop in mappedProperties)
{
object value = prop.GetValue(item) ?? DBNull.Value;
row[prop.Name] = value;
}
table.Rows.Add(row);
}
return table;
}
#endregion
}
}
Using the Code
Assuming a table named dbo.Albums
(I’m using Mvc Music Store model):
1. Basic Usage
This example shows how to use a model object whose property names match the database column names from the table above:
public class Album
{
public virtual int AlbumId { get; set; }
public virtual int GenreId { get; set; }
public virtual int ArtistId { get; set; }
public virtual string Title { get; set; }
public virtual decimal Price { get; set; }
public virtual string AlbumArtUrl { get; set; }
}
All you need to do is to specify the connection string and the destination table:
IEnumerable<Album> data = GetData();
var bulkCopy = new BulkCopy() {
ConnectionString = ConnectionString,
DestinationTableName = "dbo.Albums"
};
bulkCopy.WriteToServer(data);
2. Specifying Batch Size and Bulk Options
Using the model from the previous example, you can specify some other options:
IEnumerable<Album> data = GetData();
var bulkCopy = new BulkCopy() {
BatchSize = 200,
ConnectionString = ConnectionString,
DestinationTableName = "dbo.Albums"
};
bulkCopy.WriteToServer(data, SqlBulkCopyOptions.CheckConstraints);
3. Using Column Mappings
This example shows how to use a model object whose property names don’t match the database column names:
public class Album
{
public virtual int Id { get; set; }
public virtual int GenreId { get; set; }
public virtual int ArtistId { get; set; }
public virtual string AlbumTitle { get; set; }
public virtual decimal Price { get; set; }
public virtual string AlbumArtImage { get; set; }
}
In this case, we have to create the column mappings. It can be done like this:
private static IEnumerable<SqlBulkCopyColumnMapping> GetColumnMappings()
{
yield return new SqlBulkCopyColumnMapping("Id", "AlbumId");
yield return new SqlBulkCopyColumnMapping("GenreId", "GenreId");
yield return new SqlBulkCopyColumnMapping("ArtistId", "ArtistId");
yield return new SqlBulkCopyColumnMapping("AlbumTitle", "Title");
yield return new SqlBulkCopyColumnMapping("Price", "Price");
yield return new SqlBulkCopyColumnMapping("AlbumArtImage", "AlbumArtUrl");
}
Finally, the bulk insert can be done this way:
IEnumerable<Album> data = GetData();
IEnumerable<SqlBulkCopyColumnMapping> mappings = GetColumnMappings();
var bulkCopy = new BulkCopy() {
ConnectionString = ConnectionString,
DestinationTableName = "dbo.Albums"
};
bulkCopy.WriteToServer(data,SqlBulkCopyOptions.Default, mappings);
4. Filtering Properties to be Copied
This example shows how to filter the properties of a model object to be used in the bulk insert:
public partial class Album
{
public virtual int AlbumId { get; set; }
public virtual int GenreId { get; set; }
public virtual int ArtistId { get; set; }
public virtual string Title { get; set; }
public virtual decimal Price { get; set; }
public virtual string AlbumArtUrl { get; set; }
public virtual string P1 { get; set; }
public virtual string P2 { get; set; }
}
Properties P1
and P2
don’t match any column of the table above, so they cannot be used in the bulk insert operation. Creating a filter to exclude those properties and using it can be done like this:
var nonMappedProperties = new string[] { "P1", "P2" };
Func<PropertyDescriptor, bool> expression = x =>
!nonMappedProperties.Contains(x.Name);
IEnumerable<Album> data = GetData();
var bulkCopy = new BulkCopy() {
BatchSize = 200,
ConnectionString = ConnectionString,
DestinationTableName = "dbo.Albums",
ExpressionFilter = expression
};
bulkCopy.WriteToServer(data, SqlBulkCopyOptions.CheckConstraints);
That’s it! In the next article, I’ll show you how to easily integrate this wrapper class in Entity Framework (creating extension methods for DbContext
and ObjectContext
).
References