Click here to Skip to main content
65,938 articles
CodeProject is changing. Read more.
Articles
(untagged)

Split DataTable into Multiple DataTables C#

0.00/5 (No votes)
19 May 2019 1  
The aim of this helper class is to divide a large datatable into multiple small datatables.

Background

A few days ago, I had to work with large data tables which may have more than 48000000 records. I had to split them into fixed size separate datatables and process them as needed. I found a number of ways to do split and today, I am going to share all possible ways considering efficiency.

DataTable Splitter

We are about to create a few splitter classes, that we are going to use in our helper class.

Base Splitter

using System;
using System.CodeDom;
using System.Collections.Generic;
using System.Data;

public abstract class DataTableSplitter
{
    protected DataTable SourceTable;
    protected int BatchSize;
    protected string TableNamePrefix;

    protected DataTableSplitter(DataTable table, int size, string namePrefix = "")
    {
        SourceTable = table;
        BatchSize = size;
        TableNamePrefix = namePrefix;
    }

    protected DataTable NewSplitTable(int tableCount)
    {
        DataTable dataTable = SourceTable.Clone();
        dataTable.TableName = !string.IsNullOrEmpty(TableNamePrefix)
            ? TableNamePrefix + "_" + tableCount
            : String.Empty;
        dataTable.Clear();
        return dataTable;
    }

    public abstract IEnumerable<DataTable> Split();
}

Regular Splitter (Without Making Any Change to the Source)

This splitter will create new datatables without introducing any change to source datatable, source datatable will be as it was.

public class DataTableRegularSplitter : DataTableSplitter
{
    public DataTableRegularSplitter
     (DataTable table, int size, string namePrefix = "") : base(table, size, namePrefix)
    {
    }

    public override IEnumerable<DataTable> Split()
    {
        int tableCount = 0;
        int rowCount = 0;
        DataTable dataTable = null;

        foreach (DataRow row in SourceTable.Rows)
        {
            /*create new table*/
            if (dataTable == null)
            {
                tableCount++;
                dataTable = NewSplitTable(tableCount);
            }

            /*row add to new table/remove from source*/
            DataRow newRow = dataTable.NewRow();
            newRow.ItemArray = row.ItemArray;
            dataTable.Rows.Add(newRow);
            rowCount++;

            /*return batch sized new table*/
            if (rowCount == BatchSize)
            {
                yield return dataTable;

                rowCount = 0;
                dataTable = null;
            }
        }

        /*return new table with remaining rows*/
        if (dataTable != null && dataTable.Rows.Count > 0)
        {
            yield return dataTable;
        }
    }
}

Remove Splitter (Removing Rows From Source Using Remove Functionality)

This splitter will create new datatables and will include changes to source datatable. At the end of the process, there will be no row at the source datatable. Remove() method basically removes the row immediately.

public class DataTableRemoveSplitter : DataTableSplitter
{
    public DataTableRemoveSplitter
     (DataTable table, int size, string namePrefix = "") : base(table, size, namePrefix)
    {
    }

    public override IEnumerable<DataTable> Split()
    {
        int tableCount = 0;
        int rowCount = 0;
        DataTable dataTable = null;

        while (SourceTable.Rows.Count > 0)
        {
            DataRow row = SourceTable.Rows[0];  /*first row*/

            /*create new table*/
            if (dataTable == null)
            {
                tableCount++;
                dataTable = NewSplitTable(tableCount);
            }

            /*row add to new table/remove from source*/
            DataRow newRow = dataTable.NewRow();
            newRow.ItemArray = row.ItemArray;
            dataTable.Rows.Add(newRow);
            SourceTable.Rows.Remove(row);
            rowCount++;

            /*return batch sized new table*/
            if (rowCount == BatchSize)
            {
                yield return dataTable;

                rowCount = 0;
                dataTable = null;
            }
        }

        /*return new table with remaining rows*/
        if (dataTable != null && dataTable.Rows.Count > 0)
        {
            yield return dataTable;
        }
    }
}

Remove Splitter (Removing Rows From Source Using Delete Functionality)

This splitter is going to do the same as above. By calling Delete() method, the row state becomes deleted and remains in the datatable until we call AcceptChanges().

public class DataTableDeleteSplitter : DataTableSplitter
{
    public DataTableDeleteSplitter(DataTable table, 
           int size, string namePrefix = "") : base(table, size, namePrefix)
    {
    }

    public override IEnumerable<DataTable> Split()
    {
        SourceTable.AcceptChanges();    /*important*/

        int tableCount = 0;
        int rowCount = 0;
        DataTable dataTable = null;

        for (int sourceTableIndex = 0; SourceTable.Rows.Count > 0; sourceTableIndex++)
        {
            DataRow row = SourceTable.Rows[sourceTableIndex];

            /*create new table*/
            if (dataTable == null)
            {
                tableCount++;
                dataTable = NewSplitTable(tableCount);
            }

            /*row add to new table/remove from source*/
            DataRow newRow = dataTable.NewRow();
            newRow.ItemArray = row.ItemArray;
            dataTable.Rows.Add(newRow);
            row.Delete();
            rowCount++;

            /*return batch sized new table*/
            if (rowCount == BatchSize)
            {
                SourceTable.AcceptChanges();
                yield return dataTable;

                rowCount = 0;
                dataTable = null;
                sourceTableIndex = -1;  /*loop need to start again from 0*/
            }
        }

        /*return new table with remaining rows*/
        if (dataTable != null && dataTable.Rows.Count > 0)
        {
            SourceTable.AcceptChanges();
            yield return dataTable;
        }
    }
}

Remove Vs Delete

  • Rows Remove() method removes dataRow immediately and won't be recovered.
  • Delete() method waits for Acceptchanges() call. Can be recovered by using Rejectchanges().
  • Remove() is faster than Delete().
  • Delete() is good with db attached datatables.

Split Helper Class

Here in this helper, we are going to use our slipper classes. We will also be able to include options on how to remove a row from the source datatable:

using System;
using System.Collections.Generic;
using System.Data;

public class DataTableHelper
{
    public enum RowRemoveType
    {
        Remove,
        Delete
    }

    public static IEnumerable<DataTable> 
           Split(DataTable sourceTable, int batchSize, string tableNamePrefix = "")
    {
        return new DataTableRegularSplitter(sourceTable, batchSize, tableNamePrefix).Split();
    }

    public static IEnumerable<DataTable> SplitAndRemoveFromSource
          (DataTable sourceTable, int batchSize, string tableNamePrefix = "", 
           RowRemoveType rowRemoveType = RowRemoveType.Remove)
    {
        switch (rowRemoveType)
        {
            case RowRemoveType.Remove:
                return new DataTableRemoveSplitter
                       (sourceTable, batchSize, tableNamePrefix).Split();
            case RowRemoveType.Delete:
                return new DataTableDeleteSplitter
                       (sourceTable, batchSize, tableNamePrefix).Split();
            default:
                throw new Exception("Unknown remove type");
        }
    }
}

Using Split Helper

Regular Split (Without Making Any Change to the Source)

/*no change in source table*/
DataTable table = Data.Table(3);
List<DataTable> tables = DataTableHelper.Split(table, 1).ToList();

Remove Split (Removing Rows From Source Using Remove Functionality)

/*change in source table: using remove*/
DataTable table1 = Data.Table(3);
List<DataTable> tables1 = DataTableHelper.SplitAndRemoveFromSource(table1, 1).ToList();

Remove Split (Removing Rows From Source Using Delete Functionality)

/*change in source table: using delete, good after db operations*/
DataTable table2 = Data.Table(3);
List<DataTable> tables2 = DataTableHelper.SplitAndRemoveFromSource
                          (table2, 1, "", DataTableHelper.RowRemoveType.Delete).ToList();

Generic Splitter

This generic splitter can split any IEnumerable source object including any datatable. This splitter will not include any change to the source object.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

public static class EnumerableExtension
{
    /*https://stackoverflow.com/a/34665432/2948523*/
    public static IEnumerable<IEnumerable<T>> ToBatch<T>
                  (this IEnumerable<T> enumerable, int batchSize)
    {
        int itemsReturned = 0;
        var list = enumerable.ToList();
        int count = list.Count;
        while (itemsReturned < count)
        {
            int currentChunkSize = Math.Min(batchSize, count - itemsReturned);
            yield return list.GetRange(itemsReturned, currentChunkSize);
            itemsReturned += currentChunkSize;
        }
    }  
}

Using Generic Splitter

Here, we are using the generic splitter to split both datatable and list:

/*generic split*/
/*table*/
List<DataTable> tables3 = Data.Table(3).AsEnumerable()
                                .ToBatch(1)
                                .Select(x => x.CopyToDataTable()).ToList();

/*list*/
List<List<Student>> lists1 = Data.List(3).ToBatch(1).Select(x => x.ToList()).ToList();

Good to Know!!!

Please find the Visual Studio 2017 solution as an attachment. If I have missed anything or if you have any suggestions, just let me know.

History

  • 19th May, 2019: Initial version

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here