Click here to Skip to main content
65,938 articles
CodeProject is changing. Read more.
Articles
(untagged)

List Batching/Paging With Entity Framework or Any IEnumerable in C#

0.00/5 (No votes)
20 May 2019 1  
The aim of this helper class is to divide a large IEnumerable into multiple small list.

Background

Last week, I had worked with a large data object list using Entity Framework. Due to the large data volume, I had to split them into multiple small batches and processed each item as needed. Today, I am going to share a utility/helper class to batch/page a source IEnumerable object efficiently.

IEnumerable Batch/Page Helper

Batch Detail Model

This model will provide all the necessary batching/paging detail. Even we can use this model for paging data binding to the front end.

public class BatchListDetail
{
    public int PageNo { get; set; }
    public int PageSize { get; set; }
    public int TotalItem { get; set; }

    public int TotalPage
    {
        get
        {
            var value = PageSize > 0 
                        ? (int)Math.Ceiling(TotalItem / (double)PageSize)
                        : 0;
            return value;
        }
    }
    public bool HasPreviousPage
    {
        get
        {
            var value = PageNo > 1;
            return value;
        }
    }
    public bool HasNextPage
    {
        get
        {
            var value = PageNo < TotalPage;
            return value;
        }
    }
}

Utility/Helper Class

In this helper class, we will get three different extension methods:

  1. Get page/batch data for a given page number.
  2. Get all data in multiple paged list, starting from a given page number.
  3. Get top (SQL top) data. Please, use this in a scenario where we are going to process each item of a large list and after processing the item will not be available on next batch query anymore.
using System; 
using System.Collections.Generic; 
using System.Linq;

public static class EnumerableHelper
{
    private static List<T> PageData<T>(IEnumerator<T> source, int pageSize)
    {
        if (pageSize < 1)
        {
            throw new ArgumentOutOfRangeException("pageSize value should be 1.");
        }

        List<T> list = new List<T>();
        int itemCount = 0;
        while (source.MoveNext())
        {
            list.Add(source.Current);
            ++itemCount;
            if (itemCount == pageSize)
            {
                break;
            }
        }
        return list;
    }

    private static List<T> PageData<T>(IEnumerator<T> source, int pageNo, int pageSize)
    {
        if (pageNo < 1)
        {
            throw new ArgumentOutOfRangeException("pageNo minimum value should be 1.");
        }
        if (pageSize < 1)
        {
            throw new ArgumentOutOfRangeException("pageSize value should be 1.");
        }

        /*skipping unwanted page items*/
        int skipItemCount = (pageNo - 1) * pageSize;
        for (int i = 0; i < skipItemCount; i++)
        {
            if (!source.MoveNext())
            {
                break;
            }
        }
        /*data*/
        List<T> list = PageData(source, pageSize);
        return list;
    }

    private static List<T> PageData<T>(IEnumerable<T> source, 
            int pageNo, int pageSize, out BatchListDetail details) where T : class
    {
        List<T> list = new List<T>();
        details = new BatchListDetail();

        if (pageNo < 1)
        {
            throw new ArgumentOutOfRangeException("pageNo minimum value should be 1.");
        }
        if (pageSize < 1)
        {
            throw new ArgumentOutOfRangeException("pageSize pageSize value should be 1.");
        }
        int totalItem = source == null ? 0 : source.Count();
        if (totalItem == 0)
        {
            return list;
        }

        /*batch list details*/
        details.PageNo = pageNo;
        details.PageSize = pageSize;
        details.TotalItem = totalItem;

        /*result*/
        list = pageNo == 1
            ? source.Take(pageSize).ToList()
            : source.Skip((pageNo - 1) * pageSize).Take(pageSize).ToList();
        return list;
    }

    /// <summary>
    /// Total list detail with current page data
    /// </summary>
    public static List<T> Batch<T>(this IEnumerable<T> source, 
           int pageNo, int pageSize, out BatchListDetail details) where T : class
    {
        return PageData(source, pageNo, pageSize, out details);
    }

    private static IEnumerable<List<T>> BatchsUsingEnumerable<T>(IEnumerable<T> source, 
            int startingFromPage, int pageSize) where T : class
    {
        BatchListDetail details;
        do
        {
            List<T> list = PageData(source, startingFromPage, pageSize, out details);
            if (list.Count > 0)
            {
                yield return list;
                ++startingFromPage;
            }
        }
        while (details.HasNextPage);
    }

    private static IEnumerable<List<T>> BatchsUsingEnumerator<T>(IEnumerable<T> source, 
            int startingFromPage, int pageSize) where T : class
    {
        IEnumerator<T> enumerator = source.GetEnumerator();

        List<T> list = PageData(enumerator, startingFromPage, pageSize);
        if (list.Count > 0)
        {
            /*first page data*/
            yield return list;

            /*other page data*/
            do
            {
                list = PageData(enumerator, pageSize);
                if (list.Count > 0)
                {
                    yield return list;
                }
            } while (list.Count > 0);
        }
    }

    /// <summary>
    /// Total list to multiple split list
    /// 
    /// Default useSourceEnumerator = false
    ///     If the data source is an in-memory list and we are sure that 
    ///     it is not going to change, It's good to use
    /// 
    /// Set useSourceEnumerator = true
    ///     If the data source is a Db where changes can happen 
    ///     any time by other processes and 
    ///     we don't want to incorporate the new changes
    ///     
    ///     Reading source as Enumerator can be a bit slower process than Enumerable, 
    ///     but it will maintain consistency. 
    /// </summary>
    public static IEnumerable<List<T>> Batchs<T>(this IEnumerable<T> source, 
        int startingFromPage, int size, bool useSourceEnumerator = false) where T : class
    {
        if (useSourceEnumerator)
        {
            return BatchsUsingEnumerator(source, startingFromPage, size);

        }
        return BatchsUsingEnumerable(source, startingFromPage, size);
    }

    /// <summary>
    /// Use in a scenario when we are going to process 
    /// each item of a large list, batch wise.
    /// Need to process the item in a way so that, after processing this 
    /// will not be available on next batch query anymore.
    /// </summary>
    public static bool Batch<T>(this IEnumerable<T> source, int size, 
                  out List<T> result) where T : class
    {
        result = source.Take(size).ToList();
        return result.Any();
    }
}

Using With List

Here, we are testing the helper class with a List/IList object:

/*list*/
/*list*/
List<People> sourceList = Data.Peoples(TotalItem);
/*queryable*/
//IQueryable<People> sourceList = Data.Peoples(TotalItem).AsQueryable();

/*page wise data*/
BatchListDetail details;
List<People> batch = sourceList.Batch(1, 3, out details).ToList();

/*full list to multiple paged list*/
List<List<People>> batchs = sourceList.Batchs(1, 3).ToList();
List<List<People>> batchsUseingSourceEnumerator = 
     sourceList.Batchs(1, 3, useSourceEnumerator: true).ToList();
/*batching data source as Enumerable*/
string name = "xName"; int count = 0;
foreach (var itemList in sourceList.Batchs(3, 3))
{
    AddNewPeople(sourceList, name, ++count);
    foreach (var item in itemList)               /*newly added 'xName' items will come*/
    {
        People people = item;
    }
}
/*batching data source as Enumerator*/
name = "xxName"; count = 0;
foreach (var itemList in sourceList.Batchs(3, 3, useSourceEnumerator: true))
{
    //AddNewPeople(sourceList, name, ++count);   /*will throw error, 
                                                   System.InvalidOperationException*/     
    foreach (var item in itemList)               /*newly added xxName items will not come*/
    {
        People people = item;
    }
}
/*page out of range*/
batchs = sourceList.Batchs(10, 3).ToList();
batchsUseingSourceEnumerator = sourceList.Batchs(10, 3, useSourceEnumerator: true).ToList();

/*get batch data, process and do same until all processed*/
var query = sourceList.Where(x => !x.IsActive);
List<People> resultList;
while (query.Batch(3, out resultList))
{
    foreach (var item in resultList)
    {
        item.IsActive = true;
    }
}

Using With Entity Framework

Db Connection String Setup for SQL Server

  1. Change connection string at app.config file
  2. Type Update-Database at package manager console
<connectionStrings>
    <!--For local development-->
    <!--<add name="DbPms" connectionString="Data Source=(local);
         Initial Catalog=PMS;Integrated Security=True" 
         providerName="System.Data.SqlClient" />-->
    <add name="DbPms" connectionString="Data Source=DESKTOP-GSTET0K\MSSQLSERVER2014;
     Initial Catalog=PMS;Integrated Security=True" providerName="System.Data.SqlClient" />
</connectionStrings>

Now we are good to connect to our local SQL Server. Let's populate some sample data in our database for test purposes:

private static Db PopulateDb()
{
    Db db = new Db();
    db.People.RemoveRange(db.People);               /*remove old data*/
    db.SaveChanges();
    db.People.AddRange(Data.Peoples(TotalItem));    /*add new data*/
    db.SaveChanges();
    return db;
}

private static void AddNewPeople(string name, int count)
{
    /*
     * adding using a new db connection
     * 
     * same connection will return throw error, while
     *      reading using IEnumerator and adding a new row
     */
    Db db = new Db();
    db.People.Add(Data.People(name, count));
    db.SaveChanges();
    db.Dispose();
}

IQueryable, IDbSet

Here, we are testing the utility class with Entity Framework and IQueryable, IDbSet objects.

Db db = PopulateDb();
List<People> list = db.People.ToList();

/*page wise data*/
BatchListDetail details;
List<People> batch = db.People.Batch(1, 3, out details).ToList();

/*full list to multiple paged list*/
List<List<People>> batchs = db.People.Batchs(1, 3).ToList();
List<List<People>> batchsUseingSourceEnumerator = 
         db.People.Batchs(1, 3, useSourceEnumerator: true).ToList();
/*batching data source as Enumerable*/
string name = "xName"; int count = 0;
foreach (var itemList in db.People.Batchs(3, 3))
{
    AddNewPeople(name, ++count);
    foreach (var item in itemList)          /*newly added 'xName' items will come*/
    {
        People people = item;
    }
}
/*batching data source as Enumerator*/
name = "xxName"; count = 0;
foreach (var itemList in db.People.Batchs(3, 3, useSourceEnumerator: true))
{
    AddNewPeople(name, ++count);            /*will throw error, 
                                              if same db object been used to add*/
    foreach (var item in itemList)          /*newly added 'xxName' items will not come*/
    {
        People people = item;
    }
}
/*page out of range*/
batchs = db.People.Batchs(10, 3).ToList();
batchsUseingSourceEnumerator = db.People.Batchs(10, 3, useSourceEnumerator: true).ToList();

/*get batch data, process and do same until all processed*/
var query = db.People.Where(x => !x.IsActive);
List<People> resultList;
while (query.Batch(3, out resultList))
{
    foreach (var item in resultList)
    {
        item.IsActive = true;
    }
    db.SaveChanges();                   /*save to db*/
}

SQL or DbRawSqlQuery

Now let's try the helper class with DbRawSqlQuery (SQL query result object):

Db db = PopulateDb();
List<PeopleModel> list = db.Peoples().ToList();

/*page wise data*/
BatchListDetail modelDetails;
List<PeopleModel> modelBatch = db.Peoples().Batch(1, 3, out modelDetails).ToList();

/*full list to multiple paged list*/
List<List<PeopleModel>> batchs = db.Peoples().Batchs(1, 3).ToList();
List<List<PeopleModel>> batchsUseingSourceEnumerator = 
          db.Peoples().Batchs(1, 3, useSourceEnumerator: true).ToList();
/*batching data source as Enumerable*/
string name = "xName"; int count = 0;
foreach (var itemList in db.Peoples().Batchs(3, 3))
{
    AddNewPeople(name, ++count);
    foreach (var item in itemList)          /*newly added 'xName' items will come*/
    {
        PeopleModel people = item;
    }
}
/*batching data source as Enumerator*/
name = "xxName"; count = 0;
foreach (var itemList in db.Peoples().Batchs(3, 3, useSourceEnumerator: true))
{
    AddNewPeople(name, ++count);       /*will throw error, 
                                         if same db object been used to add*/
    foreach (var item in itemList)     /*newly added 'xxName' items will not come*/
    {
        PeopleModel people = item;
    }
}
/*page out of range*/
batchs = db.Peoples().Batchs(10, 3).ToList();
batchsUseingSourceEnumerator = 
    db.Peoples().Batchs(10, 3, useSourceEnumerator: true).ToList();

/*get batch data, process and do same until all processed*/
var modeQuery = db.Peoples().Where(x => x.IsActive);
List<PeopleModel> modelResultList;
while (modeQuery.Batch(3, out modelResultList))
{
    foreach (var item in modelResultList)
    {
        People people = db.People.Find(item.Id);
        people.IsActive = false;
    }
    db.SaveChanges();                   /*save to db*/
}

Working as expected, right!

Using the Batchs() Function

If the data source is an in-memory list and we are sure that it is not going to change, it's good to use.

List<List<People>> batchs = sourceList.Batchs(1, 3).ToList();

But if the data source is a Db where changes can happen any time by other processes and we don't want to incorporate the new changes, we should use:

List<List<People>> batchsUseingSourceEnumerator = 
          sourceList.Batchs(1, 3, useSourceEnumerator: true).ToList();

Reading source as Enumerator can be a slightly slower process than Enumerable, but it will maintain consistency. Thank you Daniele Rota Nodari for the comment.

Good to Know!!!

Please find the Visual Studio 2017 solution as an attachment. If I have missed anything or if you have any suggestions, just let me know.

History

  • 20th May, 2019: Initial version

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here