Background
Last week, I had worked with a large data object list using Entity Framework. Due to the large data volume, I had to split them into multiple small batches and processed each item as needed. Today, I am going to share a utility/helper class to batch/page a source IEnumerable
object efficiently.
IEnumerable Batch/Page Helper
Batch Detail Model
This model will provide all the necessary batching/paging detail. Even we can use this model for paging data binding to the front end.
public class BatchListDetail
{
public int PageNo { get; set; }
public int PageSize { get; set; }
public int TotalItem { get; set; }
public int TotalPage
{
get
{
var value = PageSize > 0
? (int)Math.Ceiling(TotalItem / (double)PageSize)
: 0;
return value;
}
}
public bool HasPreviousPage
{
get
{
var value = PageNo > 1;
return value;
}
}
public bool HasNextPage
{
get
{
var value = PageNo < TotalPage;
return value;
}
}
}
Utility/Helper Class
In this helper class, we will get three different extension methods:
- Get page/batch data for a given page number.
- Get all data in multiple paged list, starting from a given page number.
- Get top (SQL top) data. Please, use this in a scenario where we are going to process each item of a large list and after processing the item will not be available on next batch query anymore.
using System;
using System.Collections.Generic;
using System.Linq;
public static class EnumerableHelper
{
private static List<T> PageData<T>(IEnumerator<T> source, int pageSize)
{
if (pageSize < 1)
{
throw new ArgumentOutOfRangeException("pageSize value should be 1.");
}
List<T> list = new List<T>();
int itemCount = 0;
while (source.MoveNext())
{
list.Add(source.Current);
++itemCount;
if (itemCount == pageSize)
{
break;
}
}
return list;
}
private static List<T> PageData<T>(IEnumerator<T> source, int pageNo, int pageSize)
{
if (pageNo < 1)
{
throw new ArgumentOutOfRangeException("pageNo minimum value should be 1.");
}
if (pageSize < 1)
{
throw new ArgumentOutOfRangeException("pageSize value should be 1.");
}
int skipItemCount = (pageNo - 1) * pageSize;
for (int i = 0; i < skipItemCount; i++)
{
if (!source.MoveNext())
{
break;
}
}
List<T> list = PageData(source, pageSize);
return list;
}
private static List<T> PageData<T>(IEnumerable<T> source,
int pageNo, int pageSize, out BatchListDetail details) where T : class
{
List<T> list = new List<T>();
details = new BatchListDetail();
if (pageNo < 1)
{
throw new ArgumentOutOfRangeException("pageNo minimum value should be 1.");
}
if (pageSize < 1)
{
throw new ArgumentOutOfRangeException("pageSize pageSize value should be 1.");
}
int totalItem = source == null ? 0 : source.Count();
if (totalItem == 0)
{
return list;
}
details.PageNo = pageNo;
details.PageSize = pageSize;
details.TotalItem = totalItem;
list = pageNo == 1
? source.Take(pageSize).ToList()
: source.Skip((pageNo - 1) * pageSize).Take(pageSize).ToList();
return list;
}
public static List<T> Batch<T>(this IEnumerable<T> source,
int pageNo, int pageSize, out BatchListDetail details) where T : class
{
return PageData(source, pageNo, pageSize, out details);
}
private static IEnumerable<List<T>> BatchsUsingEnumerable<T>(IEnumerable<T> source,
int startingFromPage, int pageSize) where T : class
{
BatchListDetail details;
do
{
List<T> list = PageData(source, startingFromPage, pageSize, out details);
if (list.Count > 0)
{
yield return list;
++startingFromPage;
}
}
while (details.HasNextPage);
}
private static IEnumerable<List<T>> BatchsUsingEnumerator<T>(IEnumerable<T> source,
int startingFromPage, int pageSize) where T : class
{
IEnumerator<T> enumerator = source.GetEnumerator();
List<T> list = PageData(enumerator, startingFromPage, pageSize);
if (list.Count > 0)
{
yield return list;
do
{
list = PageData(enumerator, pageSize);
if (list.Count > 0)
{
yield return list;
}
} while (list.Count > 0);
}
}
public static IEnumerable<List<T>> Batchs<T>(this IEnumerable<T> source,
int startingFromPage, int size, bool useSourceEnumerator = false) where T : class
{
if (useSourceEnumerator)
{
return BatchsUsingEnumerator(source, startingFromPage, size);
}
return BatchsUsingEnumerable(source, startingFromPage, size);
}
public static bool Batch<T>(this IEnumerable<T> source, int size,
out List<T> result) where T : class
{
result = source.Take(size).ToList();
return result.Any();
}
}
Using With List
Here, we are testing the helper class with a List
/IList
object:
List<People> sourceList = Data.Peoples(TotalItem);
BatchListDetail details;
List<People> batch = sourceList.Batch(1, 3, out details).ToList();
List<List<People>> batchs = sourceList.Batchs(1, 3).ToList();
List<List<People>> batchsUseingSourceEnumerator =
sourceList.Batchs(1, 3, useSourceEnumerator: true).ToList();
string name = "xName"; int count = 0;
foreach (var itemList in sourceList.Batchs(3, 3))
{
AddNewPeople(sourceList, name, ++count);
foreach (var item in itemList)
{
People people = item;
}
}
name = "xxName"; count = 0;
foreach (var itemList in sourceList.Batchs(3, 3, useSourceEnumerator: true))
{
System.InvalidOperationException*/
foreach (var item in itemList)
{
People people = item;
}
}
batchs = sourceList.Batchs(10, 3).ToList();
batchsUseingSourceEnumerator = sourceList.Batchs(10, 3, useSourceEnumerator: true).ToList();
var query = sourceList.Where(x => !x.IsActive);
List<People> resultList;
while (query.Batch(3, out resultList))
{
foreach (var item in resultList)
{
item.IsActive = true;
}
}
Using With Entity Framework
Db Connection String Setup for SQL Server
- Change connection string at app.config file
- Type
Update-Database
at package manager console
<connectionStrings>
<add name="DbPms" connectionString="Data Source=DESKTOP-GSTET0K\MSSQLSERVER2014;
Initial Catalog=PMS;Integrated Security=True" providerName="System.Data.SqlClient" />
</connectionStrings>
Now we are good to connect to our local SQL Server. Let's populate some sample data in our database for test purposes:
private static Db PopulateDb()
{
Db db = new Db();
db.People.RemoveRange(db.People);
db.SaveChanges();
db.People.AddRange(Data.Peoples(TotalItem));
db.SaveChanges();
return db;
}
private static void AddNewPeople(string name, int count)
{
Db db = new Db();
db.People.Add(Data.People(name, count));
db.SaveChanges();
db.Dispose();
}
IQueryable, IDbSet
Here, we are testing the utility class with Entity Framework and IQueryable
, IDbSet
objects.
Db db = PopulateDb();
List<People> list = db.People.ToList();
BatchListDetail details;
List<People> batch = db.People.Batch(1, 3, out details).ToList();
List<List<People>> batchs = db.People.Batchs(1, 3).ToList();
List<List<People>> batchsUseingSourceEnumerator =
db.People.Batchs(1, 3, useSourceEnumerator: true).ToList();
string name = "xName"; int count = 0;
foreach (var itemList in db.People.Batchs(3, 3))
{
AddNewPeople(name, ++count);
foreach (var item in itemList)
{
People people = item;
}
}
name = "xxName"; count = 0;
foreach (var itemList in db.People.Batchs(3, 3, useSourceEnumerator: true))
{
AddNewPeople(name, ++count);
foreach (var item in itemList)
{
People people = item;
}
}
batchs = db.People.Batchs(10, 3).ToList();
batchsUseingSourceEnumerator = db.People.Batchs(10, 3, useSourceEnumerator: true).ToList();
var query = db.People.Where(x => !x.IsActive);
List<People> resultList;
while (query.Batch(3, out resultList))
{
foreach (var item in resultList)
{
item.IsActive = true;
}
db.SaveChanges();
}
SQL or DbRawSqlQuery
Now let's try the helper class with DbRawSqlQuery
(SQL query result object):
Db db = PopulateDb();
List<PeopleModel> list = db.Peoples().ToList();
BatchListDetail modelDetails;
List<PeopleModel> modelBatch = db.Peoples().Batch(1, 3, out modelDetails).ToList();
List<List<PeopleModel>> batchs = db.Peoples().Batchs(1, 3).ToList();
List<List<PeopleModel>> batchsUseingSourceEnumerator =
db.Peoples().Batchs(1, 3, useSourceEnumerator: true).ToList();
string name = "xName"; int count = 0;
foreach (var itemList in db.Peoples().Batchs(3, 3))
{
AddNewPeople(name, ++count);
foreach (var item in itemList)
{
PeopleModel people = item;
}
}
name = "xxName"; count = 0;
foreach (var itemList in db.Peoples().Batchs(3, 3, useSourceEnumerator: true))
{
AddNewPeople(name, ++count);
foreach (var item in itemList)
{
PeopleModel people = item;
}
}
batchs = db.Peoples().Batchs(10, 3).ToList();
batchsUseingSourceEnumerator =
db.Peoples().Batchs(10, 3, useSourceEnumerator: true).ToList();
var modeQuery = db.Peoples().Where(x => x.IsActive);
List<PeopleModel> modelResultList;
while (modeQuery.Batch(3, out modelResultList))
{
foreach (var item in modelResultList)
{
People people = db.People.Find(item.Id);
people.IsActive = false;
}
db.SaveChanges();
}
Working as expected, right!
Using the Batchs() Function
If the data source is an in-memory list and we are sure that it is not going to change, it's good to use.
List<List<People>> batchs = sourceList.Batchs(1, 3).ToList();
But if the data source is a Db where changes can happen any time by other processes and we don't want to incorporate the new changes, we should use:
List<List<People>> batchsUseingSourceEnumerator =
sourceList.Batchs(1, 3, useSourceEnumerator: true).ToList();
Reading source as Enumerator can be a slightly slower process than Enumerable, but it will maintain consistency. Thank you Daniele Rota Nodari for the comment.
Good to Know!!!
Please find the Visual Studio 2017 solution as an attachment. If I have missed anything or if you have any suggestions, just let me know.
History
- 20th May, 2019: Initial version