Introduction
Have you ever encountered a performance bottleneck when trying to filter a large collection of business objects on multiple different properties?
Something like this?
List<Task> filteredList = new List<Task>();
foreach (Task t in _tasks)
if (t.Type == "Product Backlog Item" && t.Number = 9)
filteredList.Add(t);
return filteredList;
But, what if you could do something like this that produced the same results as the previous method?
return _tasks.Find("Type", "Product Backlog Item").Intersect<Task>(_tasks.Find("Number", 9));
No, that's not a typo. That is a 289200 % improvement in performance.
Now, there's one catch. This only works when your collection of items has a low ratio of difference over the property you want to filter on. Do this calculation:
# of possible values for the property / Total Items in Collection = ratio
In my case, there are only 5 possible values for the Type
property, and there are 14 possible values for the Number
property. So, my ratio would be 5/5000 and 14/5000, both of which are very close to 0. As that ratio gets closer to 0, the memory overhead decreases and performance increases. Even with a ratio close to 1, I still noticed a large performance increase, although the memory overhead increased significantly.
Using the code
You first need to have the .NET 3.5 Framework installed. The only thing that is dependent on 3.5 is the HashSet<T>
class. This could easily be replaced with a normal HashTable
, which would make it possible to use with 2.0
Next, add the [IndexedProperty()]
attribute to any property of your class you want to filter on. One thing you have to watch out for is having null
values for the property you are filtering on. Because of the way we are indexing the properties, we have to substitute a value in place of null
. You can pass in the value you want null represented as when you create the attribute. If the property's type is a value type, and cannot be null
, then use the parameterless constructor instead.
The following example uses String.Empty
in place of null
when indexing the property:
[IndexedProperty(string.Empty)]
public string Type
{
get
{
return _type;
}
set
{
_type = value;
OnPropertyChanged("Type");
}
}
Now, make an instance of IndexableCollection<T>
(the only restriction on T
is that it needs to implement INotifyPropertyChanged
). The indexes are built and updated when the PropertyChanged
event is caught by T
's parent IndexableCollection<T>
.
That's it!
Now, you can do fun stuff like this:
_tasks.Find("Type", "Product Backlog Item")
which returns all tasks that have the Type
== "Product Backlog Item".
And to wrap it up. Here's the code (you can also download the sample application and code from the attached zip file):
public class IndexableCollection<T> : BindingList<T> where T : INotifyPropertyChanged
{
private Dictionary<string, object> _indexedProperties;
private Dictionary<string, IDictionary> _propertyHashes =
new Dictionary<string, IDictionary>();
private Dictionary<T, Dictionary<string,
HashSet<T>>> _currentPropertyHashes =
new Dictionary<T, Dictionary<string, HashSet<T>>>();
public bool IsPropertyIndexed(string propertyName)
{
return _indexedProperties.ContainsKey(propertyName);
}
public IndexableCollection()
{
_indexedProperties = new Dictionary<string, object>();
foreach (PropertyInfo info in typeof(T).GetProperties())
{
object[] temp = info.GetCustomAttributes(typeof(IndexedProperty), false);
if (temp.Count<object>() > 0)
_indexedProperties.Add(info.Name,
((IndexedProperty)temp[0]).NullValue);
}
}
protected override void SetItem(int index, T item)
{
T obj = this[index];
RemoveAllHases(obj);
SetAllHashes(item);
base.SetItem(index, item);
}
protected override void OnListChanged(ListChangedEventArgs e)
{
if (e.ListChangedType == ListChangedType.ItemChanged)
{
T obj = this[e.NewIndex];
if (e.PropertyDescriptor != null &&
_indexedProperties.ContainsKey(e.PropertyDescriptor.Name))
SetHash(obj,e.PropertyDescriptor.Name,
GetPropertyValue(obj,e.PropertyDescriptor.Name));
}
base.OnListChanged(e);
}
private void RemoveAllHases(T obj)
{
foreach (HashSet<T> set in _currentPropertyHashes[obj].Values)
set.Remove(obj);
_currentPropertyHashes.Remove(obj);
}
private void SetAllHashes(T obj)
{
foreach (string prop in _indexedProperties.Keys)
SetHash(obj, prop, GetPropertyValue(obj, prop));
}
private object GetPropertyValue(T obj, string propertyName)
{
return obj.GetType().GetProperty(propertyName).GetValue(obj, null);
}
private void SetHash(T obj, string propertyName, object value)
{
RemoveHashFromHistory(obj, propertyName);
Dictionary<object, HashSet<T>> dict =
GetDictionaryForProperty(propertyName);
value = value ?? _indexedProperties[propertyName];
HashSet<T> set = GetHashSetFromDictionary(dict, value);
StoreHashToHistory(obj, propertyName, set);
set.Add((T)obj);
}
private void StoreHashToHistory(T obj, string propertyName, HashSet<T> hash)
{
Dictionary<string, HashSet<T>> dict;
if (!_currentPropertyHashes.TryGetValue(obj, out dict))
{
dict = new Dictionary<string, HashSet<T>>();
_currentPropertyHashes[obj] = dict;
}
dict[propertyName] = hash;
}
private void RemoveHashFromHistory(T obj, string propertyName)
{
HashSet<T> previousSet = GetPreviousHash(obj, propertyName);
if (previousSet != null)
previousSet.Remove(obj);
}
private HashSet<T> GetPreviousHash(T obj, string propertyName)
{
HashSet<T> set = null;
Dictionary<string, HashSet<T>> dict;
if (!_currentPropertyHashes.TryGetValue(obj, out dict))
{
dict = new Dictionary<string, HashSet<T>>();
_currentPropertyHashes[obj] = dict;
if (!dict.TryGetValue(propertyName, out set))
{
set = new HashSet<T>();
dict[propertyName] = set;
}
}
else
{
if (!dict.TryGetValue(propertyName, out set))
{
set = new HashSet<T>();
dict[propertyName] = set;
}
}
return set;
}
protected HashSet<T> GetHashSetFromDictionary(Dictionary<object,
HashSet<T>> dict, object value)
{
HashSet<T> set;
if (!dict.TryGetValue(value, out set))
{
set = new HashSet<T>();
dict[value] = set;
}
return set;
}
private Dictionary<object, HashSet<T>> GetDictionaryForProperty(string property)
{
Dictionary<object, HashSet<T>> dict = null;
IDictionary temp;
if (!_propertyHashes.TryGetValue(property, out temp))
{
dict = new Dictionary<object, HashSet<T>>();
_propertyHashes[property] = dict;
return dict;
}
else
{
return temp as Dictionary<object, HashSet<T>>;
}
}
protected override void InsertItem(int index, T obj)
{
SetAllHashes(obj);
base.InsertItem(index, obj);
}
public virtual HashSet<T> Find(string propertyName, object value)
{
if (!_indexedProperties.ContainsKey(propertyName))
throw new Exception("You can only search on indexed properties");
return GetHashSetFromDictionary(GetDictionaryForProperty(propertyName), value);
}
protected override void RemoveItem(int index)
{
RemoveAllHases(this[index]);
base.RemoveItem(index);
}
}
[AttributeUsage(AttributeTargets.Property, AllowMultiple = false)]
public class IndexedProperty : System.Attribute
{
private object _nullValue;
public object NullValue
{
get
{
return _nullValue;
}
}
public IndexedProperty()
{
}
public IndexedProperty(object nullValue)
{
if (nullValue == null)
throw new Exception("The nullValue must be non-null");
_nullValue = nullValue;
}
}