Introduction
Hopefully you're aware of my format by now - once again I'm going to provide you with code you can copy and paste into an empty console application rather than use a zip file. I'll see you on the other side...
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <functional>
#include <numeric>
using std::binary_function;
using std::unary_function;
using std::accumulate;
using std::cout;
using std::string;
using std::vector;
using std::for_each;
using std::ostream_iterator;
using std::copy;
using std::endl;
using std::sort;
using std::mem_fun_ref;
struct Member
{
string Surname;
string Name;
int VideosOut;
int DaysOverdue;
Member::Member(string surname, string name, int videosOut = 0,
int daysOverdue = 0) : Surname(surname),
Name(name), VideosOut(videosOut), DaysOverdue(daysOverdue) {};
int Print()
{
cout << "Name: " << Surname <<", " << Name;
if (VideosOut > 0) cout << " Videos Out = " << VideosOut;
if (DaysOverdue > 0) cout << " Days overdue = " << DaysOverdue;
cout << endl;
return 0;
}
};
void PrintItem(const Member & m)
{
cout << "Name: " << m.Surname <<", " << m.Name;
if (m.VideosOut > 0) cout << " Videos Out = " << m.VideosOut;
if (m.DaysOverdue > 0) cout << " Days overdue = " << m.DaysOverdue;
cout << endl;
}
struct CountOverdueDays : public unary_function<int, int>
{
CountOverdueDays(int nDayLimit) : m_nDayLimit(nDayLimit) { }
int operator()(int nOverdueSoFar, const Member& m)
{
return m.DaysOverdue >= m_nDayLimit ?
nOverdueSoFar + m.VideosOut : nOverdueSoFar;
}
private:
const int m_nDayLimit;
};
struct SortByVideosOut : public binary_function<Member, Member, bool>
{
bool operator() (const Member & m1, const Member & m2)
{
return (m1.VideosOut < m2.VideosOut);
}
};
bool SortMember(const Member m1, const Member m2)
{
if (m1.Surname < m2.Surname)
return true;
else if (m1.Surname > m2.Surname)
return false;
return (m1.Name < m2.Name);
}
struct CountIntSort : public binary_function<int, int, bool>
{
public:
static int nCount;
bool operator() (const int n1, const int n2)
{
++ nCount;
return (n1 < n2);
}
};
int CountIntSort::nCount = 0;
struct CountIntSort2 : public binary_function<int, int, bool>
{
public:
int nCount;
CountIntSort2() : nCount(0){};
bool operator() (const int n1, const int n2)
{
++ nCount;
return (n1 < n2);
}
};
struct CountIntSort3 : public binary_function<int, int, bool>
{
public:
int nCount;
CountIntSort3 * pParent;
int nCopies;
CountIntSort3() : nCount(0), nCopies(0), pParent(0) {};
CountIntSort3(CountIntSort3 & c) : nCount(0), nCopies(1)
{
pParent = &c;
}
~CountIntSort3()
{
pParent->nCount += nCount;
pParent->nCopies += nCopies;
}
bool operator() (const int n1, const int n2)
{
++ nCount;
return (n1 < n2);
}
};
int _tmain(int argc, _TCHAR* argv[])
{
vector<Member> vecMem;
vecMem.push_back(Member("Zaphir", "Jill"));
vecMem.push_back(Member("Smith", "John", 5, 0));
vecMem.push_back(Member("Smith", "Phil", 1, 3));
vecMem.push_back(Member("Jones", "Susan"));
vecMem.push_back(Member("Kaputnik", "Joe", 10, 5));
vecMem.push_back(Member("Jones", "Bill", 3, 7));
for_each(vecMem.begin(), vecMem.end(), PrintItem);
cout << endl;
sort(vecMem.begin(), vecMem.end(), SortMember);
cout << "Now the same list sorted\n\n";
for_each(vecMem.begin(), vecMem.end(), mem_fun_ref(&Member::Print));
cout << endl;
int nOverdue = accumulate(vecMem.begin(), vecMem.end(),
0 ,
CountOverdueDays(5));
cout << "Number of videos overdue by five or more days: " << nOverdue;
cout << "\n\nSorted by number of videos out:\n\n";
SortByVideosOut v;
sort (vecMem.begin(), vecMem.end(), v);
for_each(vecMem.begin(), vecMem.end(), mem_fun_ref(&Member::Print));
cout << endl;
vector<int> vecInt;
for (int i = 0; i < 10000; ++i)
vecInt.push_back(rand()% 10000);
CountIntSort::nCount = 0;
CountIntSort c;
vector<int> vecInt2 = vecInt;
sort(vecInt2.begin(), vecInt2.end(), c);
cout << "Sorting a vector of 10000 ints took "
<< CountIntSort::nCount << " calls to the sort function"
<< endl;
CountIntSort2 c2;
vecInt2 = vecInt;
sort(vecInt2.begin(), vecInt2.end(), c2);
cout << "Sorting a vector of 10000 ints second try took "
<< c2.nCount << " calls to the sort function" << endl;
CountIntSort3 c3;
vecInt2 = vecInt;
sort(vecInt2.begin(), vecInt2.end(), c3);
cout << "Sorting a vector of 10000 ints second try took "
<< c3.nCount
<< " calls to the sort function, and the predicate was copied "
<< c3.nCopies << " times." << endl;
string s;
std::cin >> s;
return 0;
}
This third installment deals with an imaginary video library program, for which a portion of the struct
that holds customer data may look like this:
struct Member
{
string Surname;
string Name;
int VideosOut;
int DaysOverdue;
Member::Member(string surname, string name, int videosOut = 0,
daysOverdue = 0) : Surname(surname),
Name(name), VideosOut(videosOut), DaysOverdue(daysOverdue) {};
}
Having created this struct
, we can build a vector of imaginary customers like this:
vector<Member> vecMem;
vecMem.push_back(Member("Zaphir", "Jill"));
vecMem.push_back(Member("Smith", "John", 5, 0));
vecMem.push_back(Member("Smith", "Phil", 1, 3));
vecMem.push_back(Member("Jones", "Susan"));
vecMem.push_back(Member("Kaputnik", "Joe", 10, 5));
vecMem.push_back(Member("Jones", "Bill", 3, 7));
Now, normally, if I created a struct
like this on the console, I'd create a stream inserter/extractor pair for it, so I could just call cout
on each item, but I'm not trying to show how to do that. Instead, I am going to create my own function, which I will call with for_each
. This function is often preferable to a hand written loop for a number of reasons, which are all covered in Effective STL by Scott Meyers. If you have enough interest in my articles to have read up to the third one, you need this book. Go to Fatbrain or Amazon and order it now. Don't worry, I'll wait for you...
You're back? Good. Now, here is how it works. A function like for_each
requires a unary function, one which takes one parameter. So we write a print function like this:
void PrintItem(const Member & m)
{
cout << "Name: " << m.Surname <<", " << m.Name;
if (m.VideosOut > 0) cout << " Videos Out = " << m.VideosOut;
if (m.DaysOverdue > 0) cout << " Days overdue = " << m.DaysOverdue;
cout << endl;
}
and then inside main
we call it like this:
for_each(vecMem.begin(), vecMem.end(), PrintItem);
Simple, yes? A comparison function such as sort
requires a binary function - it needs two objects in order to compare them. Let's write a function which sorts by name - a structure requires this sort of function to tell us what to sort by. Furthermore, in this case we will sort by surname, and sort by first name within groups of the same surname.
bool SortMember(const Member & m1, const Member & m2)
{
if (m1.Surname < m2.Surname)
return true;
else if (m1.Surname > m2.Surname)
return false;
return (m1.Name < m2.Name);
}
Then it's just a case of using this function to sort by as follows:
sort(vecMem.begin(), vecMem.end(), SortMember);
Of course, all of these global functions get a bit messy, not to mention that they are not object oriented at all, and so we are provided with some adaptors to help us, namely mem_fun
, and mem_fun_ref
. Both of these allow us to pass in a function that is a member of the class we are storing in our container. memfun
assumes we are storing pointers to objects, and mem_fun_ref
that we are storing them by value. So if we add the following code to our Member
struct
:
int Print()
{
cout << "Name: " << Surname <<", " << Name;
if (VideosOut > 0) cout << " Videos Out = " << VideosOut;
if (DaysOverdue > 0) cout << " Days overdue = " << DaysOverdue;
cout << endl;
return 0;
}
then we can do this:
for_each(vecMem.begin(), vecMem.end(), mem_fun_ref(&Member::Print));
We also have mem_fun1
and mem_fun1_ref
, which allow us to specify functions that take one argument. Sadly there is no mem_fun2_ref
, which means we cannot put a predicate into a class (a predicate is a function that takes two arguments and returns bool
.)
We can however move beyond these global functions, by instead creating a struct
or class that overloads operator ()
. Such a function is called a functor in STL land, and a functor where operator()
returns a bool
is called a predicate.
Before I talk about the effect of this, let me first thank J�rgen Sigvardsson for being the first of many to point out the flaws in my original article in regard to this section. He is the author of the CoutOverdueDays
function that follows.
After several false starts, I think I've got to the bottom of the issue of functors having state. Meyers explains that if operator()
modifies the state of the functor, we have a problem as we are unable to know exactly how many times it will be called. However, the fact that we can introduce state into a functor is one good reason to use one. A good example can be found in Chris Losinger's article here. J�rgen's CountOverdueDays
functor looks like this:
struct CountOverdueDays
{
CountOverdueDays(int nDayLimit) : m_nDayLimit(nDayLimit) { }
int operator()(int nOverdueSoFar, const Member& m)
{
return m.DaysOverdue >=
m_nDayLimit ? nOverdueSoFar + m.VideosOut : nOverdueSoFar;
}
private:
const int m_nDayLimit;
};
With the help of J�rgen, the following improved code demonstrates how to count how many videos are five or more days overdue:
int nOverdue = accumulate(vecMem.begin(), vecMem.end(),
0 ,
CountOverdueDays(5));
cout << "Number of videos overdue by five or more days: " << nOverdue;
To allow you to easily see this is correct, I provide another sort, by the number of videos out.
struct SortByVideosOut : public
binary_function<MEMBER bool Member Member Member,>
{
bool operator() (const Member & m1, const Member & m2)
{
return (m1.VideosOut < m2.VideosOut);
}
};
Anyhow, now we can output the list by number of videos out, by doing the following:
SortByVideosOut v;
sort (vecMem.begin(), vecMem.end(), v);
for_each(vecMem.begin(), vecMem.end(), mem_fun_ref(&Member::Print));
One good reason for using functors instead of global functions is that they have no function call overhead once they have been passed in as an argument. Another, as also pointed out by J�rgen, is that it allows you to use function adapters like binder1st
and unary_negate
, so long as you derive them from unary_function
or binary_function
. These two classes are found in the header <functional>
.
Having come this far, I decided as an exercise to add the code to count how many times operator()
is called for a common function, namely sort
. The following code creates a binary predicate that does a normal sort, but also counts how often the operator is called using a static variable.
struct CountIntSort : public
binary_function<int, int, bool><int int bool int int,,>
{
public:
static int nCount;
bool operator() (const int n1, const int n2)
{
++ nCount;
return (n1 < n2);
}
};
int CountIntSort::nCount = 0;
And in main()
vector<int> vecInt;
for (int i = 0; i < 10000; ++i)
vecInt.push_back(rand()% 10000);
CountIntSort::nCount = 0;
CountIntSort c;
vector<int> vecInt2 = vecInt;
sort(vecInt2.begin(), vecInt2.end(), c);
cout << "Sorting a vector of 10000 ints took "
<< CountIntSort::nCount << " calls to the sort function" << endl;
On my machine, the magic number is 205,832 calls to operator ()
to sort 10,000 int
s. That static variable is a bit ugly though, so let's replace it with a class level member.
struct CountIntSort2 : public binary_function<int, int, bool>
{
public:
int nCount;
CountIntSort2() : nCount(0){};
bool operator() (const int n1, const int n2)
{
++ nCount;
return (n1 < n2);
}
};
Making another copy of the same vector and calling this returns us a value of 0 for the number of iterations. This was the same problem I faced when writing the original article, and it was Joaqu�n M L�pez Mu�oz who provides the solution in the discussion below.
Predicates are passed by value, not by reference. Therefore the counter spins nicely inside the STL, but the predicate instance you passed in remains unaffected. A full explanation is given by Joaqu�n below, in his message titled 'I got it', in the thread labeled 'I still don't get why predicates should be stateless'. Suffice it to say that this will not work. So how do we get away from the static variable? We need to overload our copy constructor, and our destructor in order to maintain a count per copy, and pass it back until it returns to our original predicate. While we're at it, let's count how many copies of the predicate are made during the copy operation.
struct CountIntSort3 : public binary_function<int, int, bool>
{
public:
int nCount;
CountIntSort3 * pParent;
int nCopies;
CountIntSort3() : nCount(0), nCopies(0), pParent(0) {};
CountIntSort3(CountIntSort3 & c) : nCount(0), nCopies(1)
{
pParent = &c;
}
~CountIntSort3()
{
pParent->nCount += nCount;
pParent->nCopies += nCopies;
}
bool operator() (const int n1, const int n2)
{
++ nCount;
return (n1 < n2);
}
};
Using this code instead, we once again get 205,832 calls to operator ()
, and the predicate is copied 3,315 times during the operation.
As has been noted before by others commenting on my first article, the Boost library provides a lot of extensions to STL, but I do not use them simply because I had a hard enough time getting everyone at work to install STLPort, and I don't see much point in learning stuff that leaves me less likely to know workarounds that can be used in every situation. I will be installing Boost when I do an article on stuff not found in the VC6 STL that can be useful, although that will mostly cover stuff in STLPort. I mention this again because I'd really like to be able to add these predicate functions to the class itself if I could.
As you can see, not only does STL come with a lot of powerful functions built in (a future article will discuss how many, for now I just try to introduce a couple in each article ), for_each
provides an easy way to write our own functions, and the general mechanism of providing our own code for policy means that we can adapt the general algorithms to suit our needs. Next up, I am going to offer an article on associative containers, namely map
and set
. As I discovered, a set is a subset of map, and both are a very useful alternative if you're looking to keep your data sorted, or want a tradeoff between the high cost of inserting into a vector, and the high cost of searching a list.