file is here:
Computer science or computing
science (abbreviated CS or CompSci) is the scientific
approach to computation and its applications.
A computer scientist specialises in the theory of
computation and the design of computers or computational
systems.
#include "stdafx.h"

#include <algorithm>
#include <cctype>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
using namespace std;
// Formats an integer as its decimal text representation.
std::string int_to_string(int);

// Streams `i` into an in-memory output stream and hands back the characters
// that were accumulated.
std::string int_to_string(int i) {
    std::ostringstream buffer;
    buffer << i;
    return buffer.str();
}
// One node of a singly linked list of file numbers: it stores a single
// integer index plus a pointer to the node that follows it.
class FileIndex {
public:
    // Creates a node holding the given index.
    FileIndex(int);

    // Read access to the stored index and to the following node.
    int getIndex();
    FileIndex* getNext();

    // Replaces the pointer to the following node.
    void setNext(FileIndex*);

private:
    int index;        // value stored in this node
    FileIndex* next;  // node that follows this one in the chain
};
class InvertedIndexGen {
public:
InvertedIndexGen();
~InvertedIndexGen();
int build(const std::string&);
FileIndex* lookup(const std::string&);
void to_set(std:: set<int>&s, FileIndex* ) ;
std::string toString();
int numberOfWords();
private:
std::map<std::string,> idx;
int loadIndexFile(std::vector<std::string xmlns:std="#unknown">&, const std::string&);
int indexFiles(const std::vector<std::string>&);
int readWords(const std::string&, std::vector<std::string>&);
void insert(const std::string&, int);
};
// Creates an index with no entries: the word map starts out empty.
InvertedIndexGen::InvertedIndexGen() : idx() {}
// Deletes every FileIndex node reachable from the map, then empties the map.
InvertedIndexGen::~InvertedIndexGen() {
    typedef std::map<std::string, FileIndex*>::iterator EntryIter;
    for (EntryIter it = idx.begin(); it != idx.end(); ++it) {
        FileIndex* node = it->second;
        while (node != NULL) {
            // Read the link first: it is no longer accessible once the node
            // has been deleted.
            FileIndex* following = node->getNext();
            delete node;
            node = following;
        }
        it->second = NULL;
    }
    idx.clear();
}
int InvertedIndexGen::build(const string& file) {
vector<string> files;
if (loadIndexFile(files, file) == -1)
return -1;
if (indexFiles(files) == -1)
return -1;
return 0;
}
// Returns the head of the file list stored for `word`, or NULL when the word
// is not present in the index.
FileIndex* InvertedIndexGen::lookup(const std::string& word) {
    // find() is used instead of operator[] because operator[] would create a
    // NULL entry for every unknown word that is queried, which would then be
    // counted by numberOfWords() and printed by toString().
    const std::map<std::string, FileIndex*>::iterator hit = idx.find(word);
    if (hit == idx.end()) {
        return NULL;
    }
    return hit->second;
}
// Predicate for std::find_if: non-zero when `c` is an alphabetic character
// in the current C locale, zero otherwise.
int alpha(char c) {
    // isalpha requires a value representable as unsigned char (or EOF);
    // passing a negative plain char directly is undefined behaviour.
    return std::isalpha(static_cast<unsigned char>(c));
}
// Predicate for std::find_if: true when `c` is NOT an alphabetic character
// in the current C locale.
bool not_alpha(char c) {
    // isalpha requires a value representable as unsigned char (or EOF);
    // passing a negative plain char directly is undefined behaviour.
    return std::isalpha(static_cast<unsigned char>(c)) == 0;
}
// Breaks `str` into its maximal runs of alphabetic characters, in order of
// appearance. Anything that alpha() rejects acts as a separator and is
// dropped from the result.
std::vector<std::string> split(const std::string& str) {
    std::vector<std::string> tokens;
    const std::string::const_iterator last = str.end();

    // Position of the first alphabetic character, if there is one.
    std::string::const_iterator word_begin = std::find_if(str.begin(), last, alpha);
    while (word_begin != last) {
        // The word extends up to the next non-alphabetic character.
        const std::string::const_iterator word_end =
            std::find_if(word_begin, last, not_alpha);
        tokens.push_back(std::string(word_begin, word_end));
        // Skip the separators that follow to reach the next word.
        word_begin = std::find_if(word_end, last, alpha);
    }
    return tokens;
}
// Reads the text file named by `file` line by line and appends every word
// produced by split() to `v`.
// Returns 0 on success; if the file cannot be opened, reports the problem on
// std::cerr, leaves `v` untouched and returns -1.
int InvertedIndexGen::readWords(const std::string& file, std::vector<std::string>& v) {
    // Open the file that was actually requested (c_str() keeps this usable
    // with pre-C++11 stream constructors).
    std::ifstream infile(file.c_str());
    if (!infile) {
        std::cerr << "can't open file " << file << std::endl;
        return -1;
    }

    std::string line;
    while (std::getline(infile, line)) {
        const std::vector<std::string> words = split(line);
        v.insert(v.end(), words.begin(), words.end());
    }
    return 0;
}
void InvertedIndexGen :: to_set(set<int>&s, FileIndex* fi) {
FileIndex* p = fi;
while (p != NULL) {
s.insert(p->getIndex());
p = p->getNext();
}
}
// Indexes every file named in `files`. A file's number is its position in
// `files` (starting at 0); positions are consumed even by entries that are
// skipped, so numbers always match the input list.
//
// Returns 0 when every entry was indexed, -1 when at least one entry was a
// duplicate or could not be read. Processing continues after such an entry.
int InvertedIndexGen::indexFiles(const std::vector<std::string>& files) {
    std::set<std::string> seen;      // file names already processed
    std::vector<std::string> words;  // buffer reused for each file's words
    int status = 0;
    int fcnt = 0;

    for (std::vector<std::string>::const_iterator file_it = files.begin();
         file_it != files.end(); ++file_it, ++fcnt) {
        const std::string& curr_file = *file_it;

        // set::insert reports through `.second` whether the name was new, so
        // a single call both records the file and detects duplicates.
        if (!seen.insert(curr_file).second) {
            status = -1;
            std::cout << "duplicate input file: " << curr_file << ". Skipping." << std::endl;
            continue;
        }

        // Empty the buffer before reading, never while it is being iterated:
        // clearing a vector invalidates the iterators into it.
        words.clear();
        if (readWords(curr_file, words) == -1) {
            status = -1;
            continue;
        }

        for (std::vector<std::string>::const_iterator words_it = words.begin();
             words_it != words.end(); ++words_it) {
            insert(*words_it, fcnt);
        }
    }
    return status;
}
void InvertedIndexGen::insert(const string& word, int fcnt) {
if(idx.find(word) == idx.end()) {
FileIndex* newEntry = new FileIndex(fcnt);
idx.insert(pair<string,>(word, newEntry) );
return;
}
else {
FileIndex* curr = lookup(word);
while(curr->getIndex() != fcnt && curr->getNext() != NULL){
curr = curr->getNext();
}
if((curr->getIndex() == fcnt)) {
return;
}
else {
FileIndex* addIndex = new FileIndex(fcnt);
curr->setNext(addIndex);
}
}
}
// Reads the list of input file names from `idxfile`, one name per line, and
// appends each non-empty line to `files`. Empty lines are reported on
// std::cerr together with their 1-based line number and skipped.
// Returns 0 on success; if `idxfile` cannot be opened, reports the problem on
// std::cerr and returns -1.
int InvertedIndexGen::loadIndexFile(std::vector<std::string>& files, const std::string& idxfile) {
    // Open the file that was actually requested (c_str() keeps this usable
    // with pre-C++11 stream constructors).
    std::ifstream infile(idxfile.c_str());
    if (!infile) {
        std::cerr << "can't open file " << idxfile << std::endl;
        return -1;
    }

    std::string line;
    int lineno = 1;
    while (std::getline(infile, line)) {
        if (line.empty()) {
            std::cerr << "[" << lineno
                      << "] found blank line in input file. skipping." << std::endl;
        } else {
            files.push_back(line);
        }
        ++lineno;
    }
    return 0;
}
// Creates a node that stores `i` and is not yet linked to any other node.
FileIndex::FileIndex(int i) : index(i), next(NULL) {}
// Returns the node linked after this one; NULL until setNext() has stored
// something else.
FileIndex* FileIndex::getNext() {
    return this->next;
}
// Stores `n` as the node that follows this one, replacing any previous link.
void FileIndex::setNext(FileIndex* n) {
    this->next = n;
}
int FileIndex::getIndex() {
return index;
}
string InvertedIndexGen::toString() {
set<int>indexes;
string res = "";
map<string,>::iterator it = idx.begin();
while (it != idx.end()) {
res += it->first + ": ";
to_set(indexes, it->second);
for (set<int>::iterator it2 = indexes.begin();
it2 != indexes.end(); ++it2) {
res += int_to_string(*it2) + " ";
}
res += "\n";
indexes.clear();
it++;
}
return res;
}
// Returns how many entries the word map currently holds, converted to int.
int InvertedIndexGen::numberOfWords() {
    return static_cast<int>(idx.size());
}
// Entry point: builds the inverted index from an index file and prints it.
//
// The index file is taken from the first command-line argument. When no
// argument is given, the path this program previously had hard-coded is used
// so that running it without arguments keeps working.
int main(int argc, char* argv[]) {
    // argv[0] is the program's own name, never an input file.
    std::string index_file = "D:/C++ project/ConsoleApplication7/file.txt";
    if (argc > 1) {
        index_file = argv[1];
    }

    std::cout << "Test" << std::endl;

    InvertedIndexGen ivgen;
    if (ivgen.build(index_file) == -1) {
        // build() also returns -1 for partial failures, so whatever was
        // indexed is still printed below.
        std::cerr << "warning: index built from " << index_file
                  << " is incomplete" << std::endl;
    }

    std::cout << ivgen.toString();
    std::cout << ivgen.numberOfWords() << " words" << std::endl;

    // Wait for input so the console window stays open until the user reacts.
    int s;
    std::cin >> s;
    return 0;
}