EDIT:
* Figured out way to make 'poopoopoop' become 'p**p**p**p' (without recursion) and keep other design constraints.
* Figured out an 'Oh Duh - that's so simple!' way to keep 'nincompoop' as 'nincompoop' - just replace it with itself!
* Made class static, which resulted in about 8% inprovement. But the additional 'p**p**p**p' routine (replaceRepeatingStrings) dropped speed from 0.24ms to 0.37ms. Still 7 times faster than Solution 10 on my machine.
-END EDIT
Quote:
The point is to make the question a little loose to allow ... pointless religious wars
Good, I'll use some 'goto's :)
I'm not very familiar with regex, but believe it doesn't allow you to change the case of a word based on the cases of the surrounding words. (If I'm wrong, please enlighten me.) If that is the case, 'PHB' will always become 'boss' in the regex solutions, regardless of whether the input 'shouted' the phrase.
The following solution is C++ all the way, and is about 7 times faster than Solution 10 on my machine using Grant's newest timing routine. It does take the capitalization of the surrounding words into effect. It will also change 'pooppoop' to 'p**pp**p', which doesn't seem to have been specified by the specifications, although logically should occur. Additionally, it changes 'poopoopoop' into 'p**p**p**p' without recursion.
Quote:
A weekly simple programming problem that should take no more than half an hour or so
Yeah, right Chris... :rolleyes: This logic was a friggin' pain!
#include <map>
#include <string>
#include <algorithm>
#include <vector>
#include <tuple>
#include <ctime>
#include <iostream>
#include "windows.h" //For QueryPerformanceCounter
using namespace std;
#define GetTupleWord get<0>
#define GetTupleSmallCase get<1>
#define GetTupleCapCase get<2>
class TimerLowRes {
private:
std::clock_t begC;
std::clock_t avgTotC;
std::clock_t diffC;
int numTimesC;
public:
TimerLowRes() : avgTotC(0), numTimesC(0) { }
void start() { begC = std::clock(); }
std::clock_t stop() {
diffC = std::clock() - begC;
avgTotC = avgTotC + diffC;
numTimesC++;
return diffC;
}
std::clock_t getAvg() {
if (numTimesC == 0) return 0;
return avgTotC / numTimesC;
}
void reset() {
numTimesC = 0;
avgTotC = 0;
}
std::clock_t getLapTime() { return std::clock() - begC; }
};
class TimerHighRes {
private:
LARGE_INTEGER StartingTime, EndingTime, ElapsedMicroseconds, Frequency;
int numTimesC;
public:
TimerHighRes() : numTimesC(0) {
QueryPerformanceFrequency(&Frequency);
}
void start() {
QueryPerformanceCounter(&StartingTime);
}
LARGE_INTEGER stop() {
QueryPerformanceCounter(&EndingTime);
ElapsedMicroseconds.QuadPart = EndingTime.QuadPart - StartingTime.QuadPart;
ElapsedMicroseconds.QuadPart *= 1000000;
ElapsedMicroseconds.QuadPart /= Frequency.QuadPart;
numTimesC++;
return ElapsedMicroseconds;
}
};
class Reformatter {
private:
typedef tuple<wstring, wstring, wstring> WordTuples;
vector<WordTuples> repeatingStringsC; vector<WordTuples> caseSensitiveSubsC; vector<WordTuples> nonCaseSensitiveSubsC; vector<WordTuples> asteriskSubsC;
wstring resultC;
size_t strLenC;
wstring::const_iterator beginLocC;
wstring::const_iterator curLocC;
wstring::const_iterator endLocC;
bool replaceAsterisks();
bool replaceNonCaseSensitiveBegin();
bool replaceCaseSensitive();
bool replaceRepeatingStrings();
bool previousWordIsCapitalized(wstring::const_iterator & thisWord, wstring::const_iterator & prevWord);
bool nextWordIsCapitalized(wstring::const_iterator & thisWord, wstring::const_iterator & nextWord);
void findBeginningOfPrevious(wstring::const_iterator & thisWord, wstring::const_iterator & prevWord);
void findBeginningOfNext(wstring::const_iterator & thisWord, wstring::const_iterator & nextWord);
bool isWhiteSpaceOrPunctuation(const wstring::const_iterator & temp);
public:
Reformatter() {
repeatingStringsC.push_back(make_tuple(L"poo", L"p**", L"p"));
caseSensitiveSubsC.push_back(make_tuple(L"PHB", L"boss", L"BOSS"));
nonCaseSensitiveSubsC.push_back(make_tuple(L"gotten to be", L"become", L"BECOME"));
nonCaseSensitiveSubsC.push_back(make_tuple(L"gotten", L"become", L"BECOME"));
asteriskSubsC.push_back(make_tuple(L"nincompoop", L"nincompoop", L"nincomp##p"));
asteriskSubsC.push_back(make_tuple(L"poop", L"p**p", L"P**P"));
asteriskSubsC.push_back(make_tuple(L"p##p", L"p**p", L"P**P"));
asteriskSubsC.push_back(make_tuple(L"ass", L"a**", L"A**"));
}
void reformat(const wstring & str);
void outputResult();
};
void Reformatter::outputResult() {
wcout << L"OUTPUT: " << resultC << endl << endl;
}
bool Reformatter::isWhiteSpaceOrPunctuation(const wstring::const_iterator & it) {
if (*it == L' ' || *it == L'\t' || *it == L'.' || *it == L'!' || *it == L'_' ||
*it == L'\r' || *it == L'\n') return true;
return false;
}
void Reformatter::findBeginningOfNext(wstring::const_iterator & thisWord, wstring::const_iterator & nextWord) {
while (thisWord != endLocC && !isWhiteSpaceOrPunctuation(thisWord)) ++thisWord;
nextWord = thisWord;
while (nextWord != endLocC && isWhiteSpaceOrPunctuation(nextWord)) ++nextWord;
if (nextWord == endLocC) {
nextWord = endLocC;
thisWord = endLocC;
return;
}
}
void Reformatter::findBeginningOfPrevious(wstring::const_iterator & thisWord, wstring::const_iterator & prevWord) {
while (thisWord != beginLocC && !isWhiteSpaceOrPunctuation(thisWord)) --thisWord;
prevWord = thisWord;
++thisWord;
while (prevWord != beginLocC && isWhiteSpaceOrPunctuation(prevWord)) --prevWord;
if (prevWord == beginLocC) {
if (isWhiteSpaceOrPunctuation(prevWord)) prevWord = thisWord;
return;
}
while (prevWord != beginLocC && !isWhiteSpaceOrPunctuation(prevWord)) --prevWord;
if (isWhiteSpaceOrPunctuation(prevWord)) ++prevWord;
}
bool Reformatter::previousWordIsCapitalized(wstring::const_iterator & thisWord,
wstring::const_iterator & prevWord) {
findBeginningOfPrevious(thisWord, prevWord);
if (thisWord == prevWord) return true; while (!isWhiteSpaceOrPunctuation(prevWord)) {
wchar_t temp = *prevWord;
if (iswlower(*prevWord)) return false;
++prevWord;
}
return true;
}
bool Reformatter::nextWordIsCapitalized(wstring::const_iterator & thisWord, wstring::const_iterator & nextWord) {
findBeginningOfNext(thisWord, nextWord);
if (thisWord == nextWord) return false; while (nextWord != endLocC && !isWhiteSpaceOrPunctuation(nextWord)) {
wchar_t temp = *nextWord;
if (iswlower(*nextWord)) return false;
++nextWord;
}
return true;
}
bool Reformatter::replaceCaseSensitive() {
bool found = false;
wstring::const_iterator inputIterator;
for (auto it = caseSensitiveSubsC.begin(); it != caseSensitiveSubsC.end(); ++it) {
const wstring & str = GetTupleWord(*it);
inputIterator = curLocC;
for (int pos=0, len=str.length(); pos<len; ++pos) {
found = true;
if (inputIterator == endLocC || *inputIterator != str[pos]) {
found = false;
break;
}
++inputIterator;
}
if (found) {
if (inputIterator == endLocC || !isWhiteSpaceOrPunctuation(inputIterator)) {
return false;
}
wstring::const_iterator thisWord = curLocC;
wstring::const_iterator prevWord = curLocC;
wstring::const_iterator nextWord = curLocC;
if (previousWordIsCapitalized(thisWord, prevWord) && nextWordIsCapitalized(thisWord, nextWord)) {
resultC.append(GetTupleCapCase(*it));
}
else {
resultC.append(GetTupleSmallCase(*it));
}
curLocC += str.length();
while (curLocC != endLocC && isWhiteSpaceOrPunctuation(curLocC)) {
resultC.append(1, *curLocC);
++curLocC;
}
return found;
}
}
return found;
}
bool Reformatter::replaceNonCaseSensitiveBegin() {
bool found;
vector<tuple<wstring, wstring, wstring>>::iterator it;
wstring::const_iterator inputIterator;
const wstring * str;
if (curLocC == beginLocC) return false;
if (!(curLocC-1==beginLocC || isWhiteSpaceOrPunctuation(curLocC-1))) return false;
if (!isWhiteSpaceOrPunctuation(curLocC-1)) return false;
for (it = nonCaseSensitiveSubsC.begin(); it != nonCaseSensitiveSubsC.end(); ++it) {
str = &GetTupleWord(*it);
inputIterator = curLocC;
for (int pos=0, len=str->length(); pos<len; ++pos) {
found = true;
if (inputIterator == endLocC || towlower(*inputIterator) != (*str)[pos]) {
found = false;
break;
}
++inputIterator;
}
if (found) break;
}
if (found) {
inputIterator = curLocC;
bool isCapped = true;
for (int pos=0, len=(*str).length(); pos<len; ++pos) {
if (iswlower(*inputIterator)) {
isCapped = false;
break;
}
++inputIterator;
}
if (isCapped) {
resultC.append(GetTupleCapCase(*it));
}
else {
resultC.append(GetTupleSmallCase(*it));
}
curLocC += GetTupleWord(*it).length();
return true;
}
return false;
}
bool Reformatter::replaceAsterisks() {
bool found;
vector<tuple<wstring, wstring, wstring>>::iterator it;
wstring::const_iterator inputIterator;
const wstring * str;
for (it = asteriskSubsC.begin(); it != asteriskSubsC.end(); ++it) {
str = &GetTupleWord(*it);
inputIterator = curLocC;
for (int pos=0, len=str->length(); pos<len; ++pos) {
found = true;
if (inputIterator == endLocC || towlower(*inputIterator) != (*str)[pos]) {
found = false;
break;
}
++inputIterator;
}
if (found) break;
}
if (found) {
str = &GetTupleSmallCase(*it);
inputIterator = curLocC;
for (int pos=0, len=(*str).length(); pos<len; ++pos) {
if ((*str)[pos] == L'*') {
resultC.append(L"*");
}
else {
resultC.append(1, *curLocC);
}
++curLocC;
++inputIterator;
}
return true;
}
return false;
}
bool Reformatter::replaceRepeatingStrings() {
bool found;
vector<tuple<wstring, wstring, wstring>>::iterator it;
wstring::const_iterator inputIterator;
const wstring * str;
for (it = repeatingStringsC.begin(); it != repeatingStringsC.end(); ++it) {
str = &GetTupleWord(*it);
inputIterator = curLocC;
for (int pos=0, len=str->length(); pos<len; ++pos) {
found = true;
if (inputIterator == endLocC || towlower(*inputIterator) != (*str)[pos]) {
found = false;
break;
}
++inputIterator;
}
if (found) {
str = &GetTupleCapCase(*it);
if (*inputIterator != (*str)[0]) found = false;
}
if (found) break;
}
if (found) {
str = &GetTupleSmallCase(*it);
inputIterator = curLocC;
for (int pos=0, len=(*str).length(); pos<len; ++pos) {
if ((*str)[pos] == L'*') {
resultC.append(L"*");
}
else {
resultC.append(1, *curLocC);
}
++curLocC;
++inputIterator;
}
return true;
}
return false;
}
void Reformatter::reformat(const wstring & str) {
resultC.reserve(str.length()+1);
resultC.clear();
beginLocC = str.begin();
curLocC = beginLocC;
strLenC = str.length();
endLocC = str.end();
while (curLocC != endLocC) {
while (curLocC != endLocC && isWhiteSpaceOrPunctuation(curLocC)) {
resultC.append(1, *curLocC);
++curLocC;
}
if (curLocC == endLocC) return;
if (!(replaceCaseSensitive() || replaceNonCaseSensitiveBegin() ||
replaceRepeatingStrings() || replaceAsterisks())) {
resultC.append(1, *curLocC);
++curLocC;
}
}
}
int main() {
static Reformatter reformatter;
wstring str = L"I GOTTEN PHB lettuce!";
reformatter.reformat(str);
wcout << L"INPUT: " << str << endl;
reformatter.outputResult();
str = L"I GOTTEN phb lettuce!";
reformatter.reformat(str);
wcout << L"INPUT: " << str << endl;
reformatter.outputResult();
str = L"Not Poopoop, and PoopPoop nInComPoop!!";
reformatter.reformat(str);
wcout << L"INPUT: " << str << endl;
reformatter.outputResult();
str = L"My PHB is such a poophead. It's gotten worse since his promotion";
reformatter.reformat(str);
wcout << L"INPUT: " << str << endl;
reformatter.outputResult();
cout << "NOTE THAT 'boss' BECOMES CAPPED OR NON-CAPPED DEPENDING ON THE SURROUNDING WORDS:"
<< endl << "ALSO, 'PHB' BECOMES 'boss' DEPENDING UPON WHETHER 'PHB' IS BY ITSELF."
<< endl;
str = L"MY PHB HAS started his new BLOG PHB_BLOG.com. And PHBBlog.com! "
"He's GOTTEn TO BE SUCH A MISBEGOTTEN POOPHEAD!";
reformatter.reformat(str);
wcout << L"INPUT: " << str << endl;
reformatter.outputResult();
TimerLowRes timerLowRes;
TimerHighRes timerHighRes;
LARGE_INTEGER timeRighRes;
vector<float> timeList;
int numIterations = 100;
int totalTests = 10;
for (int numTimes=0; numTimes<totalTests; ++numTimes) {
timerLowRes.start();
timerHighRes.start();
for (int x=0; x<numIterations; ++x) {
reformatter.reformat(L"My PHB is such a poophead. It's gotten worse since his promotion");
}
timeRighRes = timerHighRes.stop();
clock_t timeLowRes(timerLowRes.stop());
cout << "low res time: " << timeLowRes << " milliseconds" << endl;
cout << "high res time: " << timeRighRes.QuadPart/(float)1000 << " milliseconds" << endl;
timeList.push_back(timeRighRes.QuadPart/(float)1000);
}
float avg = 0.0;
for (int i=0; i<totalTests; ++i) avg += timeList[i];
avg = avg/totalTests;
cout << endl << "Average = " << avg << " milliseconds";
cout << endl << "Enter any key, then an 'Enter' to exit: ";
cin >> timeRighRes.QuadPart; return 0;
}
Didn't use any goto's, for those who are counting, but wouldn't have given a crap if I did.
It might be possible to streamline the logic in the replaceCaseSensitive and other replacement routines. All the 'found' checking and setting seemed excessive, but it worked and I moved on.
David