A string tokenizer class in C++

Quite often we need to tokenize various inputs and other data, and we end up writing custom code for each use. I have tried to make this more generic using the latest C++11 STL facilities. Since it is a template class, make sure to copy the whole code into a header file for use across the project. Both std::wstring and std::string can be passed as the template argument.

A simple use is as follows

Tokenizer<std::wstring>(m_delims, TokenizerOP::SIMPLE).GetTokens(m_input, tokens);

auto tokens_1 = Tokenizer<std::wstring>(m_delims, TokenizerOP::SIMPLE).GetTokens(m_input);

Tokenizer code is as follows

///////////////////////////////////////////////////////////////////////////////
// Tokenizer
#include <algorithm>
#include <cctype>
#include <cwctype>
#include <string>
#include <utility>
#include <vector>

// Post-processing applied to the raw tokens after the text has been split.
//   SIMPLE   : tokens are returned exactly as they were cut out of the text.
//   TRIMMED  : leading and trailing whitespace is stripped from every token.
//   NONEMPTY : tokens that are empty or contain only whitespace are dropped.
//   UNIQUE   : only the first occurrence of each token is kept.
// The remaining enumerators combine these steps as their names say.
enum class TokenizerOP
{
	SIMPLE,
	TRIMMEDTOKENS, NONEMPTYTOKENS, NONEMPTYTRIMMEDTOKENS,
	UNIQUETOKENS, UNIQUETRIMMEDTOKENS,
	UNIQUENONEMPTYTOKENS, UNIQUENONEMPTYTRIMMEDTOKENS
};

// Splits a string into tokens at any character contained in a delimiter set.
//
// valueType is the string type to operate on (std::string or std::wstring).
// Runs of consecutive delimiters are treated as a single separator, so the
// split itself never yields a zero-length token; a token can still consist
// only of whitespace when whitespace is not part of the delimiter set.
template <typename valueType>
class Tokenizer
{
private:
	using charType = typename valueType::value_type;

	TokenizerOP			op_;
	valueType			delims_;

	// Whitespace test for narrow characters. The cast to unsigned char is
	// required: std::isspace is undefined for negative values other than EOF.
	static bool
	IsSpace(char c)
	{
		return std::isspace(static_cast<unsigned char>(c)) != 0;
	}
	// Whitespace test for wide characters (std::isspace only covers the
	// unsigned char range).
	static bool
	IsSpace(wchar_t c)
	{
		return std::iswspace(static_cast<std::wint_t>(c)) != 0;
	}
	// Fallback for any other character type: only code points in the ASCII
	// range are classified, everything else counts as non-whitespace.
	template <typename otherChar>
	static bool
	IsSpace(otherChar c)
	{
		const unsigned long code = static_cast<unsigned long>(c);
		return code < 128u && std::isspace(static_cast<int>(code)) != 0;
	}

	// True when the token is empty or consists solely of whitespace.
	static bool
	IsBlank(valueType const &token)
	{
		return std::all_of(token.begin(), token.end(),
			[](charType c){ return IsSpace(c); });
	}

	// Returns a copy of s without leading and trailing whitespace.
	static valueType
	Trim(valueType const &s)
	{
		using reverseIt = typename valueType::const_reverse_iterator;
		auto notSpace = [](charType c){ return !IsSpace(c); };

		auto first = std::find_if(s.begin(), s.end(), notSpace);
		// Search backwards, but never past the first non-space character.
		auto last = std::find_if(s.rbegin(), reverseIt(first), notSpace).base();
		return valueType(first, last);
	}
	// Trims every token in place.
	static void
	TrimTokens(std::vector<valueType>& tokens)
	{
		for (auto &token : tokens)
			token = Trim(token);
	}
	// Drops every token that is empty or whitespace-only; order is kept.
	static void
	RemoveEmptyTokens(std::vector<valueType>& tokens)
	{
		tokens.erase(
			std::remove_if(tokens.begin(), tokens.end(),
				[](valueType const &token){ return IsBlank(token); }),
			tokens.end());
	}
	// Keeps only the first occurrence of each token; order is kept.
	static void
	KeepUniqueTokens(std::vector<valueType>& tokens)
	{
		// [begin, keepEnd) always holds the distinct tokens seen so far.
		auto keepEnd = tokens.begin();
		for (auto it = tokens.begin(); it != tokens.end(); ++it){
			if (std::find(tokens.begin(), keepEnd, *it) == keepEnd){
				if (keepEnd != it)
					*keepEnd = std::move(*it);
				++keepEnd;
			}
		}
		tokens.erase(keepEnd, tokens.end());
	}

public:
	// Tokenizer with an empty delimiter set and no post-processing.
	Tokenizer() :Tokenizer(TokenizerOP::SIMPLE){}
	// Tokenizer with an empty delimiter set and the given post-processing.
	Tokenizer(TokenizerOP op) : op_(op), delims_(){}
	// Tokenizer splitting at any character of delims.
	Tokenizer(valueType const& delims, TokenizerOP op = TokenizerOP::SIMPLE)
		: op_(op), delims_(delims){}

	// Returns the tokens of text using the stored delimiters and operation.
	std::vector<valueType>
	Tokens(valueType const& text) const
	{
		std::vector<valueType> tokens;
		GetTokens(text, tokens);
		return tokens;
	}
	// Replaces the stored delimiters (the change persists for later calls)
	// and returns the tokens of text.
	std::vector<valueType>
	Tokens(valueType const& text, valueType const& delims)
	{
		delims_ = delims;
		return Tokens(text);
	}
	// Replaces the stored operation (the change persists for later calls)
	// and returns the tokens of text.
	std::vector<valueType>
	Tokens(valueType const& text, TokenizerOP op)
	{
		op_ = op;
		return Tokens(text);
	}
	// Same as Tokens(text); provided so that the result can be obtained by
	// value under the GetTokens name as well.
	std::vector<valueType>
	GetTokens(valueType const& text) const
	{
		return Tokens(text);
	}
	// Splits text and stores the result in tokens (previous content of
	// tokens is discarded), then applies the configured post-processing.
	void
	GetTokens(valueType const& text, std::vector<valueType>& tokens) const
	{
		using sizeType = typename valueType::size_type;

		tokens.clear();
		sizeType start = text.find_first_not_of(delims_, 0);
		while (valueType::npos != start){
			const sizeType stop = text.find_first_of(delims_, start);
			// When stop is npos the count is clamped to the end of text.
			tokens.push_back(text.substr(start, stop - start));
			start = text.find_first_not_of(delims_, stop);
		}

		const bool trim =
			TokenizerOP::TRIMMEDTOKENS == op_ ||
			TokenizerOP::NONEMPTYTRIMMEDTOKENS == op_ ||
			TokenizerOP::UNIQUETRIMMEDTOKENS == op_ ||
			TokenizerOP::UNIQUENONEMPTYTRIMMEDTOKENS == op_;
		const bool nonEmpty =
			TokenizerOP::NONEMPTYTOKENS == op_ ||
			TokenizerOP::NONEMPTYTRIMMEDTOKENS == op_ ||
			TokenizerOP::UNIQUENONEMPTYTOKENS == op_ ||
			TokenizerOP::UNIQUENONEMPTYTRIMMEDTOKENS == op_;
		const bool unique =
			TokenizerOP::UNIQUETOKENS == op_ ||
			TokenizerOP::UNIQUETRIMMEDTOKENS == op_ ||
			TokenizerOP::UNIQUENONEMPTYTOKENS == op_ ||
			TokenizerOP::UNIQUENONEMPTYTRIMMEDTOKENS == op_;

		// Trimming must come first because it can turn tokens into empty or
		// duplicate ones. Blank tokens are removed before de-duplication so
		// the quadratic uniqueness pass sees fewer elements; the result is
		// the same in either order.
		if (trim)
			TrimTokens(tokens);
		if (nonEmpty)
			RemoveEmptyTokens(tokens);
		if (unique)
			KeepUniqueTokens(tokens);
	}

	// Replaces the stored delimiters (the change persists for later calls)
	// and splits text into tokens.
	void
	GetTokens(
		valueType const& text,
		std::vector<valueType>& tokens,
		valueType const& delims)
	{
		delims_ = delims;
		GetTokens(text, tokens);
	}
};
Advertisements

A basic extensible execution timer for C++

All of us need to time our projects and parts of them. While recently getting re-acquainted with C++ through C++11, I needed to do it many times too. I started by using the std::chrono facilities directly, but soon figured out that changing code to track the timing of different operations was getting tiring and boring. Some portions are meaningful only when expressed in seconds, while others would never reach a full second and are better traced in microseconds. Then again, microseconds may run to 7-8 digits, and we would have a tough time comparing two values if the log file has multiple outputs. I thus decided to revamp my timer class and wrote its signature as follows. Its usage is shown in the blog here.

////////////////////////////////////////////////////////////////////////////////
// Interface for turning a raw tick count into human-readable text.
//
// There is one overload per std::chrono unit. `value` is the count to print,
// expressed in that unit. `dummy` exists only to select the overload: the
// ::rep types of several units can be the same integer type, so the count
// alone would not distinguish them.
class PrintTimer
{
public:
// Implementations are owned and destroyed through a PrintTimer pointer, so
// the destructor has to be virtual.
virtual ~PrintTimer() = default;

virtual std::wstring
	to_wstring(std::chrono::hours::rep value, std::chrono::hours dummy) = 0;
virtual std::wstring
	to_wstring(std::chrono::minutes::rep value, std::chrono::minutes dummy) = 0;
virtual std::wstring
	to_wstring(std::chrono::seconds::rep value, std::chrono::seconds dummy) = 0;
virtual std::wstring
	to_wstring(std::chrono::microseconds::rep value, std::chrono::microseconds dummy) = 0;
virtual std::wstring
	to_wstring(std::chrono::milliseconds::rep value, std::chrono::milliseconds dummy) = 0;
};

// Measures the interval between a start and an end time point.
//
// clock_type : the std::chrono clock used to take time points.
// dur        : the std::chrono duration type in which elapsed() reports.
//
// An optional PrintTimer turns the elapsed count into readable text.
template<typename clock_type, typename dur>
class ExecutionTimer
{
private:

typename clock_type::time_point m_start;
typename clock_type::time_point m_end;
std::shared_ptr<PrintTimer>		m_printer;

public:

// Timer without a printer; start and end are both set to "now".
ExecutionTimer() : ExecutionTimer(nullptr) {}
// Timer using the given printer (may be empty); start and end are both set
// to the same "now" sample, so a fresh timer reports an elapsed count of 0.
ExecutionTimer(std::shared_ptr<PrintTimer> const& printer)
	: m_start(clock_type::now()),
	  m_end(m_start),
	  m_printer(printer)
{
}

// Sets the start of the interval to the current time.
void set_start(){
	m_start = clock_type::now();
}
// Sets the start of the interval to the given time point.
void set_start(typename clock_type::time_point start){
	m_start = start;
}
// Sets the end of the interval to the current time.
void set_end(){
	m_end = clock_type::now();
}
// Sets the end of the interval to the given time point.
void set_end(typename clock_type::time_point end){
	m_end = end;
}
// Replaces the printer used by to_wstring().
void set_printer(std::shared_ptr<PrintTimer> const& printer){
	m_printer = printer;
}

// Returns end - start as a count of `dur` units (truncated by duration_cast).
typename dur::rep elapsed() const
{
	return std::chrono::duration_cast<dur>(m_end - m_start).count();
}
// Formats the elapsed time with the stored printer.
std::wstring to_wstring() const {
	return to_wstring(m_printer);
}
// Formats the elapsed time with the given printer; returns a fixed notice
// when the printer is empty.
std::wstring to_wstring(std::shared_ptr<PrintTimer> const& printer) const
{
	if (!printer){
		return L"Printing facilities not available";
	}
	// The second argument only selects the PrintTimer overload matching dur.
	return printer->to_wstring(elapsed(),
		std::chrono::duration_cast<dur>(std::chrono::seconds(1)));
}
};

PrintTimer is the interface that will be used to display the output in a nice way when the value is big. You will see it soon. ExecutionTimer has to be declared and defined fully in a header file for it to be available everywhere, as it is a template class built on the std::chrono facilities. The whole shebang here does not use C library functions for timer display, which was the main agenda. Everything is done using STL methods. The purpose of having PrintTimer as an interface is very simple: it allows these big functions to be written anywhere and in a customizable way. If the user does not need it, they can simply call elapsed from ExecutionTimer to get the raw counter and then use some other display scheme.

The dummy parameter at the end of each to_wstring method is needed to make function overloading work, since several of the ***::rep types coalesce to the same underlying type.

My implementation for PrintTimer interface is as follows.

////////////////////////////////////////////////////////////////////////////////
class PrintExecutionTimer :public PrintTimer
{
private:
const std::wstring SS = std::wstring(L" ");

public:
virtual std::wstring
	to_wstring(std::chrono::hours::rep value, std::chrono::hours dummy);
virtual std::wstring
	to_wstring(std::chrono::minutes::rep value, std::chrono::minutes dummy);
virtual std::wstring
	to_wstring(std::chrono::seconds::rep value, std::chrono::seconds dummy);
virtual std::wstring
	to_wstring(std::chrono::microseconds::rep value, std::chrono::microseconds dummy);
virtual std::wstring
	to_wstring(std::chrono::milliseconds::rep value, std::chrono::milliseconds dummy);
};

////////////////////////////////////////////////////////////////////////////////
// Formats a count of hours as "<value> hours". Hours are the largest unit
// handled, so nothing is carried any further. `dummy` is not used.
std::wstring
PrintExecutionTimer::to_wstring(std::chrono::hours::rep value, std::chrono::hours dummy)
{
	std::wstring text = std::to_wstring(value);
	text += SS;
	text += L"hours";
	return text;
}
// Formats a count of minutes.
//   below 60    : "<value> minutes"
//   exactly 60  : the hours overload applied to 1
//   above 60    : the hours overload applied to value / 60, followed by
//                 "<value % 60> minutes"
std::wstring
PrintExecutionTimer::to_wstring(std::chrono::minutes::rep value, std::chrono::minutes dummy)
{
	namespace chr = std::chrono;
	const auto per_hour = chr::minutes(60).count();

	// Less than a full hour: nothing to carry.
	if (value < per_hour)
		return std::to_wstring(value) + SS + L"minutes";

	// Tag value that selects the hours overload.
	const auto hours_tag = chr::duration_cast<chr::hours>(dummy);
	std::wstring text = to_wstring(chr::hours::rep(value / per_hour), hours_tag);

	// Exactly one hour is printed without a minutes remainder.
	if (value != per_hour)
		text += SS + std::to_wstring(value % per_hour) + SS + L"minutes";
	return text;
}

// Formats a count of seconds.
//   below 60    : "<value> seconds"
//   exactly 60  : the minutes overload applied to 1
//   above 60    : the minutes overload applied to value / 60, followed by
//                 "<value % 60> seconds"
std::wstring
PrintExecutionTimer::to_wstring(std::chrono::seconds::rep value, std::chrono::seconds dummy)
{
	namespace chr = std::chrono;
	const auto per_minute = chr::seconds(60).count();

	// Less than a full minute: nothing to carry.
	if (value < per_minute)
		return std::to_wstring(value) + SS + L"seconds";

	// Tag value that selects the minutes overload.
	const auto minutes_tag = chr::duration_cast<chr::minutes>(dummy);
	std::wstring text = to_wstring(chr::minutes::rep(value / per_minute), minutes_tag);

	// Exactly one minute is printed without a seconds remainder.
	if (value != per_minute)
		text += SS + std::to_wstring(value % per_minute) + SS + L"seconds";
	return text;
}

// Formats a count of milliseconds.
//   below 1000   : "<value> milliseconds"
//   exactly 1000 : the seconds overload applied to 1
//   above 1000   : the seconds overload applied to value / 1000, followed by
//                  "<value % 1000> milliseconds"
std::wstring
PrintExecutionTimer::to_wstring(std::chrono::milliseconds::rep value, std::chrono::milliseconds dummy)
{
	namespace chr = std::chrono;
	const auto per_second = chr::milliseconds(1000).count();

	// Less than a full second: nothing to carry.
	if (value < per_second)
		return std::to_wstring(value) + SS + L"milliseconds";

	// Tag value that selects the seconds overload.
	const auto seconds_tag = chr::duration_cast<chr::seconds>(dummy);
	std::wstring text = to_wstring(chr::seconds::rep(value / per_second), seconds_tag);

	// Exactly one second is printed without a milliseconds remainder.
	if (value != per_second)
		text += SS + std::to_wstring(value % per_second) + SS + L"milliseconds";
	return text;
}

// Formats a count of microseconds.
//   below 1000   : "<value> microseconds"
//   exactly 1000 : the milliseconds overload applied to 1
//   above 1000   : the milliseconds overload applied to value / 1000,
//                  followed by "<value % 1000> microseconds"
std::wstring
PrintExecutionTimer::to_wstring(std::chrono::microseconds::rep value, std::chrono::microseconds dummy)
{
	namespace chr = std::chrono;
	const auto per_milli = chr::microseconds(1000).count();

	// Less than a full millisecond: nothing to carry.
	if (value < per_milli)
		return std::to_wstring(value) + SS + L"microseconds";

	// Tag value that selects the milliseconds overload.
	const auto millis_tag = chr::duration_cast<chr::milliseconds>(dummy);
	std::wstring text = to_wstring(chr::milliseconds::rep(value / per_milli), millis_tag);

	// Exactly one millisecond is printed without a microseconds remainder.
	if (value != per_milli)
		text += SS + std::to_wstring(value % per_milli) + SS + L"microseconds";
	return text;
}

You may notice that they reference each other. If needed, you can decouple them from each other.

Below is a use case as to how to use it.

void timer_test()
{
    using namespace std::chrono;
	using namespace std;

	ExecutionTimer<high_resolution_clock, microseconds> exe_timer_micro;
	ExecutionTimer<high_resolution_clock, milliseconds> exe_timer_milli(new PrintExecutionTimer());
	exe_timer_micro.set_start();
	exe_timer_milli.set_start();

	for (size_t i = 0; i < (std::numeric_limits<unsigned int>::max()); i++)
	{
		if (0 == i % (32 * std::numeric_limits<unsigned short>::max())){
			wcout << L".";
			exe_timer_micro.set_end(high_resolution_clock::now());
			exe_timer_milli.set_end(high_resolution_clock::now());
		}
	}
	wcout << endl;
	wcout << to_wstring(exe_timer_micro.elapsed()) << L" microseconds" << endl;
	wcout << exe_timer_micro.to_wstring() << endl;

	wcout << to_wstring(exe_timer_milli.elapsed()) << L" milliseconds" << endl;
	wcout << exe_timer_milli.to_wstring() << endl;

    return;
}

Is calling a recursive class method slower than calling recursive free functions in C++ ?

While working on an algorithm recently, I wondered whether it would be better to define and declare a recursive method outside of a class. To settle my doubt, I wrote a small program using C++11 and tested it. It seems there isn’t any discernible difference between the two, whether the function is part of a class or defined as a free function. The code is as follows.

#include <chrono>
#include <iostream>
#include <memory>
#include <string>
#include <thread>
#include <vector>
////////////////////////////////////////////////////////////////////////////////
// Check if recursive call to class function runs at same speed as normal function
// Recursive factorial implemented as a class method.
class Factorial
{
public:
	// Returns num! for num <= 20. For larger num the exact value does not fit
	// into long long; the product is then computed modulo 2^64 in unsigned
	// arithmetic (signed overflow would be undefined behavior) and converted
	// back, so the result is a deterministic wrapped value.
	long long get(unsigned int num)
	{
		if (num <= 1)
			return 1LL;

		const unsigned long long rest =
			static_cast<unsigned long long>(get(num - 1));
		return static_cast<long long>(num * rest);
	}
};

// Recursive factorial implemented as a free function.
// Returns num! for num <= 20. For larger num the exact value does not fit
// into long long; the product is then computed modulo 2^64 in unsigned
// arithmetic (signed overflow would be undefined behavior) and converted
// back, so the result is a deterministic wrapped value.
long long factorial(unsigned int num)
{
	if (num <= 1)
		return 1LL;

	const unsigned long long rest =
		static_cast<unsigned long long>(factorial(num - 1));
	return static_cast<long long>(num * rest);
}

////////////////////////////////////////////////////////////////////////////////
#pragma optimize( "", off )
void RunOOP()
{
	using namespace std;
	using namespace std::chrono;
	
	ExecutionTimer<high_resolution_clock, microseconds> exe_timer_micro(new PrintExecutionTimer());
	vector<unsigned int> num { 35, 40 };
	unsigned int iter = 2000000;
	
	vector<long long> product{ 0, 0 };
	vector<long long> result{ 0, 0 };

	wcout << L"Calling free function" << endl;
	exe_timer_micro.set_start();
	product[0] = factorial(num[0]);
	product[1] = factorial(num[1]);
	for (size_t i = 0; i < iter; i++)
	{
		result[0] = factorial(num[0]);
		result[1] = factorial(num[1]);
		if (product != result){
			product = { 0, 0 };
			break;
		}
	}
	exe_timer_micro.set_end();
	wcout << L"Factorial of " << num[0] << L" is : " << to_wstring(product[0]) << endl;
	wcout << L"Factorial of " << num[1] << L" is : " << to_wstring(product[1]) << endl;
	wcout << to_wstring(exe_timer_micro.elapsed()) << L" microseconds" << endl;
	wcout << exe_timer_micro.to_wstring() << endl;
	
	wcout << endl;
	seconds sleep_time(10);
	wcout << L"Sleeping for " << PrintExecutionTimer().to_wstring(seconds(10).count(), seconds(1)) << endl;
	std::this_thread::sleep_for(seconds(10));
	wcout << endl;

	wcout << L"Calling class method" << endl;
	exe_timer_micro.set_start();
	product[0] = Factorial().get(num[0]);
	product[1] = Factorial().get(num[1]);
	for (size_t i = 0; i < iter; i++)
	{
		result[0] = Factorial().get(num[0]);
		result[1] = Factorial().get(num[1]);
		if (product != result){
			product = { 0, 0 };
			break;
		}
	}
	exe_timer_micro.set_end();
	wcout << L"Factorial of " << num[0] << L" is : " << to_wstring(product[0]) << endl;
	wcout << L"Factorial of " << num[1] << L" is : " << to_wstring(product[1]) << endl;
	wcout << to_wstring(exe_timer_micro.elapsed()) << L" microseconds" << endl;
	wcout << exe_timer_micro.to_wstring() << endl;
}
#pragma optimize( "", on )

I used

#pragma optimize

to ensure that VS 2013 does not do any optimization for this function. To be sure of avoiding optimization, I ran it on two types of input, one by one. You can call

RunOOP

function from main to get the output. Slightly modified Code for ExecutionTimer is available at this stackoverflow thread as well as in my next blog.

As you can see, I ran the iterations 2,000,000 times with values 35 and 40. You can modify this as per your environment. I did not see much difference in timing between the call to the free function and the call to the class method.

Enjoy timing your code to reach correct conclusion.