Click here to Skip to main content
65,938 articles
CodeProject is changing. Read more.
Articles / programming / algorithm

Generate a Parameterized Random Char Sequence

4.50/5 (2 votes)
15 Sep 2015MIT3 min read 14.4K   15  
How to generate a random sequence of chars with the specified parameters (e.g., length, char sets, char count per set, etc.)

Introduction

The problem is common and well self explainable. For example, I need an algorithm, producing a strong brute force proof password with the length of 16 and containing at least one alpha and one digit. Looks familiar, isn't it? If your program requires your users to have a good password, appropriate to the above described rules (or any others), why not auto generate it and suggest to a user?

Or you might require to generate a unique session ID or any other char sequence of some specific type.

Background

Unification. We like it. So, we need one generic algorithm to-rule-them-all.

Using the Code

The header module: ("StringUtils.h"):

C++
#ifndef StringUtilsH
#define StringUtilsH

#include <cstring>

// Will use ALL available options by default
// Pwd. complexity: 26 + 10 + 32 + 26 = 94^N
struct GeneratePwdOptions {
  enum ECharSetType {
    CST_ALPHA,  // latin letters; low: [97, 122] AND up: [65, 90] (total of 26 unique)
    CST_DIGITS, // arabic numbers [48, 57] (10 unique)
    CST_SYMBOLS // printable symbols: punctuation AND so on, EXCLUDING space
  };

  static const ECharSetType CHAR_SETS[];
  static const size_t CHAR_SET_COUNT_; // big AND small alphas counts as a one charset

  //// ALL codes are shown for the ANSI ASCII table
  bool useAlpha      = true;
  bool useDigits     = true;
  bool useSymbols    = true;
  bool caseSensetive = true; // use both lower AND upper case (if false - ONLY lowercase)
};
const GeneratePwdOptions DEFAULT_GENERATE_PWD_OPTIONS_;
const GeneratePwdOptions::ECharSetType GeneratePwdOptions::CHAR_SETS[] =
  {ECharSetType::CST_ALPHA, ECharSetType::CST_DIGITS, ECharSetType::CST_SYMBOLS};
const size_t GeneratePwdOptions::CHAR_SET_COUNT_ = sizeof(CHAR_SETS) / sizeof(*CHAR_SETS);

#include <functional>
#include <random>
#include <chrono>

// 'pwdBuf' should be at least (len + 1U) large
// 'len' should be >= 8U (returns empty str. if NOT)
// Returns empty str. on ANY error
// 'charPerSet' is an OUTPUT statistics (OPTIONAL)
// Complexity - linear: O(2*len)
template <const bool Optimized = true>
char* generatePwd(char* pwdBuf, const size_t len = 16U,
                  const GeneratePwdOptions& options = DEFAULT_GENERATE_PWD_OPTIONS_,
                  size_t charPerSet[GeneratePwdOptions::CHAR_SET_COUNT_] = nullptr) throw()
{
  static const auto MIN_PWD_LEN_ = 8U;
  static_assert('z' > 'a' && 'Z' > 'A' && '9' > '0', "Incorrect char codes");
  // [33, 47] U [58, 64] U [91, 96] U [123, 126]
  static const char SPEC_SYMBOLS[] = {"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"};

  if (!pwdBuf || !len) return nullptr;
  *pwdBuf = '\0';
  if (len < MIN_PWD_LEN_) return pwdBuf;

  const size_t timeSeed =
    static_cast<size_t>(std::chrono::system_clock::now().time_since_epoch().count());
  const std::mt19937 engine(timeSeed);

  const std::uniform_int_distribution<size_t> charIdxDistribution(size_t(), len - 1U);
  auto rollCharIdx = std::bind(charIdxDistribution, engine); // callable

  auto charSetCount = size_t();
  if (options.useAlpha) ++charSetCount;
  if (options.useDigits) ++charSetCount;
  if (options.useSymbols) ++charSetCount;
  
  if (!charSetCount) return pwdBuf; // error: charset NOT specified
  
  // Each set can place a strictly limited amount of chars
  const auto maxCharPerSetCount = len / charSetCount;
  // At least 1 char form each set
  const std::uniform_int_distribution<size_t> charPerSetDistribution(1U, maxCharPerSetCount);
  auto rollCharPerSetCount = std::bind(charPerSetDistribution, engine); // callable
  
  size_t localCharPerSet[GeneratePwdOptions::CHAR_SET_COUNT_] = {0}; // statistics
  auto const currCharPerSet = charPerSet ? charPerSet : localCharPerSet; // replace if NOT provided
  
  struct Randomizer_ {
    Randomizer_() throw() { // 'srand' will be called ONLY once during initialization
      srand(static_cast<unsigned int>(time(nullptr)));
    }
  };
  static const Randomizer_ R_; // static init. is a thread safe in C++11

  auto charSetLeftCount = charSetCount;
  size_t charPlacedCount = size_t(); // total amount of chars already placed in the buf.

  auto getCharCountPerSet = [&]() throw() {
    // If last - fill the rest of the buf.
    return charSetLeftCount ? rollCharPerSetCount() : (len - charPlacedCount);
  };

  // Also updates statistics for the specified charset
  auto getRandomChar = [&](const GeneratePwdOptions::ECharSetType charSetType) throw() {
    size_t firstIdx = size_t(), lastIdx = size_t();
    switch (charSetType) {
      case GeneratePwdOptions::CST_ALPHA: lastIdx = 'z' - 'a'; break;
      case GeneratePwdOptions::CST_DIGITS: lastIdx = '9' - '0'; break;
      case GeneratePwdOptions::CST_SYMBOLS:
        lastIdx = (sizeof(SPEC_SYMBOLS) / sizeof(*SPEC_SYMBOLS)) - 2U; break; // skip '\0'
    }
    const auto idx = firstIdx + (rand() % (lastIdx - firstIdx + 1U)); // from set

    auto actualChar = '\0';
    switch (charSetType) {
      case GeneratePwdOptions::CST_ALPHA:
        // Randomize case (if neeeded)
        actualChar = options.caseSensetive ? (rand() % 2 ? 'A' : 'a') : 'a';
        actualChar += idx;
        ++currCharPerSet[0U];
      break;
      case GeneratePwdOptions::CST_DIGITS:
        actualChar = '0' + idx;
        ++currCharPerSet[1U];
      break;
      case GeneratePwdOptions::CST_SYMBOLS:
        actualChar = SPEC_SYMBOLS[idx];
        ++currCharPerSet[2U];
      break;
    }
    return actualChar;
  };
  
  // Places random count of a random chars from the specified set at random blank positions
  auto addCharsA = [&](const GeneratePwdOptions::ECharSetType charSetType) throw() {
    --charSetLeftCount;
    const auto charCount = getCharCountPerSet();
    
    size_t charIdx = size_t(); // actual idx. in the array
    // Add random chars from the curr. set
    for (size_t charNumber = size_t(); charNumber < charCount; ++charNumber) {
      while (pwdBuf[charIdx = rollCharIdx()]); // roll while NOT free space for the symbol
      pwdBuf[charIdx] = getRandomChar(charSetType);
    }
    charPlacedCount += charCount;
  };
  
  // Places random count of a random chars from the specified set at predefined positions
  //  (sequentially)
  auto addCharsB = [&](const GeneratePwdOptions::ECharSetType charSetType) throw() {
    --charSetLeftCount;
    const auto charCount = getCharCountPerSet();
    
    // Add random chars from the curr. set: O(charCount)
    for (size_t charNumber = size_t(); charNumber < charCount; ++charNumber, ++charPlacedCount) {
      pwdBuf[charPlacedCount] = getRandomChar(charSetType);
    }
  };
  
  switch (Optimized) {
    case false:
      memset(pwdBuf, 0, len + 1U);
      if (options.useDigits) addCharsA(GeneratePwdOptions::ECharSetType::CST_DIGITS);
      if (options.useSymbols) addCharsA(GeneratePwdOptions::ECharSetType::CST_SYMBOLS);
      if (options.useAlpha) addCharsA(GeneratePwdOptions::ECharSetType::CST_ALPHA);
    break;
    default: // true
      if (options.useDigits) addCharsB(GeneratePwdOptions::ECharSetType::CST_DIGITS);
      if (options.useSymbols) addCharsB(GeneratePwdOptions::ECharSetType::CST_SYMBOLS);
      if (options.useAlpha) addCharsB(GeneratePwdOptions::ECharSetType::CST_ALPHA);
      // Random shuffle: O(charPlacedCount)
      for (size_t charNumber = size_t(); charNumber < charPlacedCount; ++charNumber) {
        std::swap(pwdBuf[charNumber], pwdBuf[rollCharIdx()]);
      }
      pwdBuf[charPlacedCount] = '\0';
  }
  return pwdBuf;
}

#endif // StringUtilsH

Notes

I used an old C and modern C++11 pseudorandom generators to demonstrate both.

We have three char set types (all codes are ANSI ASCII):

C++
CST_ALPHA,  // latin letters; low: [97, 122] AND up: [65, 90] (total of 26 unique)
CST_DIGITS, // arabic numbers [48, 57] (10 unique)
CST_SYMBOLS // printable symbols: punctuation AND so on, EXCLUDING space

so:

C++
// Pwd. complexity: 26 + 10 + 32 + 26 = 94^N

if case sensitive and using all sets ('^' means the power, not the bitwise xor), because we have 26 + 26 latin letters in both cases, 10 digits and 32 special symbols:

C++
// [33, 47] U [58, 64] U [91, 96] U [123, 126]
static const char SPEC_SYMBOLS[] = {"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"};

The algorithm is adaptive and uses:

C++
lastIdx = (sizeof(SPEC_SYMBOLS) / sizeof(*SPEC_SYMBOLS)) - 2U; break; // skip '\0'

while generating the random char from the special symbols set, so this static array can be freely changed or the function can be upgraded to use some external one (passed as a function parameter).

The '/ sizeof(*SPEC_SYMBOLS)) - 2U' is a bit redundant through, because the sizeof(char) is always 1.

Currently an amount of chars per set used is pseudorandom in [1, maxCharPerSetCount]:

C++
const std::uniform_int_distribution<size_t> charPerSetDistribution(1U, maxCharPerSetCount);
auto rollCharPerSetCount = std::bind(charPerSetDistribution, engine); // callable

where:

C++
const auto maxCharPerSetCount = len / charSetCount;

i.e., for a string of 8 chars, it will be in [1, 2] for the first two sets and [4, 6] for the last one (the last set uses all the rest space available). I decided to make it random to make a password structure less predictable, but you can easily extend/modify the provided function to generate a specified amout of chars per set.

Minimum password length of 8 chosen to have a strong password, which is capable of having at least one char of an each set.

The filling order is:

C++
if (options.useDigits) addChars(GeneratePwdOptions::ECharSetType::CST_DIGITS);
if (options.useSymbols) addChars(GeneratePwdOptions::ECharSetType::CST_SYMBOLS);
if (options.useAlpha) addChars(GeneratePwdOptions::ECharSetType::CST_ALPHA);

so the password of 8 length with the all options turned ON will have 1-2 digits, 1-2 special symols and 4-6 alphas in the random case.

The algorithm can also be easily extended to produce a char sequence of a random length (up to buffer size - 1).

Test code for Ideone online compiler:

C++
// <Include the code from StringUtils>

#include <cassert>
#include <iostream>
 
int main() {
    char pwdBuf[64U];
    std::cout << generatePwd(pwdBuf, sizeof(pwdBuf) - 1U) << std::endl;
 
    const GeneratePwdOptions options = {true, true, false, false};
    std::cout << generatePwd(pwdBuf, sizeof(pwdBuf) / 2U - 1U, options) << std::endl;
    
    const auto pwdLen = 32U;
    assert(pwdLen < sizeof(pwdBuf));
    GeneratePwdOptions options2;
    options2.useAlpha = false;
    const auto pwd = generatePwd(pwdBuf, pwdLen, options2);
    const auto len_ = strlen(pwd);
    assert(len_ == pwdLen);
    for (size_t idx = 0U; idx < len_; ++idx) {
      assert(!isalpha(pwdBuf[idx]));
    }
    std::cout << pwd << std::endl;
    
    return 0;
}

Output:

C++
QHK7I0o:uCb0Jgx[M74xd*1jG/xNb=W4dR6DH9w@18t'50X3N0kz28JG1786RTY
wt4x7tlzbc8mynsduk3bxmfearig17v
7]]+:%+<<,[|~#={&_]";[.?`@^.^";`

Points of Interest

This module is just a small part of the library, which uses C++11 features and which I am working on now, I decided to make it a public property.

As you can see, version 'A' of an algorithm has a performance issue:

C++
while (pwdBuf[charIdx = rollCharIdx()]); // roll while NOT free space for the symbol

so I decided to slightly redesign it, adding an optimized 'B' version. I left both through, to compare the performance advantage gained.

History

  • 15th September, 2015: Corrected version
  • 17th September, 2015 - Update 1:  Added optimized version
  • 18th September, 2015 - Update 2:  Error fixed (thx. cheesbj), even more optimized version

I extended the function adding new generation logic where firstly string is filled in the direct and predictive order and then randomly shuffled.

Then, I removed unnecessary (for the optimzed version) call:

C++
memset(pwdBuf, 0, len + 1U);

And finally, I added optimization for pseudo-random number generator initialization:

C++
struct Randomizer_ {
    Randomizer_() throw() { // 'srand' will be called ONLY once during initialization
      srand(static_cast<unsigned int>(time(nullptr)));
    }
  };
  static const Randomizer_ R_; // static init. is a thread safe in C++11

Also, this fixes possible data race error:

The function accesses and modifies internal state objects, 
which may cause data races with concurrent calls to rand or srand.

(form srand descripton)

Performance test for Ideone online compiler:

C++
// <Include the code from StringUtils>

#include <cassert>
#include <iostream>

int main(int argc, char* argv[])
{
  static const auto EXEC_COUNT = 55000U;
  char pwdBuf[128U] = {0};

  auto time1 = std::chrono::system_clock::now();
  for (size_t idx = size_t(); idx < EXEC_COUNT; ++idx) {
    generatePwd(pwdBuf, sizeof(pwdBuf) - 1U);
  }
  auto time2 = std::chrono::system_clock::now();
  auto count1 = (time2 - time1).count();
  std::cout << "Optimzed version: " << count1 << std::endl;

  time1 = std::chrono::system_clock::now();
  for (size_t idx = size_t(); idx < EXEC_COUNT; ++idx) {
    generatePwd<false>(pwdBuf, sizeof(pwdBuf) - 1U);
  }
  time2 = std::chrono::system_clock::now();
  auto count2 = (time2 - time1).count();
  std::cout << "Old version: " << count2 << std::endl;

  std::cout << "New (optimzed) version is " << static_cast<double>(count2) / 
	count1 << " times faster" << std::endl;

  return 0;
}

Output:

C++
Optimzed version: 1392535754
Old version: 3381367898
New (optimzed) version is 2.42821 times faster

License

This article, along with any associated source code and files, is licensed under The MIT License