Arbitrary wordlist support.

This commit is contained in:
2026-02-05 00:51:59 -05:00
parent fd6dcbc841
commit b8cbf97ae5
3 changed files with 5074 additions and 24 deletions

5000
5k-dictionary Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,7 @@
CXXFLAGS+= -std=c++17 -O3 CXXFLAGS+= -std=c++23 -O3
CXXFLAGS+= -I /usr/local/include CXXFLAGS+= -I /usr/local/include
CXXFLAGS+= -I .
LDFLAGS+= -Wl,-rpath,'$$ORIGIN/' LDFLAGS+= -Wl,-rpath,'$$ORIGIN/'

View File

@ -1,3 +1,11 @@
#include <cstdint>
#include <cctype>
#include <cstdlib>
#include <cstddef>
#include <climits>
#include <cassert>
#include <cmath>
#include <iostream> #include <iostream>
#include <fstream> #include <fstream>
#include <vector> #include <vector>
@ -7,14 +15,13 @@
#include <exception> #include <exception>
#include <stdexcept> #include <stdexcept>
#include <random> #include <random>
#include <cstdint>
#include <cctype>
#include <cstdlib>
#include <cstddef>
#include <climits>
#include <cassert>
#include <boost/lexical_cast.hpp> #include <boost/lexical_cast.hpp>
#include <Alepha/ProgramOptions.h>
#include <Alepha/Utility/enroll.h>
#undef BITS #undef BITS
#undef DOMAIN #undef DOMAIN
#undef SKIP #undef SKIP
@ -32,10 +39,28 @@ namespace
{ {
namespace C namespace C
{ {
const std::uint64_t bits= 18; const bool debug= false;
const std::uint64_t domain= 1 << C::bits; const bool debugDictionary= false or C::debug;
} }
// Run-time settings for the password generator.  These replace the former
// compile-time constants so that word lists of arbitrary size can be used.
namespace Options
{
	// Path of the word list that `getDictionary()` opens.  It can be overridden
	// on the command line through the `--dictionary` option registered below.
	std::string dictionaryName= "dictionary";

	// Registers the `--dictionary` option and binds it to `dictionaryName`.
	// NOTE(review): presumably `enroll` runs this lambda during static
	// initialization, before `Alepha::handleOptions` is called in `main`, and
	// `!default!` is presumably a placeholder the option library expands to the
	// current default value -- confirm both against the Alepha documentation.
	auto init= Alepha::Utility::enroll <=[]
	{
		using namespace Alepha::literals::option_literals;
		--"dictionary"_option << dictionaryName << "Specify a list of words (dictionary) to "
				<< "use when generating passwords. !default!";
	};

	// Number of random bits consumed per selected word.  `getDictionary()`
	// assigns this from the size of the loaded word list; it stays zero until then.
	std::uint64_t bits= 0;

	// Number of distinct words addressable with `bits` bits, i.e. 2^bits.
	// The shift is performed on a 64-bit operand: a plain `1 << bits` would be
	// evaluated as `int` and overflow (undefined behaviour) once `bits` reaches 31.
	std::uint64_t domain() { return std::uint64_t{ 1 } << bits; }
}
class Failure : public std::runtime_error class Failure : public std::runtime_error
{ {
public: public:
@ -67,7 +92,9 @@ namespace
void void
dictStat( const std::vector< std::string > &d ) dictStat( const std::vector< std::string > &d )
{ {
std::cout << "Dictionary statistics: " << std::endl; if( not C::debugDictionary ) return;
std::cout << "Dictionary statistics (" << d.size() << " entries): " << std::endl;
for( int i= 1; i < 25; ++i ) for( int i= 1; i < 25; ++i )
{ {
std::cout << i << " character words: "; std::cout << i << " character words: ";
@ -80,14 +107,20 @@ namespace
auto auto
getDictionary() getDictionary()
{ {
std::ifstream d( "dictionary" ); std::ifstream d( Options::dictionaryName );
if( d.bad() || d.fail() ) throw Failure(); if( d.bad() || d.fail() ) throw Failure();
// We assume that the dictionary is unique -- it reduces load time. // We assume that the dictionary is unique -- it reduces load time.
using input_type= std::istream_iterator< std::string >; using input_type= std::istream_iterator< std::string >;
safe_vector< std::string > dict{ input_type{ d }, input_type{} }; safe_vector< std::string > dict{ input_type{ d }, input_type{} };
// dictStat( dict ); const std::size_t initDictSize= dict.size();
Options::bits= std::ceil( std::log( initDictSize ) / std::log( 2 ) );
const std::size_t minSize= std::ceil( Options::bits / ( std::log( 26 ) / std::log( 2 ) ) );
std::cerr << "Min size is: " << minSize << std::endl;
dictStat( dict );
// Remove words which are really small from the dictionary -- it makes for // Remove words which are really small from the dictionary -- it makes for
// somewhat slightly kinda sorta easier to crack passwords: // somewhat slightly kinda sorta easier to crack passwords:
@ -117,8 +150,15 @@ namespace
// and about 600 two-letter words, total, people will be selective in the passwords // and about 600 two-letter words, total, people will be selective in the passwords
// that they keep from this program and attempt to memorize. If we cut out the // that they keep from this program and attempt to memorize. If we cut out the
// possibility of hard passwords, we save pain in explaining good designs. // possibility of hard passwords, we save pain in explaining good designs.
DOIT dict.erase( std::remove_if( begin( dict ), end( dict ), //
[]( const auto &x ){ return x.size() < 4; } ), end( dict ) ); // This has been generalized to not permit words shorter than N characters,
// where N is the number of expected bits divided by the number of bits in a single
// letter. Thus a 1024 word list would be 10 bits and should not use words shorter
// than 3 characters.
dict.erase( std::remove_if( begin( dict ), end( dict ),
[&]( const auto &x ){ return x.size() < minSize; } ), end( dict ) );
dictStat( dict );
// We shuffle before trim so that we aren't quite sure which words get thrown out. // We shuffle before trim so that we aren't quite sure which words get thrown out.
std::random_device rd; std::random_device rd;
@ -126,9 +166,15 @@ namespace
std::shuffle( begin( dict ), end( dict ), gen ); std::shuffle( begin( dict ), end( dict ), gen );
// Make sure that we can reach the expected domain, and trim to that domain. // Make sure that we can reach the expected domain, and trim to that domain.
if( ( C::domain ) > dict.size() ) if( Options::domain() > dict.size() )
throw Failure( "Dict size: " + boost::lexical_cast< std::string >( dict.size() ) ); {
dict.resize( C::domain ); --Options::bits;
dict.resize( Options::domain() );
}
dictStat( dict );
assert( Options::domain() == dict.size() );
return dict; return dict;
}; };
@ -139,11 +185,13 @@ int
main( const int argcnt, const char *const *const argvec ) main( const int argcnt, const char *const *const argvec )
try try
{ {
const auto args= Alepha::handleOptions( argcnt, argvec );
const auto dict= getDictionary(); const auto dict= getDictionary();
auto rnd= openRandom(); auto rnd= openRandom();
const auto bitsDesired = ( argcnt == 1 ) ? 64 : boost::lexical_cast< int >( argvec[ 1 ] ); const auto bitsDesired = ( args.empty() ) ? 64 : boost::lexical_cast< int >( args.at( 0 ) );
std::cout << "We are going to make a password at least as strong as a " std::cout << "We are going to make a password at least as strong as a "
<< bitsDesired << " bit secret" << std::endl; << bitsDesired << " bit secret" << std::endl;
@ -155,21 +203,21 @@ try
do do
{ {
if( bitsInRnd < C::bits ) if( bitsInRnd < Options::bits )
{ {
rnd.read( reinterpret_cast< char * >( &randomness ), sizeof( randomness ) ); rnd.read( reinterpret_cast< char * >( &randomness ), sizeof( randomness ) );
if( rnd.bad() || rnd.fail() || rnd.eof() ) throw Failure(); if( rnd.bad() || rnd.fail() || rnd.eof() ) throw Failure();
bitsInRnd= sizeof( randomness ) * CHAR_BIT; bitsInRnd= sizeof( randomness ) * CHAR_BIT;
} }
const auto &word= dict[ randomness % ( C::domain ) ]; const auto &word= dict[ randomness % ( Options::domain() ) ];
DEBUG std::cout << randomness % ( C::domain ) << std::endl; DEBUG std::cout << randomness % ( Options::domain() ) << std::endl;
DEBUG std::cout << word << std::endl; DEBUG std::cout << word << std::endl;
randomness>>= C::bits; randomness>>= Options::bits;
words.push_back( word ); words.push_back( word );
bitsInRnd-= C::bits; bitsInRnd-= Options::bits;
bits+= C::bits; bits+= Options::bits;
} }
while( bits < bitsDesired ); while( bits < bitsDesired );