From ae53cff323043c61b51624be7c528b6b7bd44adf Mon Sep 17 00:00:00 2001
From: ADAM David Alan Martin
Date: Mon, 9 Oct 2023 20:34:10 -0400
Subject: [PATCH] Program options parser from my scratch projects.

---
 ProgramOptions.cpp | 430 +++++++++++++++++++++++++++++++++++++++++++++
 ProgramOptions.h   | 389 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 819 insertions(+)
 create mode 100644 ProgramOptions.cpp
 create mode 100644 ProgramOptions.h

diff --git a/ProgramOptions.cpp b/ProgramOptions.cpp
new file mode 100644
index 0000000..42d2606
--- /dev/null
+++ b/ProgramOptions.cpp
@@ -0,0 +1,430 @@
+static_assert( __cplusplus > 2020'00 );
+
+// The header added by this same patch is `ProgramOptions.h`; no `Options.h` exists here.
+#include "ProgramOptions.h"
+
+// Standard headers for the `std::` names used directly in this file.
+#include <algorithm>
+#include <cstdlib>
+#include <functional>
+#include <iostream>
+#include <iterator>
+#include <map>
+#include <optional>
+#include <set>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <typeindex>
+#include <vector>
+
+#include "algorithm.h"
+
+namespace Alepha::Cavorite ::detail:: program_options
+{
+	namespace
+	{
+		// Compile-time switches for diagnostic tracing of option processing.
+		// Turning on `debug` turns on every more specific switch as well.
+		namespace C
+		{
+			const bool debug= false;
+			const bool debugMatching= false or C::debug;
+			const bool debugExclusions= false or C::debug;
+		}
+
+		using namespace std::literals::string_literals;
+
+		// Raised when an option which needs an argument is seen without one.
+		// It is a distinct type so that option processing can catch it and
+		// suggest the `--option=value` spelling to the user.
+		struct OptionMissingArgumentError
+			: virtual std::runtime_error
+		{
+			using std::runtime_error::runtime_error;
+		};
+
+		// Print the string with wrapping to the terminal and the specified indent
+		// for subsequent lines.
+		void
+		printString( const std::string &s, const std::size_t indent )
+		{
+			const std::size_t width= Console::main().getScreenWidth();
+			std::cout << wordWrap( s, width, indent ) << std::endl;
+		}
+	}
+
+	// The registry record for a single option.
+	struct impl::ProgramOption
+	{
+		// Invoked when the option is seen; receives the text after `=` (or `:`), if any.
+		std::function< void ( std::optional< std::string > ) > handler;
+
+		// Accumulates the help text streamed in after the handler.
+		std::ostringstream help;
+
+		// Produces the text substituted for `!default!` in the help output.
+		std::function< std::string () > defaultBuilder= [] { return ""; };
+
+		// Every domain this option was bound to, grouped by the kind of domain.
+		std::map< std::type_index, std::set< const DomainBase * > > domains;
+	};
+
+	namespace
+	{
+		// All registered options, keyed by their full name (including the leading `--`).
+		StaticValue< std::map< std::string, impl::ProgramOption > > programOptions;
+
+		// The names of all registered options, in the registry's (sorted) order.
+		std::vector< std::string >
+		allOptionNames()
+		{
+			std::vector< std::string > rv;
+			for( const auto &[ name, _ ]: programOptions() ) rv.push_back( name );
+			return rv;
+		}
+
+		// Tracks which option (if any) from an exclusivity domain was already used.
+		struct ExclusivityEntry
+		{
+			std::optional< std::string > previous;
+		};
+
+		StaticValue< std::map< const DomainBase *, ExclusivityEntry > > mutuallyExclusiveOptions;
+
+		// The required options have to live in a single global collection.  There's only one
+		// set of program options per execution, so this entire list has to be searched.
+		StaticValue< std::map< const DomainBase *, std::vector< std::string > > > requiredOptions;
+	}
+
+	// Throws `OptionMissingArgumentError` when `argument` is absent; otherwise does nothing.
+	// `name` is the full option name, used only to build the message.
+	void
+	impl::checkArgument( const std::optional< std::string > &argument, const std::string &name )
+	{
+		if( argument.has_value() ) return;
+		throw OptionMissingArgumentError( '`' + name + "` requires an argument." );
+	}
+
+	// Records that this option belongs to `domain`.  Requirement domains are additionally
+	// tracked in the global `requiredOptions` table so they can be validated and summarized.
+	// The address of `domain` is retained, so it must outlive option processing.
+	const OptionBinding &
+	OptionBinding::bindDomain( const DomainBase &domain ) const
+	{
+		if( domain.kind == typeid( RequirementDomain ) )
+		{
+			requiredOptions()[ &domain ].push_back( name );
+		}
+
+		option->domains[ domain.kind ].insert( &domain );
+		return *this;
+	}
+
+	// Installs a handler which takes no argument.  Returns the stream collecting the help text.
+	std::ostream &
+	OptionBinding::operator << ( std::function< void () > core ) const
+	{
+		// So that users do not have to implement their own checking for argument absent,
+		// we do it for them.
+		auto handler= [core, name= name]( const std::optional< std::string > s )
+		{
+			if( s.has_value() )
+			{
+				// `name` already carries its leading `--`.
+				throw std::runtime_error( "`" + name + "` takes no arguments, but `" + s.value()
+						+ "` was provided." );
+			}
+			return core();
+		};
+		return registerHandler( handler );
+	}
+
+	// Installs a handler which parses its own argument string.  Returns the stream
+	// collecting the help text.
+	std::ostream &
+	OptionBinding::operator << ( std::function< void ( std::string ) > core ) const
+	{
+		// So that users do not have to implement their own checking for argument present,
+		// we do it for them.
+		auto handler= [core, name= name]( const std::optional< std::string > argument )
+		{
+			impl::checkArgument( argument, name );
+			return core( argument.value() );
+		};
+		return registerHandler( handler );
+	}
+
+	// Replaces the generator of the `!default!` help text for this option.
+	void
+	OptionBinding::setDefaultBuilder( std::function< std::string () > builder ) const
+	{
+		option->defaultBuilder= builder;
+	}
+
+	// Stores the final (argument-checking) handler and hands back the option's help stream,
+	// which is what lets help text be streamed in directly after the handler.
+	std::ostream &
+	OptionBinding::registerHandler( std::function< void ( std::optional< std::string > ) > handler ) const
+	{
+		option->handler= handler;
+		return option->help;
+	}
+
+	namespace
+	{
+		// Builds the "Incompatible with" paragraph for the option called `name`, whose domain
+		// table is `domains`.  Another option is incompatible when it shares at least one
+		// exclusivity domain with this one.  Returns an empty string when there are none.
+		std::string
+		buildIncompatibleHelpText( const std::string &name, const auto &domains )
+		{
+			const auto mine= domains.find( typeid( ExclusivityDomain ) );
+			if( mine == end( domains ) or mine->second.empty() )
+			{
+				return "";
+			}
+
+			// A `std::set` keeps the listing sorted and free of duplicates.
+			std::set< std::string > incompatibles;
+			for( const auto &[ otherName, otherDef ]: programOptions() )
+			{
+				if( otherName == name ) continue;
+				const auto theirs= otherDef.domains.find( typeid( ExclusivityDomain ) );
+				if( theirs == end( otherDef.domains ) ) continue;
+
+				const bool shared= std::any_of( begin( mine->second ), end( mine->second ),
+						[&]( const DomainBase *const domain ) { return theirs->second.contains( domain ); } );
+				if( shared ) incompatibles.insert( otherName );
+			}
+			if( incompatibles.empty() ) return "";
+
+			std::ostringstream oss;
+			oss << "\nIncompatible with: \n\n";
+			bool first= true;
+			for( const auto &incompat: incompatibles )
+			{
+				if( not first ) oss << ", ";
+				first= false;
+				oss << '`' << incompat << '`';
+			}
+			oss << std::endl;
+			return std::move( oss ).str();
+		}
+
+		void
+		printAllOptionsHelp( const std::optional< std::string > canonicalProgramName )
+		{
+			// Prints an aligned table of every registered option and its expanded help text,
+			// followed by a summary of each group of required options.
+			const auto &options= programOptions();
+			const auto longest= std::max_element( begin( options ), end( options ),
+					[]( const auto &lhs, const auto &rhs )
+					{
+						return lhs.first.size() < rhs.first.size();
+					} );
+			const std::size_t maxOptionLength= ( longest == end( options ) ) ? 0 : longest->first.size();
+			// Account for the `:` and the ` ` in the output table format.
+			const std::size_t alignmentWidth= maxOptionLength + 2;
+
+			std::cout << "Options:" << std::endl << std::endl;
+
+			// Inspect and print each option.
+			for( const auto &[ name, def ]: options )
+			{
+				const auto &[ _, helpText, defaultBuilder, domains ]= def;
+				// How much unused of the max width there will be
+				const std::size_t padding= alignmentWidth - name.size() - 2;
+
+				VariableMap substitutions=
+				{
+					// This uses a GNU extension, but it's fine.  We can always make this
+					// portable, later.
+					{ "program-name"s, lambaste<=::program_invocation_short_name },
+					{ "option-name"s, lambaste<=name },
+					{ "default"s, [&defaultBuilder= defaultBuilder, &name= name]
+						{
+							return "Default is `" + name + defaultBuilder() + "`";
+						} },
+				};
+				if( canonicalProgramName.has_value() )
+				{
+					substitutions[ "canonical-name"s ]= lambaste<=canonicalProgramName.value();
+				}
+
+				std::string substitutionTemplate= name + ": " + std::string( padding, ' ' )
+						+ helpText.str() + "\n";
+
+				// Append the incompatibility text, when we see mutually-exclusive options.
+				substitutionTemplate+= buildIncompatibleHelpText( name, domains );
+
+				const std::string helpString= expandVariables( substitutionTemplate, substitutions, '!' );
+				printString( helpString, alignmentWidth );
+			}
+
+			// Check for required options, and print a summary of those:
+			for( const auto &[ _, group ]: requiredOptions() )
+			{
+				const std::size_t width= Console::main().getScreenWidth();
+				std::ostringstream oss;
+				oss << "At least one of the options in this group are required: ";
+				bool first= true;
+				for( const auto &required: group )
+				{
+					if( not first ) oss << ", ";
+					first= false;
+					oss << '`' << required << '`';
+				}
+
+				std::cout << wordWrap( oss.str(), width ) << std::endl;
+			}
+		}
+	}
+
+	// The options which set boolean flags can be negated: alongside `--name`, which sets the
+	// flag, a `--no-name` option is registered which clears it.  Returns the help stream of
+	// the positive form.
+	std::ostream &
+	OptionBinding::operator << ( bool &flag ) const
+	{
+		// `name` starts with `--`; strip that before building the negated name.
+		--OptionString{ "no-" + name.substr( 2 ) }
+				<< std::function< void () >{ [&flag] { flag= false; } }
+				<< "Disable `" + name + "`.  See that option for more details.";
+		return self() << std::function< void () >{ [&flag] { flag= true; } };
+	}
+
+	// `"name"_option` -- wraps the literal text as an option name (without the leading `--`).
+	OptionString
+	literals::operator ""_option( const char *const text, const std::size_t amount )
+	{
+		return { std::string( text, text + amount ) };
+	}
+
+	// `--"name"_option` -- registers `--name` and returns the binding used to attach domains,
+	// a handler and help text.  Throws `RepeatedProgramOptionError` on a duplicate name.
+	OptionBinding
+	impl::operator --( const OptionString option )
+	{
+		const auto name= "--" + option.name;
+		if( programOptions().contains( name ) )
+		{
+			throw RepeatedProgramOptionError( "Option `" + name + "` was already registered." );
+		}
+		return OptionBinding{ name, &programOptions()[ name ] };
+	}
+
+	// Prints `helpMessage` (when not empty) with its variables expanded, then the help for
+	// every option, and exits the process successfully.  Never returns.
+	[[noreturn]] void
+	impl::usage( const std::string &helpMessage, const std::optional< std::string > &canonicalName )
+	{
+		if( not helpMessage.empty() )
+		{
+			VariableMap substitutions
+			{
+				// Another use of the GNUism.
+				{ "program-name"s, lambaste<=::program_invocation_short_name },
+			};
+
+			if( canonicalName.has_value() ) substitutions[ "canonical-name"s ]= lambaste<=canonicalName.value();
+			std::cout << wordWrap( expandVariables( helpMessage, substitutions, '!' ), Console::main().getScreenWidth() )
+					<< std::endl << std::endl;
+		}
+
+		printAllOptionsHelp( canonicalName );
+		::exit( EXIT_SUCCESS );
+	}
+
+	// Processes `args` (which must not include the program name) against the registered
+	// options, calling each matching option's handler from left to right.
+	//
+	// Returns the arguments which were not options, including everything after a bare `--`.
+	// Throws `std::runtime_error` for an unrecognized `--` option, for two options from the
+	// same exclusivity domain, for a missing argument, or for an unmet requirement domain.
+	// `usageFunction` becomes the handler of the automatically registered `--help` option.
+	std::vector< std::string >
+	impl::handleOptions( const std::vector< std::string > &args, const std::function< void () > usageFunction )
+	{
+		// Registering twice would throw, so a repeated call reuses the first registration.
+		if( not programOptions().contains( "--help" ) )
+		{
+			--"help"_option << usageFunction << "Print this help message (program usage).";
+		}
+
+		// The unprocessed program arguments will be collected into this vector
+		std::vector< std::string > rv;
+
+		const auto &opts= programOptions();
+
+		// The arguments end at the first `--` token (by itself), or when there's no more.
+		const auto endOfArgs= std::find( begin( args ), end( args ), "--" );
+
+		// Because `--help` needs to expand certain variables, options which can affect it need to be processed
+		// before handling `--help`
+		const bool helpRequested= std::find( begin( args ), endOfArgs, "--help" ) != endOfArgs;
+
+		// Each time a required domain is seen, we put that requirement into this set.
+		// If all required options are passed, then this set should match the list of
+		// required option domains.
+		std::set< const DomainBase * > requiredOptionsSeen;
+
+		const std::vector< std::string > argsToProcess{ begin( args ), endOfArgs };
+
+		// An option that requires an argument might have been type-o'ed as `--option arg`
+		// instead of `--option=arg`.  By tracking the next option, we can print helpful
+		// diagnostics in the error messages.
+		auto next= begin( argsToProcess );
+
+		for( const auto &param: argsToProcess )
+		try
+		{
+			++next;
+			// Because `--help` has a special relationship with the rest of the options,
+			// we skip it in this pass.
+			if( helpRequested and param == "--help" ) continue;
+
+			// Match up each argument.
+			const bool matched= evaluate <=[&]
+			{
+				// TODO: Make this into direct map lookups.
+				// It requires modifying `--` options as passed, to strip `=` before
+				// doing a map lookup.
+				for( const auto &[ name, def ]: opts )
+				{
+					if( C::debugMatching ) error() << "Attempting to match `" << name << "` to `" << param << "`" << std::endl;
+
+					const auto &handler= def.handler;
+					std::optional< std::string > argument;
+					if( param == name ) argument= std::nullopt;
+					// `param` is strictly longer than `name` here, so indexing at `name.size()` is in range.
+					else if( param.starts_with( name ) and "=:"s.find( param.at( name.size() ) ) != std::string::npos )
+					{
+						argument= param.substr( name.size() + 1 );
+					}
+					else continue;
+
+					// Skip options that do not affect help, when we're doing a `--help` run.
+					if( helpRequested and not def.domains.contains( typeid( PreHelpDomain ) ) ) return true;
+
+					// Exclusivity has to be handled as a running concern across options...
+					if( def.domains.contains( typeid( ExclusivityDomain ) ) )
+					{
+						const auto &exclusions= def.domains.at( typeid( ExclusivityDomain ) );
+						if( C::debugExclusions )
+						{
+							error() << "I see " << exclusions.size() << " mutual exclusions against `"
+									<< name << "`" << std::endl;
+						}
+						for( const auto &exclusion: exclusions )
+						{
+							// Look up this domain, and see if something from it was used.
+							auto &other= mutuallyExclusiveOptions()[ exclusion ].previous;
+							if( other.has_value() and other != name )
+							{
+								throw std::runtime_error{ "Options `" + other.value() + "` and `"
+										+ name + "` are mutually exclusive." };
+							}
+							else other= name; // If nothing was there, record that this name was now used.
+						}
+					}
+
+					// If the option was required, mark that we took it.
+					if( def.domains.contains( typeid( RequirementDomain ) ) )
+					{
+						for( const auto &domain: def.domains.at( typeid( RequirementDomain ) ) )
+						{
+							requiredOptionsSeen.insert( domain );
+						}
+					}
+					handler( argument );
+					return true;
+				}
+				return false;
+			};
+			if( C::debugMatching and not matched ) error() << "No match for `" << param << "` was found." << std::endl;
+			if( matched ) continue;
+
+			if( param.starts_with( "--" ) )
+			{
+				// TODO:
+				throw std::runtime_error( "`" + param + "` is an unrecognized option." );
+			}
+			rv.push_back( param );
+		}
+		catch( const OptionMissingArgumentError &e )
+		{
+			// Only offer the hint when the following token could plausibly be the argument.
+			if( next == end( argsToProcess ) or next->starts_with( "--" ) ) throw;
+			throw std::runtime_error( e.what() + " did you mean: `"s + param + "=" + *next + "`?" );
+		}
+
+		if( endOfArgs != end( args ) ) std::copy( endOfArgs + 1, end( args ), back_inserter( rv ) );
+
+		if( helpRequested ) programOptions().at( "--help" ).handler( std::nullopt );
+
+		// If we're not doing a help-run, then we need to validate the required
+		// options were all passed.
+		if( not helpRequested and requiredOptions().size() != requiredOptionsSeen.size() )
+		{
+			for( const auto &[ required, names ]: requiredOptions() )
+			{
+				if( requiredOptionsSeen.contains( required ) ) continue;
+
+				std::ostringstream oss;
+				oss << "Required option missing.  At least one of ";
+				bool first= true;
+				for( const auto &name: names )
+				{
+					if( not first ) oss << ", ";
+					first= false;
+					oss << '`' << name << '`';
+				}
+				oss << " must be passed.";
+
+				throw std::runtime_error( oss.str() );
+			}
+
+			throw std::runtime_error{ "A required option was missing, and it couldn't be identified." };
+		}
+
+		return rv;
+	}
+}
diff --git a/ProgramOptions.h b/ProgramOptions.h
new file mode 100644
index 0000000..a597e51
--- /dev/null
+++ b/ProgramOptions.h
@@ -0,0 +1,389 @@
+static_assert( __cplusplus > 2020'00 );
+
+#pragma once
+
+/*!
+ * @file
+ * Program options library.
+ *
+ * The `Alepha::program_options` namespace defines a simple DSEL for adding commandline options
+ * to a program.  Options are defined using `--"name"_option` operations and then "streaming"
+ * an option handler into the option name, followed by streaming in any help text.  The result
+ * type of `operator <<` between an option name and a handler is a `std::ostream &` which can
+ * be used to build the option help string.  The help text can use a variable-expansion feature
+ * to allow for options help to be dynamically kept in sync with program development.
+ * The variables `"!program-name!"` or `"!option-name!"` will expand to the text one would expect.
+ * The variable `"!default!"` will expand to an example usage which initializes the option
+ * as if the option were never passed.
+ *
+ * An option handler can be a function or a variable.  If it is a function, that function
+ * will be called when processing that option.  If the function takes a string argument, the
+ * option will be parsed for an `=` and the text after that token will be passed as a string
+ * argument.  If the function takes no arguments, then the option will not accept `=` arguments.
+ *
+ * If a `bool` variable is passed as an option handler, then the variable will be set if that
+ * option is present.  A `"--no-"` form of the option will automatically be generated, as well.
+ * That `"--no-"` form will clear the Boolean variable.  Options are processed from left to right.
+ *
+ * If a `std::vector< T >` variable is passed as an option handler, then each time the option
+ * is encountered, its argument will be appended to that `std::vector`.  Parsing will use
+ * `operator >> ( std::istream &, T & )`.
+ *
+ * If a single instance variable is passed as an option handler, then each time the option
+ * is encountered, its argument will be parsed and replace the value stored in that variable.
+ * Parsing of options is handled left-to-right.  Parsing of the argument string will use
+ * `operator >> ( std::istream &, T & )`.
+ *
+ * If a `std::optional< T >` single instance variable is passed as an option handler, then each
+ * time the option is encountered, its argument will be parsed and replace the value stored
+ * in that variable.  If it is never encountered, the optional will not be modified.  This
+ * avoids the need for dummy values and sentinel values in some cases.  Parsing of options
+ * is handled left-to-right.  Parsing of the argument string will use
+ * `operator >> ( std::istream &, T & )`.
+ *
+ * A `"--help"` option and its handler will be automatically generated.
+ *
+ * Example:
+ *
+ * ```
+ * #include "ProgramOptions.h"
+ *
+ * int
+ * main( int argc, const char **argv )
+ * {
+ * 	using namespace Alepha::program_options;
+ *
+ * 	//Let's define a few options:
+ *
+ * 	bool fooMode= false;
+ * 	// Note that `--no-foo-mode` will be provided for you.
+ * 	--"foo-mode"_option << fooMode << "Enable foo mode";
+ *
+ * 	std::vector< std::string > fileList;
+ * 	--"process-file"_option << fileList
+ * 			<< "Add the specified file to the list of files to process";
+ *
+ * 	// `handleOptions` will return a vector of all program arguments
+ * 	// that were not options, including everything after a bare `--`.
+ * 	const std::vector< std::string > args= handleOptions( argc, argv );
+ *
+ * 	// Any other argument which starts with `--` but does not name a
+ * 	// registered option is reported as an error.
+ * }
+ * ```
+ *
+ * Errors are reported by throwing an exception.  The `.what()` observer will report
+ * an informative message about the parsing error encountered.
+ */
+
+
+// Standard and boost headers for the names this header uses directly.
+#include <functional>
+#include <optional>
+#include <ostream>
+#include <stdexcept>
+#include <string>
+#include <type_traits>
+#include <typeindex>
+#include <vector>
+
+#include <boost/lexical_cast.hpp>
+
+// NOTE(review): the original include targets were lost.  The project headers which provide
+// `Integral`, `NotFunctional`, `UnaryFunction`, `get_arg_t`, `is_vector_v`, `parseCommas`,
+// `parseRange` and `evaluate` still need to be included here -- confirm their paths.
+
+namespace Alepha::inline Cavorite ::detail:: program_options
+{
+	inline namespace exports {}
+
+	/*!
+	 * User created unique symbols can be bound to options to build classes of options.
+	 *
+	 * The symbols are used to build domains of these classes, defining relationships between them.
+	 * Examples of relationship include exclusivity, at-least-one-requirement, or chained dependencies.
+	 *
+	 * @note Domains must be global.
+	 */
+	struct DomainBase
+	{
+		// Identifies which kind of domain (which `Domain< T >`) this object is.
+		std::type_index kind;
+	};
+
+	/*!
+	 * A domain of one particular kind, selected by the tag type `T`.
+	 *
+	 * Each instance is a distinct domain; instances are told apart by their address.
+	 */
+	template< typename T >
+	struct Domain : DomainBase
+	{
+		Domain() : DomainBase{ typeid( Domain ) } {}
+
+		// Orders domains by address, so that they can be kept in ordered containers.
+		friend bool
+		operator < ( const Domain &lhs, const Domain &rhs )
+		{
+			return std::less<>{}( &lhs, &rhs );
+		}
+	};
+
+	namespace exports
+	{
+		/*!
+		 * Thrown when an option name is registered a second time.
+		 */
+		class RepeatedProgramOptionError : public std::runtime_error
+		{
+			public:
+				using std::runtime_error::runtime_error;
+		};
+	}
+
+	// Tag types which select the kind of a `Domain`.
+	struct exclusivity_tag;
+	struct requirement_tag;
+	struct pre_help_tag;
+
+	namespace exports
+	{
+		/*!
+		 * This is used to build groups of mutually exclusive options.
+		 *
+		 * If two or more options associated with the same exclusivity domain are seen in parsing the command-line, then
+		 * the program options parsing will fail with an error.  An option may be in multiple exclusivity domains, and
+		 * exclusivity domains may overlap by any arbitrary amount.  Just `operator <<` an instance of an exclusivity
+		 * domain into the option definition.  That will cause that option to be added to the exclusivity domain.
+		 *
+		 * @note Exclusivity domains must be global instances, at this time.
+		 * @note This must occur on the option line before the option handler or option variable.  (i.e., `<<`
+		 * directly after the option string name or another domain.)
+		 */
+		using ExclusivityDomain= Domain< exclusivity_tag >;
+
+		// Options bound to the same requirement domain form a group of which at least one must be passed.
+		using RequirementDomain= Domain< requirement_tag >;
+
+		// Options bound to a pre-help domain are still processed when `--help` was requested.
+		using PreHelpDomain= Domain< pre_help_tag >;
+		inline const PreHelpDomain affectsHelp;
+	}
+
+	// Converts the argument text `s` into a `T`.  Strings are passed through unchanged; anything
+	// else goes through `boost::lexical_cast`.  `argName` and `fullOption` are only used to build
+	// the message of the `std::runtime_error` thrown when the conversion fails.
+	template< typename T >
+	auto
+	argumentFromString( const std::string &s, const std::string &argName, const std::string &fullOption )
+	try
+	{
+		if constexpr( std::is_same_v< T, std::string > ) return s;
+		else return boost::lexical_cast< T >( s );
+	}
+	catch( const boost::bad_lexical_cast & )
+	{
+		throw std::runtime_error( "Error parsing option `" + argName + "`, with parameter string: `" + s + "` (full option: `" + fullOption + "`)" );
+	}
+
+	// `impl` has to be inline from its first declaration: it is reopened as inline further down,
+	// and `operator --` within it must be found by argument-dependent lookup on `OptionString`.
+	inline namespace impl
+	{
+		struct ProgramOption;
+
+		void checkArgument( const std::optional< std::string > &opt, const std::string &name );
+	}
+
+	/*!
+	 * The handle returned by `--"name"_option`, through which an option is configured.
+	 *
+	 * Domains are streamed in first, then exactly one handler or variable, and after that the
+	 * help text is streamed into the returned `std::ostream`.
+	 */
+	class OptionBinding
+	{
+		public:
+			std::string name; // Full option name, including the leading `--`.
+			impl::ProgramOption *option; // The registry entry being configured.
+
+			// The `operator <<` forms are used to define options.
+			// These are not `std::ostream` operators directly,
+			// except that the end of a chain will return the `std::ostream`
+			// object used to construct the help for that option.
+
+		private:
+			// We have to self-call our operators, so this makes it syntactically simpler.
+			auto &self() { return *this; }
+			const auto &self() const { return *this; }
+
+			using option_handler= std::function< void ( std::optional< std::string > ) >;
+			[[nodiscard]] std::ostream &registerHandler( option_handler handler ) const;
+
+			void setDefaultBuilder( std::function< std::string () > ) const;
+
+			[[nodiscard]] const OptionBinding &bindDomain( const DomainBase & ) const;
+
+		public:
+			template< typename T >
+			[[nodiscard]] const OptionBinding &
+			operator << ( const Domain< T > &domain ) const
+			{
+				return bindDomain( domain ); // Pass to polymorphic handler for base
+			}
+
+			// This installs a custom handler that has to do its own string parsing.
+			[[nodiscard]] std::ostream &operator << ( std::function< void ( std::string ) > core ) const;
+
+			// This installs a custom handler that takes no arguments.
+			[[nodiscard]] std::ostream &operator << ( std::function< void () > core ) const;
+
+			// Handler generator -- parses the string arguments in an option and puts them at the end of the
+			// specified `vector`.  The argument is split with `parseCommas`; for integral element types each
+			// piece is additionally expanded with `parseRange`.
+			template< typename T >
+			[[nodiscard]] std::ostream &
+			operator << ( std::vector< T > &list ) const
+			{
+				return self() << [&list, name= name]( const std::string param )
+				{
+					for( const std::string &datum: parseCommas( param ) )
+					{
+						if constexpr( Integral< T > )
+						{
+							const auto parsedRange= parseRange< T >( argumentFromString< std::string >( datum, name, name + "=" + param ) );
+							list.insert( end( list ), begin( parsedRange ), end( parsedRange ) );
+						}
+						else
+						{
+							list.push_back( argumentFromString< T >( datum, name, name + "=" + param ) );
+						}
+					}
+				};
+			}
+
+			// Handler generator -- This builds a parser for the specified value, and installs the value to an optional
+			// when the option and its argument are seen.
+			template< typename T >
+			[[nodiscard]] std::ostream &
+			operator << ( std::optional< T > &value ) const
+			{
+				return self() << [&value, name= name]( const std::string datum )
+				{
+					value= argumentFromString< T >( datum, name, name + "=" + datum );
+				};
+			}
+
+			// Boolean flag options are a special case of the value-binding system.
+			// They generate `--no-` forms of the option as well.
+			// As with every other handler form, the result is the help stream of the option.
+			std::ostream &operator << ( bool &flag ) const;
+
+			// Handler generator -- binds a plain variable: the option's argument is parsed and
+			// assigned to `value` each time the option is seen.
+			template< NotFunctional T >
+			[[nodiscard]] std::ostream &
+			operator << ( T &value ) const
+			{
+				// This is used in help generation to print out the "default" value chosen by the programmer, by referencing the
+				// variable's value in C++ at runtime.
+				auto defaultBuilder= [&value]
+				{
+					auto text= boost::lexical_cast< std::string >( value );
+
+					// Quote values containing whitespace, so the example can be pasted into a shell.
+					if( text.find_first_of( " \n\t" ) != std::string::npos )
+					{
+						text= '"' + text + '"';
+					}
+					return "=" + text;
+				};
+				setDefaultBuilder( defaultBuilder );
+				return self() << [&value, name= name]( const std::string datum )
+				{
+					value= argumentFromString< T >( datum, name, name + "=" + datum );
+				};
+			}
+
+			// Handler generator -- wraps a one-parameter callable.  The option's argument is required,
+			// is converted to the callable's parameter type, and is then passed to the callable.
+			[[nodiscard]] std::ostream &
+			operator << ( UnaryFunction auto core ) const
+			{
+				// Strip references and cv-qualifiers, so that a `const std::string &` parameter parses as `std::string`.
+				using arg_type= std::remove_cvref_t< get_arg_t< std::decay_t< decltype( core ) >, 0 > >;
+				if constexpr( is_vector_v< arg_type > )
+				{
+					// TODO: This should steal the impl from the vector form, above, and that should defer to this.
+
+					using parse_type= typename arg_type::value_type;
+					auto handler= [core, name= name]( std::optional< std::string > argument )
+					{
+						impl::checkArgument( argument, name );
+
+						const auto parsed= evaluate <=[&]
+						{
+							std::vector< parse_type > rv;
+							for( const auto &value: parseCommas( argument.value() ) )
+							{
+								if constexpr( Integral< parse_type > )
+								{
+									const auto parsedRange= parseRange< parse_type >( argumentFromString< std::string >( value, name,
+											name + "=" + argument.value() ) );
+									rv.insert( end( rv ), begin( parsedRange ), end( parsedRange ) );
+								}
+								else rv.push_back( argumentFromString< parse_type >( value, name, name + "=" + argument.value() ) );
+							}
+							return rv;
+						};
+						core( parsed );
+					};
+					return registerHandler( handler );
+				}
+				else
+				{
+					auto handler= [core, name= name]( std::optional< std::string > argument )
+					{
+						impl::checkArgument( argument, name );
+
+						const auto value= argumentFromString< arg_type >( argument.value(), name, name + "=" + argument.value() );
+						return core( value );
+					};
+					return registerHandler( handler );
+				}
+			}
+	};
+
+	void printString( const std::string &s, const std::size_t indent );
+
+	void printOptionsHelp();
+
+	// The name of an option, without its leading `--`.
+	struct OptionString { std::string name; };
+
+	namespace exports::inline literals
+	{
+		OptionString operator ""_option( const char *const text, const std::size_t amount );
+	}
+
+	inline namespace impl
+	{
+		[[nodiscard]] OptionBinding operator --( OptionString option );
+	}
+
+	// The default source of the text printed at the top of `--help`: no message, no canonical name.
+	// A replacement type must provide the same two static functions.
+	struct ProgramDescription
+	{
+		static std::string helpMessage() { return ""; }
+		static std::optional< std::string > canonicalName() { return std::nullopt; }
+	};
+
+	namespace impl
+	{
+		[[noreturn]] void usage( const std::string &, const std::optional< std::string > & );
+		[[nodiscard]] std::vector< std::string > handleOptions( const std::vector< std::string > &, std::function< void () > );
+	}
+
+	// Adapts a `Supplement` type into the nullary usage function which `--help` invokes.
+	template< typename Supplement >
+	[[noreturn]] void
+	usageWrap()
+	{
+		impl::usage( Supplement::helpMessage(), Supplement::canonicalName() );
+	}
+
+	namespace exports
+	{
+		using DefaultSupplement= ProgramDescription;
+
+		// Processes `args` (which must not include the program name), using `Supplement` for the
+		// `--help` text.  Returns the arguments which were not options.
+		template< typename Supplement >
+		auto
+		handleOptions( const std::vector< std::string > &args )
+		{
+			return impl::handleOptions( args, usageWrap< Supplement > );
+		}
+
+		// As above, but straight from the parameters of `main`; `argvec[ 0 ]` is skipped.
+		template< typename Supplement >
+		auto
+		handleOptions( const int argcnt, const char *const *const argvec )
+		{
+			// With no program name at all there is nothing to skip, and `argvec + 1` would overshoot.
+			if( argcnt < 1 ) return handleOptions< Supplement >( std::vector< std::string >{} );
+			return handleOptions< Supplement >( std::vector< std::string >( argvec + 1, argvec + argcnt ) );
+		}
+
+		// The non-template forms are defined in a header, so they have to be `inline`.
+		inline auto
+		handleOptions( const std::vector< std::string > &args )
+		{
+			return handleOptions< ProgramDescription >( args );
+		}
+
+		inline auto
+		handleOptions( const int argcnt, const char *const *const argvec )
+		{
+			return handleOptions< ProgramDescription >( argcnt, argvec );
+		}
+	}
+}
+
+namespace Alepha::Cavorite::inline exports::inline program_options
+{
+	using namespace detail::program_options::exports;
+}
+
+namespace Alepha::Cavorite::inline exports::inline literals::inline option_literals
+{
+	using namespace detail::program_options::exports::literals;
+}