From 4dde1e6f7455bc733b2a691460b1567d1d308a051d4e2f6ca4739a3d5efb6d94 Mon Sep 17 00:00:00 2001
From: ADAM David Alan Martin
Date: Wed, 17 Sep 2025 20:59:18 -0400
Subject: [PATCH] This uses a tokenizer system for functions to call. It seems
 slower, though. I'm preserving it in this branch, for now.

---
Review notes (free-form text in this position is ignored by `git am`):

  * Line breaks and indentation in this copy were reconstructed; blank
    context lines were placed so that every hunk matches its `@@` counts.
    The `#include` targets and the author's address are missing from this
    copy and have deliberately not been guessed.
  * Changes relative to the patch as first written (none of them alters a
    hunk's line count or the diffstat):
      - `TokenHolder::where` is a pointer to the map element instead of an
        `unordered_map` iterator.  Inserting further words can rehash the
        map, which invalidates iterators but not element pointers.
      - `compile()` uses `std::make_shared` instead of converting the result
        of `std::make_unique` into a `std::shared_ptr`.
      - `compile()` no longer has a stray empty statement (`;;`).
      - `loadProgram()` moves the incoming strings instead of copying them.
  * The blob ids on the `index` lines still describe the original version.

 js4g/StackMachine.cc | 103 +++++++++++++++++++++++++++++++++++++++----
 js4g/StackMachine.h  |  29 ++++++------
 2 files changed, 107 insertions(+), 25 deletions(-)

diff --git a/js4g/StackMachine.cc b/js4g/StackMachine.cc
index 62a2118..b79e054 100644
--- a/js4g/StackMachine.cc
+++ b/js4g/StackMachine.cc
@@ -12,7 +12,10 @@ namespace
 	namespace C
 	{
 		constexpr bool debug= false;
+		constexpr bool debugCompiledCall= C::debug or false;
 	}
+
+	void breakpoint() {}
 }
 
 namespace Dillo::Hydrogen::JavaScriptForge ::detail:: StackMachine_m
@@ -22,12 +25,70 @@ namespace Dillo::Hydrogen::JavaScriptForge ::detail:: StackMachine_m
 	{
 	}
 
+	struct StackMachine::TokenHolder
+	{
+		std::unordered_map< std::string_view, std::vector< Token > >::value_type *where; // Element pointer: stays valid across rehash, unlike an iterator.
+	};
+
+	void
+	StackMachine::compile( std::unordered_map< std::string_view, std::vector< Token > >::iterator def )
+	{
+		tokenHolders.push_back( std::make_shared< TokenHolder >( TokenHolder{ &*def } ) );
+		if( C::debug )
+		{
+			std::cerr << "Replacing token " << std::get< std::string >( *current ) << " (" << def->first << ") with compiled "
+					<< "address " << (void * ) tokenHolders.back()->where->second.data() << std::endl;
+		}
+		*current= tokenHolders.back();
+	}
+
 	void
 	StackMachine::run()
 	{
 		while( not tokenStack.empty() and tokenStack.back().hasNext() )
 		{
-			runWord( next() );
+			const bool inConditional= not conditionals.empty();
+
+			auto token= next();
+			if( std::holds_alternative< std::shared_ptr< TokenHolder > >( token ) )
+			{
+				if( inConditional and currentState != conditionals.back() ) { breakpoint(); continue; }
+				// If compiled, just go directly there.
+				if( C::debug ) std::cerr << " Executing to compiled token: " << (void *) std::get< std::shared_ptr< TokenHolder > >( token ).get() << std::endl;
+
+				if( C::debugCompiledCall )
+				{
+					std::cerr << "\n\n\n\n===============================================================================================" << std::endl;
+					std::cerr << "Top of stack is: " << std::visit( []< typename T >( const T &value )
+							{
+								return boost::lexical_cast< std::string >( value );
+							},
+							peek() ) << std::endl;
+					const auto target= std::get< std::shared_ptr< TokenHolder > >( token )->where;
+					std::cerr << "The token sequence to execute (for function " << target->first << ") is:" << std::endl;
+
+					const auto &def= target->second;
+					std::transform( begin( def ), end( def ),
+							std::ostream_iterator< std::string >{ std::cerr, "\n" },
+							[]( const auto &element )
+							{
+								return std::visit
+								(
+									[]< typename T >( const T &val )
+									{
+										if constexpr( std::is_same_v< T, std::string > ) return val;
+										else return boost::lexical_cast< std::string >( (void *) val->where->second.data() ) + ": " + std::string{ val->where->first };
+									},
+									element
+								);
+							} );
+				}
+
+				tokenStack.emplace_back( std::get< std::shared_ptr< TokenHolder > >( token )->where->second );
+
+				continue;
+			}
+			else runWord( std::get< std::string >( token ) );
 
 			if( C::debug ) std::cerr << "After processing stack is now: " << std::endl;
 			if( C::debug ) for( const auto &element: stack )
@@ -43,7 +104,7 @@ namespace Dillo::Hydrogen::JavaScriptForge ::detail:: StackMachine_m
 		if( C::debug ) std::cerr << "Run done with stack at size: " << stack.size() << std::endl;
 	}
 
-	std::string_view
+	const StackMachine::Token &
 	StackMachine::next()
 	{
 		if( tokenStack.empty() )
@@ -51,7 +112,9 @@ namespace Dillo::Hydrogen::JavaScriptForge ::detail:: StackMachine_m
 			throw std::runtime_error{ "FATAL: Token required, no more tokens left." };
 		}
 
-		const auto rv= tokenStack.back().next();
+		auto &rv= tokenStack.back().next();
+		current= &rv;
+
 		while( not tokenStack.empty() and not tokenStack.back().hasNext() ) tokenStack.pop_back();
 		return rv;
 	}
@@ -88,14 +151,26 @@ namespace Dillo::Hydrogen::JavaScriptForge ::detail:: StackMachine_m
 			}
 
 			if( C::debug ) std::cerr << "Definition of " << definition.value() << " is done. It is: " << std::endl;
-			if( C::debug ) std::copy( begin( words.at( definition.value() ) ), end( words.at( definition.value() ) ),
-					std::ostream_iterator< std::string >{ std::cout, "\n" } );
+			if( C::debug ) std::transform( begin( words.at( definition.value() ) ), end( words.at( definition.value() ) ),
+					std::ostream_iterator< std::string_view >{ std::cerr, "\n" },
+					[]( const auto &element )
+					{
+						return std::visit
+						(
+							[]< typename T >( const T &val ) -> std::string_view
+							{
+								if constexpr( std::is_same_v< T, std::string > ) return val;
+								else return val->where->first;
+							},
+							element
+						);
+					} );
 			definition= std::nullopt;
 		}
 		else if( definition.has_value() )
 		{
 			if( C::debug ) std::cerr << "Adding word: " << word << " to function definition: " << definition.value() << std::endl;
-			words[ definition.value() ].emplace_back( word );
+			words[ definition.value() ].push_back( std::string{ word } );
 		}
 		else if( inConditional and word == "@else"sv and currentState != Else )
 		{
@@ -212,9 +287,9 @@ namespace Dillo::Hydrogen::JavaScriptForge ::detail:: StackMachine_m
 						+ "`" };
 			}
 
-			const auto &def= words.at( invoke );
-
-			tokenStack.emplace_back( def );
+			auto found= words.find( invoke );
+			tokenStack.emplace_back( found->second );
+			compile( found ); // Memoize it for next time.
 		}
 		else
 		{
@@ -280,6 +355,16 @@ namespace Dillo::Hydrogen::JavaScriptForge ::detail:: StackMachine_m
 		else if constexpr( std::is_same_v< std::string, T > ) push( t );
 		else push( boost::lexical_cast< std::string >( t ) );
 	}
+
+	void
+	StackMachine::loadProgram( std::vector< std::string > tokens )
+	{
+		this->tokens.clear();
+		std::move( begin( tokens ), end( tokens ), back_inserter( this->tokens ) ); // `tokens` is our own copy, so steal its strings.
+
+		tokenStack.clear();
+		tokenStack.emplace_back( this->tokens );
+	}
 }
 
 int
diff --git a/js4g/StackMachine.h b/js4g/StackMachine.h
index b603a3b..6968a59 100644
--- a/js4g/StackMachine.h
+++ b/js4g/StackMachine.h
@@ -12,6 +12,7 @@ static_assert( __cplusplus >= 2023'02 );
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -50,30 +51,31 @@ namespace Dillo::Hydrogen::JavaScriptForge ::detail:: StackMachine_m
 
 			// It's `void *` to break the cycle with itself
 			struct TokenHolder;
-			//using Token= std::variant< std::string, TokenHolder * >;
-			using Token= std::string;
+			using Token= std::variant< std::string, std::shared_ptr< TokenHolder > >;
+			std::vector< std::shared_ptr< TokenHolder > > tokenHolders;
 
-			std::vector< std::string > tokens;
+			std::vector< Token > tokens;
 
 			struct Tokenizer
 			{
-				std::vector< Token >::const_iterator pos;
-				std::vector< Token >::const_iterator end;
+				std::vector< Token >::iterator pos;
+				std::vector< Token >::iterator end;
 
 				explicit
-				Tokenizer( const std::vector< Token > &tokens )
+				Tokenizer( std::vector< Token > &tokens )
 					: pos( tokens.begin() ), end( tokens.end() ) {}
 
 				bool hasNext() const { return pos != end; }
 
-				std::string_view next() { return *pos++; }
+				Token &next() { return *pos++; }
 			};
 
 			std::vector< Tokenizer > tokenStack;
+			Token *current= nullptr;
 
 			std::list< std::string > wordNames;
 
-			std::unordered_map< std::string_view, std::vector< std::string > > words;
+			std::unordered_map< std::string_view, std::vector< Token > > words;
 
 			// Which side of the current conditional to take.
 			enum ConditionalState { If, Else, Skipped };
@@ -104,20 +106,15 @@ namespace Dillo::Hydrogen::JavaScriptForge ::detail:: StackMachine_m
 			void
 			push( const T &t );
 
-			std::string_view next();
+			void compile( std::unordered_map< std::string_view, std::vector< Token > >::iterator );
+			const Token &next();
 
 			void recurse( std::string_view func );
 
 		public:
 			StackMachine( std::ostream &output= std::cout );
 
-			void
-			loadProgram( std::vector< std::string > tokens )
-			{
-				this->tokens= std::move( tokens );
-				tokenStack.clear();
-				tokenStack.emplace_back( this->tokens );
-			}
+			void loadProgram( std::vector< std::string > tokens );
 
 			void run();
 	};