1
0
forked from Alepha/Alepha

Input stream stacking with line numbers!

This should help when building custom language parsers.  An input
stream can be augmented with this stackable streambuf to track
the current line number.  This can (and should) be done low in
the stack, so that any variable-expansion and comment-stripping
stages will not affect the line-number count.

The stage bolts on a stream state sidecar to point back to itself.
The observer for the current line peeks into the sidecar to see
the current line number tracking object for the stream and then
gets the current line number from that object.

The line number is the current line the input cursor is on.
Newline characters are treated as if they're part of the current
line.  The newly created line will start on the first character
after the newline character.  This helps keep line-index counts
accurate too.  (Idea for a further stage: a line-index cursor
as well?)
This commit is contained in:
2024-04-03 08:29:10 -04:00
parent 373b07e1c4
commit 7410245314
6 changed files with 292 additions and 2 deletions

View File

@ -1,6 +1,7 @@
add_subdirectory( IStreamable.test ) add_subdirectory( IStreamable.test )
add_subdirectory( OStreamable.test ) add_subdirectory( OStreamable.test )
add_subdirectory( streamable.test ) add_subdirectory( streamable.test )
add_subdirectory( LineTrackingStreambuf.test )
add_subdirectory( OutUnixFileBuf.test ) add_subdirectory( OutUnixFileBuf.test )
add_subdirectory( StackableStreambuf.test ) add_subdirectory( StackableStreambuf.test )

View File

@ -0,0 +1,144 @@
static_assert( __cplusplus > 2020'99 );
#pragma once
#include <Alepha/Alepha.h>
#include <cstdint>
#include <vector>
#include <iostream>
#include <string_view>
#include <algorithm>
#include "StackableStreambuf.h"
#include "StreamState.h"
namespace Alepha::Hydrogen::IOStreams ::detail:: LineTrackingStreambuf_m
{
// Declare `exports` as an inline namespace up front, so that everything placed in it
// below is also reachable directly from the enclosing detail namespace.
inline namespace exports {}
// Empty tag type: it carries no data and only selects the line-tracking stage
// when wrapped in `PushStack` (see `TrackLines` below).
struct TrackLines_params {};
namespace exports
{
// The stream buffer stage which does the line counting; defined below.
struct LineTrackingStreambuf;
// Stream manipulator type.  `stream >> TrackLines{}` reaches `build_streambuf`
// below, which installs a `LineTrackingStreambuf` on the stream.
using TrackLines= IOStreams::PushStack< TrackLines_params >;
// Returns the current line number for `ios`, or -1 when no line tracker is registered for it.
std::int64_t getLineNumber( std::ios_base &ios );
// NOTE(review): the only definition of this function in this file sits inside an
// `#if 0` block, so with the deduced (`auto`) return type it cannot be called yet.
auto setLineNumber( std::int64_t baseLine );
}
// Sidecar slot holding a pointer to the line tracker associated with a stream.  The
// initializer lambda supplies `nullptr`; `LineTrackingStreambuf`'s constructor stores
// `this` here and `getLineNumber` reads it back.
// NOTE(review): presumably `StreamState` keeps one slot per stream -- confirm in StreamState.h.
inline StreamState< LineTrackingStreambuf * > tracker{ []{ return nullptr; } };
// Input-side stackable stream buffer which keeps track of the line the read cursor is on.
//
// Each successful `underflow` adopts the underlying buffer's get area (through
// `assume_underlying`) and records the offset just past every newline found in it.
// The current line is then `baseLine` plus the number of recorded line starts which
// the cursor has already moved beyond.
//
// The constructor registers the object in the `tracker` sidecar of the stream, which
// is how the free function `getLineNumber` finds it.
// NOTE(review): nothing in this struct clears that `tracker` entry again; if the stage
// can be destroyed while the stream lives on, confirm that the entry is reset elsewhere.
struct exports::LineTrackingStreambuf
	: public virtual StackableStreambuf, public virtual std::streambuf
{
	private:
		// Line number attributed to the start of the currently adopted get area.
		// Starts at 1 and may be rebased through `setLineNumber`.
		std::int64_t baseLine= 1;

		// Offsets (relative to `bufBase`, ascending) of the first character after each
		// newline in the currently adopted get area.
		std::vector< std::int64_t > lineStarts;

		// Start of the get area adopted by the most recent successful `underflow`.
		char *bufBase= nullptr;

		// Offset of the read cursor within the adopted get area.
		std::int64_t current() const { return gptr() - bufBase; }

		// Number of recorded line starts lying strictly before the cursor.  A cursor
		// resting exactly on a line start is therefore not yet counted as being on
		// that new line.
		std::int64_t
		index() const
		{
			return std::lower_bound( begin( lineStarts ), end( lineStarts ), current() ) - begin( lineStarts );
		}

	public:
		// Installs this stage on `is` (via the `StackableStreambuf` base) and registers
		// it as the line tracker of that stream.
		explicit
		LineTrackingStreambuf( std::ios &is )
			: StackableStreambuf( is )
		{
			tracker.get( is )= this;
		}

		// Rebases the numbering such that `getLineNumber()` reports `baseLine` for the
		// current cursor position.
		void
		setLineNumber( const std::int64_t baseLine )
		{
			this->baseLine= baseLine - index();
		}

		// Line number at the current cursor position.
		std::int64_t
		getLineNumber() const
		{
			return baseLine + index();
		}

		// This stage is input-only; the output hooks are deliberately unimplemented.
		void writeChar( char ch ) override { throw "Unimpl"; }
		void drain() override { throw "Unimpl"; }

		// Refills the get area from the underlying buffer and indexes its newlines.
		// Returns whatever the underlying `underflow` returned; `EOF` is passed
		// through without adopting a new get area.
		int
		underflow() override
		{
			// The previous get area is being abandoned, so fold all of its line starts
			// into the base line before the offsets are forgotten.
			baseLine+= lineStarts.size();
			lineStarts.clear();

			const auto rv= forwardUnderflow();
			if( rv == EOF ) return rv;

			assume_underlying();
			bufBase= gptr();

			// Record the offset just past each newline in the freshly adopted area.
			const std::string_view view{ gptr(), egptr() };
			for( auto pos= view.find( '\n' ); pos != view.npos; pos= view.find( '\n', pos + 1 ) )
			{
				lineStarts.push_back( static_cast< std::int64_t >( pos ) + 1 );
			}

			// A leftover debugging trap used to call `abort()` here whenever the first
			// character of a refill was 'e' (ASCII 101).  It has been removed.
			return rv;
		}
};
inline std::int64_t
exports::getLineNumber( std::ios_base &ios )
{
auto thisTracker= tracker.get( ios );
if( thisTracker == nullptr ) return -1;
return thisTracker->getLineNumber();
}
// Disabled (`#if 0`) stream manipulator for rebasing the reported line number.
// NOTE(review): while this stays disabled, `exports::setLineNumber` is declared earlier in
// this file with a deduced (`auto`) return type but has no live definition -- confirm intent.
#if 0
// Carries the requested line number; extracting it from a stream applies it.
struct Setter
{
const std::int64_t baseLine;
// Applies `s.baseLine` to the line tracker registered for `is`, if there is one.
// A stream without a tracker is left untouched.  Always returns `is`.
friend std::istream &
operator >> ( std::istream &is, const Setter &s )
{
auto thisTracker= tracker.get( is );
if( thisTracker != nullptr )
{
thisTracker->setLineNumber( s.baseLine );
}
return is;
}
};
// Builds the manipulator object; intended use would be `is >> setLineNumber( n )`.
inline auto
exports::setLineNumber( const std::int64_t baseLine )
{
return Setter{ baseLine };
}
#endif
// Hook invoked for `is >> TrackLines{}`: installs a line-tracking stage on `is`.
//
// The stage is allocated with `new` and the pointer is intentionally not kept here;
// the constructor hooks the object into the stream.
// NOTE(review): presumably the stackable-streambuf machinery owns and deletes it -- confirm.
inline void
build_streambuf( std::istream &is, TrackLines && )
{
	[[maybe_unused]] auto *const stage= new LineTrackingStreambuf{ is };
}
}
// Re-export: pulls the names from the detail `exports` namespace into an inline
// namespace chain under `IOStreams`, so that they can be named without the
// `detail::LineTrackingStreambuf_m` qualification.
namespace Alepha::Hydrogen::IOStreams::inline exports::inline LineTrackingStreambuf_m
{
using namespace detail::LineTrackingStreambuf_m::exports;
}

View File

@ -0,0 +1,55 @@
#include "../LineTrackingStreambuf.h"
#include <sstream>
#include <Alepha/Testing/test.h>
#include <Alepha/Utility/evaluation_helpers.h>
// Registers the line-tracking tests with the test framework at static-init time.
static auto init= Alepha::Utility::enroll <=[]
{
	using namespace Alepha::Testing::exports;

	// Reads whitespace-separated words from a multi-line string and checks the line
	// number reported after each read.  The cursor stops right after each word, i.e.
	// before the newline which follows it, so a word is reported on its own line.
	"Do we see line numbers in a simple case?"_test <=[]( TestState &test )
	{
		using Alepha::IOStreams::getLineNumber;

		const std::string c= "One Two Three\nFour\n\nFive\n\n\n\nSeven";
		std::istringstream iss{ c };
		iss >> Alepha::IOStreams::TrackLines{};

		std::string s;

		// Diagnostic trace of the line number currently reported.
		const auto showLine= [&iss]
		{
			std::cerr << "Num: " << getLineNumber( iss ) << std::endl;
		};

		// Extracts the next word into `s` and traces it.
		const auto nextWord= [&iss, &s]
		{
			iss >> s;
			std::cerr << "s: " << s << std::endl;
		};

		// Nothing read yet.
		test.expect( getLineNumber( iss ) == 1, "Line should be 1." );
		showLine();

		nextWord(); // One
		test.expect( getLineNumber( iss ) == 1, "Line should be 1." );
		showLine();

		nextWord(); // Two
		test.expect( getLineNumber( iss ) == 1, "Line should be 1." );
		showLine();

		nextWord(); // Three
		test.expect( getLineNumber( iss ) == 1, "Line should be 1." );
		showLine();

		nextWord(); // Four
		test.expect( getLineNumber( iss ) == 2, "Line should be 2." );
		showLine();

		nextWord(); // Five
		test.expect( getLineNumber( iss ) == 4, "Line should be 4." );
		showLine();

		nextWord(); // Seven
		test.expect( getLineNumber( iss ) == 8, "Line should be 8." );
	};
};

View File

@ -0,0 +1 @@
unit_test( 0 )

View File

@ -86,9 +86,10 @@ namespace Alepha::Hydrogen::IOStreams::detail::StackableStreambuf_m
StackableStreambuf::~StackableStreambuf() {} StackableStreambuf::~StackableStreambuf() {}
StackableStreambuf::StackableStreambuf( std::ostream &host ) StackableStreambuf::StackableStreambuf( std::ios &host )
: underlying( host.rdbuf( this ) ) : underlying( host.rdbuf( this ) )
{ {
assert( underlying );
// TODO: Atomicity for this: // TODO: Atomicity for this:
if( not host.iword( index ) ) host.register_callback( iosCallback, index ); if( not host.iword( index ) ) host.register_callback( iosCallback, index );
host.iword( index )= 1; host.iword( index )= 1;

View File

@ -30,7 +30,7 @@ namespace Alepha::Hydrogen::IOStreams ::detail:: StackableStreambuf_m
~StackableStreambuf() override; ~StackableStreambuf() override;
// Children must be created by `new`. // Children must be created by `new`.
explicit StackableStreambuf( std::ostream &host ); explicit StackableStreambuf( std::ios &host );
auto out() const { return std::ostream{ underlying }; } auto out() const { return std::ostream{ underlying }; }
@ -39,7 +39,86 @@ namespace Alepha::Hydrogen::IOStreams ::detail:: StackableStreambuf_m
int overflow( const int ch ) override; int overflow( const int ch ) override;
// Underflow is not overridden. A read-oriented
// streambuf stack may choose to implement underflow.
//
// The reason underflow is not adapted to a
// char-interceptor is that memory usage would become
// unbounded.  For overflow/writing, we can assume
// that sufficient output storage space exists for
// whatever transformations a single char turns into.
// For instance, a decompression algorithm might
// cause several kilobytes of data to be generated
// once a single character is written. These
// kilobytes must be immediately stored deeper into
// the stack. These writes can be fire-and-forget,
// as each lower level will have to deal with and
// store those bytes (and potential further
// expansions or reductions). Writes are conveniently
// fire-and-forget.
//
// Whereas, for input, when converting a single input
// char from the lower level, a reduction would simply
// require subsequent reads proxied by the stack.
// However, expansions would potentially emit large
// blocks of data which would have to be cached
// somewhere. The intermediate state of the expansion
// generator cannot be suspended and resumed -- it's
// a function, not a coroutine. Thus, the entirety
// of the expansion routine has to be drained into
// a lookaside buffer to permit it to run to completion.
// This buffer has to be held in memory, which can
// grow without bound. Whereas, with writes, we can
// block this generator on downstream space being
// available.  Yet reads require that the upstream
// be given a chance to handle what we've seen so
// far in order to make more in-memory buffer space
// available.
std::streamsize xsputn( const char *data, std::streamsize amt ) override; std::streamsize xsputn( const char *data, std::streamsize amt ) override;
// EEEW. But to sidestep/circumvent, all this below is necessary.
// Calls `underflow` on the wrapped (`underlying`) buffer and returns its result.
//
// `underflow` cannot be called directly on another `std::streambuf` object from
// here, so the call goes through a pointer-to-member whose address is taken via
// this class's own name.
auto
forwardUnderflow() const
{
	using UnderflowMember= int (std::streambuf::*)();
	const UnderflowMember underflowMember= &StackableStreambuf::underflow;
	return ( underlying->*underflowMember )();
}
// Returns the `eback()` pointer (start of the get area) of the wrapped
// (`underlying`) buffer, reached through a pointer-to-member.
auto
underlying_eback() const
{
	using GetAreaAccessor= char *(std::streambuf::*)() const;
	const GetAreaAccessor accessor= &StackableStreambuf::eback;
	return ( underlying->*accessor )();
}
// Returns the `gptr()` pointer (current read position) of the wrapped
// (`underlying`) buffer, reached through a pointer-to-member.
auto
underlying_gptr() const
{
	using GetAreaAccessor= char *(std::streambuf::*)() const;
	const GetAreaAccessor accessor= &StackableStreambuf::gptr;
	return ( underlying->*accessor )();
}
// Returns the `egptr()` pointer (end of the get area) of the wrapped
// (`underlying`) buffer, reached through a pointer-to-member.
auto
underlying_egptr() const
{
	using GetAreaAccessor= char *(std::streambuf::*)() const;
	const GetAreaAccessor accessor= &StackableStreambuf::egptr;
	return ( underlying->*accessor )();
}
// Calls `setg( b, g, e )` on the wrapped (`underlying`) buffer, i.e. replaces its
// get-area pointers (begin, current, end), via a pointer-to-member.
void
underlying_setg( char *const b, char *const g, char *const e )
{
	using SetgMember= void (std::streambuf::*)( char *, char *, char * );
	const SetgMember setgMember= &StackableStreambuf::setg;
	( underlying->*setgMember )( b, g, e );
}
// Takes over the unread part of the wrapped buffer's get area.
//
// This buffer's get area becomes [underlying gptr, underlying egptr), with the
// cursor at its start.  The wrapped buffer's three get pointers are then all set
// to that end position, so the wrapped buffer sees its get area as fully consumed.
void
assume_underlying()
{
	char *const unreadBegin= underlying_gptr();
	char *const unreadEnd= underlying_egptr();

	setg( unreadBegin, unreadBegin, unreadEnd );
	underlying_setg( unreadEnd, unreadEnd, unreadEnd );
}
}; };
template< typename T > template< typename T >
@ -57,9 +136,18 @@ namespace Alepha::Hydrogen::IOStreams ::detail:: StackableStreambuf_m
return os; return os;
} }
template< typename T >
std::istream &
operator >> ( std::istream &is, PushStack< T > &&params )
{
build_streambuf( is, std::move( params ) );
return is;
}
inline namespace impl inline namespace impl
{ {
std::ostream & operator << ( std::ostream &os, PopStack ); std::ostream & operator << ( std::ostream &os, PopStack );
std::istream & operator >> ( std::istream &is, PopStack );
} }
} }