diff --git a/Algorithm/CMakeLists.txt b/Algorithm/CMakeLists.txt
new file mode 100644
index 0000000..b35f714
--- /dev/null
+++ b/Algorithm/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_subdirectory( string_distance.test )
+
+target_sources( alepha PRIVATE
+	string_distance.cc
+)
diff --git a/Algorithm/string_distance.cc b/Algorithm/string_distance.cc
new file mode 100644
index 0000000..f7f5b31
--- /dev/null
+++ b/Algorithm/string_distance.cc
@@ -0,0 +1,153 @@
+static_assert( __cplusplus > 2020'99 );
+
+#include "string_distance.h"
+
+#include <cassert>
+#include <cstddef>
+
+#include <algorithm>
+#include <stdexcept>
+#include <string_view>
+#include <vector>
+
+namespace Alepha::Hydrogen::Algorithm::detail::string_distance_m
+{
+	namespace
+	{
+		// Dense 2-D table of `T`, kept row by row in a single `std::vector`.
+		// Every cell starts out value-initialized (zero for arithmetic `T`).
+		// `at` checks each axis and throws `std::out_of_range`; `operator[]` does not check.
+		template< typename T >
+		class Matrix
+		{
+			private:
+				const std::size_t stride; // Cells per row (extent of `x`).
+				const std::size_t height; // Number of rows (extent of `y`).
+				std::vector< T > storage;
+
+			public:
+				explicit
+				Matrix( const std::size_t stride, const std::size_t height )
+					: stride( stride ), height( height ), storage( stride * height ) {}
+
+				struct Coordinate { std::size_t x; std::size_t y; };
+
+			private:
+				// The shared accessor bodies are `static` and return `auto &`, so the constness
+				// of `Self` decides whether a `T &` or a `const T &` comes back.  Written as
+				// non-static members returning `T &`, the `const` accessors could not compile.
+				template< typename Self >
+				static auto &
+				index_impl( Self &self, const Coordinate coordinate )
+				{
+					return self.storage[ coordinate.x + coordinate.y * self.stride ];
+				}
+
+				template< typename Self >
+				static auto &
+				at_impl( Self &self, const Coordinate coordinate )
+				{
+					if( coordinate.x >= self.stride ) throw std::out_of_range{ "Overindexed x" };
+					if( coordinate.y >= self.height ) throw std::out_of_range{ "Overindexed y" };
+					return self.storage.at( coordinate.x + coordinate.y * self.stride );
+				}
+
+			public:
+				T &at( Coordinate coordinate ) { return at_impl( *this, coordinate ); }
+				const T &at( Coordinate coordinate ) const { return at_impl( *this, coordinate ); }
+
+				T &operator[]( Coordinate coordinate ) { return index_impl( *this, coordinate ); }
+				const T &operator[]( Coordinate coordinate ) const { return index_impl( *this, coordinate ); }
+		};
+
+		// Smaller of two values.  Forwards to `std::min`, so both arguments must share one type.
+		template< typename T0, typename T1 >
+		auto
+		min( T0 a, T1 b )
+		{
+			return std::min( a, b );
+		}
+
+		// Smallest of three or more values, folded from the right through the binary overload.
+		template< typename T0, typename ... T >
+		auto
+		min( T0 a, T ... t )
+		{
+			return min( a, min( t... ) );
+		}
+
+		// Builds the `( aSize + 1 )` by `( bSize + 1 )` table used by both distance functions.
+		// Cell `{ i, j }` is to hold the distance between the first `i` characters of `a` and
+		// the first `j` characters of `b`, so the two borders are seeded with the cost of
+		// reaching a prefix from the empty string: `i` edits along one, `j` along the other.
+		Matrix< std::size_t >
+		makeSeededTable( const std::size_t aSize, const std::size_t bSize )
+		{
+			Matrix< std::size_t > table( aSize + 1, bSize + 1 );
+
+			for( std::size_t i= 0; i <= aSize; ++i ) table.at( { i, 0 } )= i;
+			for( std::size_t j= 0; j <= bSize; ++j ) table.at( { 0, j } )= j;
+
+			return table;
+		}
+	}
+
+	// Returns the number of single character insertions, removals and substitutions needed
+	// to turn `a` into `b`.  Swapping two adjacent characters counts as two edits.
+	std::size_t
+	exports::rewriteStringDistance( const std::string_view a, const std::string_view b )
+	{
+		auto table= makeSeededTable( a.size(), b.size() );
+
+		for( std::size_t i= 1; i <= a.size(); ++i )
+		{
+			for( std::size_t j= 1; j <= b.size(); ++j )
+			{
+				auto &next= table.at( { i, j } );
+				const auto &diagonal= table.at( { i - 1, j - 1 } );
+				const auto &removal= table.at( { i - 1, j } );
+				const auto &insertion= table.at( { i, j - 1 } );
+
+				// Equal characters extend the previous alignment at no cost.
+				if( a.at( i - 1 ) == b.at( j - 1 ) ) next= diagonal;
+				else next= 1 + min( diagonal, removal, insertion );
+			}
+		}
+
+		return table.at( { a.size(), b.size() } );
+	}
+
+	// Same as `rewriteStringDistance`, except that swapping two adjacent characters counts
+	// as a single edit.
+	std::size_t
+	exports::optimalStringDistance( const std::string_view a, const std::string_view b )
+	{
+		auto table= makeSeededTable( a.size(), b.size() );
+
+		for( std::size_t i= 1; i <= a.size(); ++i )
+		{
+			for( std::size_t j= 1; j <= b.size(); ++j )
+			{
+				const std::size_t cost= a.at( i - 1 ) != b.at( j - 1 );
+				assert( cost == 0 or cost == 1 );
+
+				auto &next= table.at( { i, j } );
+				const auto del= table.at( { i - 1, j } ) + 1;
+				const auto ins= table.at( { i, j - 1 } ) + 1;
+				const auto sub= table.at( { i - 1, j - 1 } ) + cost;
+
+				next= min( del, ins, sub );
+
+				if( i > 1 and j > 1
+					and a.at( i - 1 ) == b.at( j - 2 )
+					and a.at( i - 2 ) == b.at( j - 1 ) )
+				{
+					next= min( table.at( { i, j } ),
+						table.at( { i - 2, j - 2 } ) + cost );// transposition
+				}
+			}
+		}
+
+		return table.at( { a.size(), b.size() } );
+	}
+}
diff --git a/Algorithm/string_distance.h b/Algorithm/string_distance.h
new file mode 100644
index 0000000..e0b6326
--- /dev/null
+++ b/Algorithm/string_distance.h
@@ -0,0 +1,28 @@
+static_assert( __cplusplus > 2020'99 );
+
+#pragma once
+
+#include
+
+#include
+
+#include
+
+namespace Alepha::Hydrogen::Algorithm ::detail:: string_distance_m
+{
+	inline namespace exports
+	{
+		// Counts the single character insertions, removals and substitutions needed to turn
+		// `a` into `b`.  Doesn't handle transpositions: an adjacent swap counts as two edits.
+		std::size_t rewriteStringDistance( std::string_view a, std::string_view b );
+
+		// As `rewriteStringDistance`, but does handle transpositions: an adjacent swap counts
+		// as one edit.
+		std::size_t optimalStringDistance( std::string_view a, std::string_view b );
+	}
+}
+
+namespace Alepha::Hydrogen::Algorithm::inline exports::inline string_distance_m
+{
+	using namespace detail::string_distance_m::exports;
+}
diff --git a/Algorithm/string_distance.test/0.cc b/Algorithm/string_distance.test/0.cc
new file mode 100644
index 0000000..c30b6d5
--- /dev/null
+++ b/Algorithm/string_distance.test/0.cc
@@ -0,0 +1,45 @@
+static_assert( __cplusplus > 2020'99 );
+
+#include "../string_distance.h"
+
+#include
+#include
+
+static auto init= Alepha::Utility::enroll <=[]
+{
+	using namespace Alepha::Testing::exports;
+
+	using namespace Alepha::Algorithm::exports::string_distance_m;
+
+	"Rewrite string distance examples."_test <=TableTest< rewriteStringDistance >::Cases
+	{
+		{ "Simple example of equality"_case, { "Hello World", "Hello World" }, 0 },
+		{ "Simple example of single substitution"_case, { "Hello Worrd", "Hello World" }, 1 },
+		{ "Simple example of single addition"_case, { "Hello Worlda", "Hello World" }, 1 },
+		{ "Simple example of single removal"_case, { "Hello Worl", "Hello World" }, 1 },
+		{ "Simple example of single transposition"_case, { "Hello Wolrd", "Hello World" }, 2 },
+		{ "Two transposition"_case, { "xxxabxxxcdxxx", "xxxbaxxxdcxxx" }, 4 },
+		{ "Reversal case"_case, { "123456789", "987654321" }, 8 },
+		// Empty and unmatched-prefix inputs: these only pass when the table borders are seeded.
+		{ "Both empty"_case, { "", "" }, 0 },
+		{ "Empty first string"_case, { "", "Hello" }, 5 },
+		{ "Empty second string"_case, { "Hello", "" }, 5 },
+		{ "Unmatched prefix"_case, { "xyzabc", "abc" }, 3 },
+	};
+
+	"Optimal string distance examples."_test <=TableTest< optimalStringDistance >::Cases
+	{
+		{ "Simple example of equality"_case, { "Hello World", "Hello World" }, 0 },
+		{ "Simple example of single substitution"_case, { "Hello Worrd", "Hello World" }, 1 },
+		{ "Simple example of single addition"_case, { "Hello Worlda", "Hello World" }, 1 },
+		{ "Simple example of single removal"_case, { "Hello Worl", "Hello World" }, 1 },
+		{ "Simple example of single transposition"_case, { "Hello Wolrd", "Hello World" }, 1 },
+		{ "Two transposition"_case, { "xxxabxxxcdxxx", "xxxbaxxxdcxxx" }, 2 },
+		{ "Reversal case"_case, { "123456789", "987654321" }, 8 },
+		// Empty and unmatched-prefix inputs: these only pass when the table borders are seeded.
+		{ "Both empty"_case, { "", "" }, 0 },
+		{ "Empty first string"_case, { "", "Hello" }, 5 },
+		{ "Empty second string"_case, { "Hello", "" }, 5 },
+		{ "Unmatched prefix"_case, { "xyzabc", "abc" }, 3 },
+	};
+};
diff --git a/Algorithm/string_distance.test/CMakeLists.txt b/Algorithm/string_distance.test/CMakeLists.txt
new file mode 100644
index 0000000..b099603
--- /dev/null
+++ b/Algorithm/string_distance.test/CMakeLists.txt
@@ -0,0 +1 @@
+unit_test( 0 )
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e18158b..b206fd2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -24,6 +24,7 @@ add_subdirectory( Atomic )
 add_subdirectory( Proof )
 add_subdirectory( IOStreams )
 add_subdirectory( Reflection )
+add_subdirectory( Algorithm )
 add_subdirectory( Testing )
 add_subdirectory( Utility )
 