1
0
forked from Alepha/Alepha

String distance algorithms.

This commit is contained in:
2023-11-17 01:22:30 -05:00
parent 4426fc2857
commit 001de28231
6 changed files with 198 additions and 0 deletions

View File

@ -0,0 +1,130 @@
static_assert( __cplusplus > 2020'99 );
#include "string_distance.h"
#include <cstdlib>
#include <cassert>
#include <vector>
#include <algorithm>
#include <exception>
#include <stdexcept>
namespace Alepha::Hydrogen::Algorithm::detail::string_distance_m
{
namespace
{
template< typename T >
class Matrix
{
private:
const std::size_t stride;
const std::size_t height;
std::vector< T > storage;
public:
explicit
Matrix( const std::size_t stride, const std::size_t height )
: stride( stride ), height( height ), storage( stride * height ) {}
struct Coordinate { std::size_t x; std::size_t y; };
private:
template< typename Self >
T &
index_impl( Self &self, const Coordinate coordinate )
{
return self.storage[ coordinate.x + coordinate.y * self.stride ];
}
template< typename Self >
T &
at_impl( Self &self, const Coordinate coordinate )
{
if( coordinate.x >= stride ) throw std::out_of_range{ "Overindexed x" };
if( coordinate.y >= height ) throw std::out_of_range{ "Overindexed y" };
return self.storage.at( coordinate.x + coordinate.y * self.stride );
}
public:
T &at( Coordinate coordinate ) { return at_impl( *this, coordinate ); }
const T &at( Coordinate coordinate ) const { return at_impl( *this, coordinate ); }
T &operator[]( Coordinate coordinate ) { return index_impl( *this, coordinate ); }
const T &operator[]( Coordinate coordinate ) const { return index_impl( *this, coordinate ); }
};
template< typename T0, typename T1 >
auto
min( T0 a, T1 b )
{
return std::min( a, b );
}
template< typename T0, typename ... T >
auto
min( T0 a, T ... t )
{
return min( a, min( t... ) );
}
}
std::size_t
exports::rewriteStringDistance( const std::string_view a, const std::string_view b )
{
Matrix< std::size_t > table( a.size() + 1, b.size() + 1 );
for( std::size_t i= 0; i < a.size(); ++i ) table.at( { i, 0 } );
for( std::size_t i= 0; i < b.size(); ++i ) table.at( { 0, i } );
for( std::size_t i= 1; i <= a.size(); ++i )
{
for( std::size_t j= 1; j <= b.size(); ++j )
{
auto &next= table.at( { i, j } );
const auto &up= table.at( { i - 1, j - 1 } );
const auto &left= table.at( { i - 1, j } );
const auto &right= table.at( { i, j - 1 } );
if( a.at( i - 1 ) == b.at( j - 1 ) ) next= up;
else next= 1 + min( up, left, right );
}
}
return table.at( { a.size(), b.size() } );
}
std::size_t
exports::optimalStringDistance( const std::string_view a, const std::string_view b )
{
Matrix< std::size_t > table( a.size() + 1, b.size() + 1 );
for( std::size_t i= 0; i < a.size(); ++i ) table.at( { i, 0 } );
for( std::size_t i= 0; i < b.size(); ++i ) table.at( { 0, i } );
for( std::size_t i= 1; i <= a.size(); ++i )
{
for( std::size_t j= 1; j <= b.size(); ++j )
{
const std::size_t cost= a.at( i - 1 ) != b.at( j - 1 );
assert( cost == 0 or cost == 1 );
auto &next= table.at( { i, j } );
const auto del= table.at( { i - 1, j } ) + 1;
const auto ins= table.at( { i, j - 1 } ) + 1;
const auto sub= table.at( { i - 1, j - 1 } ) + cost;
next= min( del, ins, sub );
if( i > 1 and j > 1
and a.at( i - 1 ) == b.at( j - 2 )
and a.at( i - 2 ) == b.at( j - 1 ) )
{
next= min( table.at( { i, j } ),
table.at( { i - 2, j - 2 } ) + cost );// transposition
}
}
}
return table.at( { a.size(), b.size() } );
}
}