forked from Alepha/Alepha
String distance algorithms.
This commit is contained in:
5
Algorithm/CMakeLists.txt
Normal file
5
Algorithm/CMakeLists.txt
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
# Pull in the unit tests for the string distance algorithms.
add_subdirectory( string_distance.test )
|
||||||
|
|
||||||
|
# Add the string distance implementation to the sources of the `alepha` target.
target_sources( alepha PRIVATE string_distance.cc )
|
130
Algorithm/string_distance.cc
Normal file
130
Algorithm/string_distance.cc
Normal file
@ -0,0 +1,130 @@
|
|||||||
|
static_assert( __cplusplus > 2020'99 );
|
||||||
|
|
||||||
|
#include "string_distance.h"
|
||||||
|
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <exception>
|
||||||
|
#include <stdexcept>
|
||||||
|
|
||||||
|
namespace Alepha::Hydrogen::Algorithm::detail::string_distance_m
|
||||||
|
{
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
template< typename T >
|
||||||
|
class Matrix
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
const std::size_t stride;
|
||||||
|
const std::size_t height;
|
||||||
|
std::vector< T > storage;
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit
|
||||||
|
Matrix( const std::size_t stride, const std::size_t height )
|
||||||
|
: stride( stride ), height( height ), storage( stride * height ) {}
|
||||||
|
|
||||||
|
struct Coordinate { std::size_t x; std::size_t y; };
|
||||||
|
|
||||||
|
private:
|
||||||
|
template< typename Self >
|
||||||
|
T &
|
||||||
|
index_impl( Self &self, const Coordinate coordinate )
|
||||||
|
{
|
||||||
|
return self.storage[ coordinate.x + coordinate.y * self.stride ];
|
||||||
|
}
|
||||||
|
|
||||||
|
template< typename Self >
|
||||||
|
T &
|
||||||
|
at_impl( Self &self, const Coordinate coordinate )
|
||||||
|
{
|
||||||
|
if( coordinate.x >= stride ) throw std::out_of_range{ "Overindexed x" };
|
||||||
|
if( coordinate.y >= height ) throw std::out_of_range{ "Overindexed y" };
|
||||||
|
return self.storage.at( coordinate.x + coordinate.y * self.stride );
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
T &at( Coordinate coordinate ) { return at_impl( *this, coordinate ); }
|
||||||
|
const T &at( Coordinate coordinate ) const { return at_impl( *this, coordinate ); }
|
||||||
|
|
||||||
|
T &operator[]( Coordinate coordinate ) { return index_impl( *this, coordinate ); }
|
||||||
|
const T &operator[]( Coordinate coordinate ) const { return index_impl( *this, coordinate ); }
|
||||||
|
};
|
||||||
|
|
||||||
|
// Smaller of two values, returned by value.  This forwards to `std::min`, so
// both arguments have to be of the same type.
template< typename Left, typename Right >
auto
min( const Left lhs, const Right rhs )
{
	const auto &smaller= std::min( lhs, rhs );
	return smaller;
}

// Smallest of three or more values: reduce the tail first, then compare the
// result against the head.
template< typename First, typename ... Rest >
auto
min( const First head, const Rest ... tail )
{
	const auto tailMinimum= min( tail... );
	return min( head, tailMinimum );
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::size_t
|
||||||
|
exports::rewriteStringDistance( const std::string_view a, const std::string_view b )
|
||||||
|
{
|
||||||
|
Matrix< std::size_t > table( a.size() + 1, b.size() + 1 );
|
||||||
|
|
||||||
|
for( std::size_t i= 0; i < a.size(); ++i ) table.at( { i, 0 } );
|
||||||
|
for( std::size_t i= 0; i < b.size(); ++i ) table.at( { 0, i } );
|
||||||
|
|
||||||
|
for( std::size_t i= 1; i <= a.size(); ++i )
|
||||||
|
{
|
||||||
|
for( std::size_t j= 1; j <= b.size(); ++j )
|
||||||
|
{
|
||||||
|
auto &next= table.at( { i, j } );
|
||||||
|
const auto &up= table.at( { i - 1, j - 1 } );
|
||||||
|
const auto &left= table.at( { i - 1, j } );
|
||||||
|
const auto &right= table.at( { i, j - 1 } );
|
||||||
|
if( a.at( i - 1 ) == b.at( j - 1 ) ) next= up;
|
||||||
|
else next= 1 + min( up, left, right );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return table.at( { a.size(), b.size() } );
|
||||||
|
}
|
||||||
|
|
||||||
|
std::size_t
|
||||||
|
exports::optimalStringDistance( const std::string_view a, const std::string_view b )
|
||||||
|
{
|
||||||
|
Matrix< std::size_t > table( a.size() + 1, b.size() + 1 );
|
||||||
|
|
||||||
|
for( std::size_t i= 0; i < a.size(); ++i ) table.at( { i, 0 } );
|
||||||
|
for( std::size_t i= 0; i < b.size(); ++i ) table.at( { 0, i } );
|
||||||
|
|
||||||
|
for( std::size_t i= 1; i <= a.size(); ++i )
|
||||||
|
{
|
||||||
|
for( std::size_t j= 1; j <= b.size(); ++j )
|
||||||
|
{
|
||||||
|
const std::size_t cost= a.at( i - 1 ) != b.at( j - 1 );
|
||||||
|
assert( cost == 0 or cost == 1 );
|
||||||
|
|
||||||
|
auto &next= table.at( { i, j } );
|
||||||
|
const auto del= table.at( { i - 1, j } ) + 1;
|
||||||
|
const auto ins= table.at( { i, j - 1 } ) + 1;
|
||||||
|
const auto sub= table.at( { i - 1, j - 1 } ) + cost;
|
||||||
|
|
||||||
|
next= min( del, ins, sub );
|
||||||
|
|
||||||
|
if( i > 1 and j > 1
|
||||||
|
and a.at( i - 1 ) == b.at( j - 2 )
|
||||||
|
and a.at( i - 2 ) == b.at( j - 1 ) )
|
||||||
|
{
|
||||||
|
next= min( table.at( { i, j } ),
|
||||||
|
table.at( { i - 2, j - 2 } ) + cost );// transposition
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return table.at( { a.size(), b.size() } );
|
||||||
|
}
|
||||||
|
}
|
26
Algorithm/string_distance.h
Normal file
26
Algorithm/string_distance.h
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
static_assert( __cplusplus > 2020'99 );
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <Alepha/Alepha.h>
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
|
||||||
|
#include <string_view>
|
||||||
|
|
||||||
|
namespace Alepha::Hydrogen::Algorithm ::detail:: string_distance_m
|
||||||
|
{
|
||||||
|
inline namespace exports
|
||||||
|
{
|
||||||
|
// Doesn't handle transpositions...
|
||||||
|
// Edit distance between `a` and `b`, counting single-character insertions,
// removals and substitutions.  A swap of two adjacent characters is not a
// distinct edit, so it is scored as two edits.
std::size_t rewriteStringDistance( std::string_view a, std::string_view b );
|
||||||
|
|
||||||
|
// Does handle transpositions...
|
||||||
|
// Edit distance between `a` and `b`, counting single-character insertions,
// removals and substitutions, and additionally scoring a swap of two adjacent
// characters as a single edit.
std::size_t optimalStringDistance( std::string_view a, std::string_view b );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace Alepha::Hydrogen::Algorithm::inline exports::inline string_distance_m
|
||||||
|
{
|
||||||
|
using namespace detail::string_distance_m::exports;
|
||||||
|
}
|
35
Algorithm/string_distance.test/0.cc
Normal file
35
Algorithm/string_distance.test/0.cc
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
static_assert( __cplusplus > 2020'99 );
|
||||||
|
|
||||||
|
#include "../string_distance.h"
|
||||||
|
|
||||||
|
#include <Alepha/Testing/TableTest.h>
|
||||||
|
#include <Alepha/Testing/test.h>
|
||||||
|
|
||||||
|
static auto init= Alepha::Utility::enroll <=[]
|
||||||
|
{
|
||||||
|
using namespace Alepha::Testing::exports;
|
||||||
|
|
||||||
|
using namespace Alepha::Algorithm::exports::string_distance_m;
|
||||||
|
|
||||||
|
"Rewrite string distance examples."_test <=TableTest< rewriteStringDistance >::Cases
|
||||||
|
{
|
||||||
|
{ "Simple example of equality"_case, { "Hello World", "Hello World" }, 0 },
|
||||||
|
{ "Simple example of single substitution"_case, { "Hello Worrd", "Hello World" }, 1 },
|
||||||
|
{ "Simple example of single addition"_case, { "Hello Worlda", "Hello World" }, 1 },
|
||||||
|
{ "Simple example of single removal"_case, { "Hello Worl", "Hello World" }, 1 },
|
||||||
|
{ "Simple example of single transposition"_case, { "Hello Wolrd", "Hello World" }, 2 },
|
||||||
|
{ "Two transposition"_case, { "xxxabxxxcdxxx", "xxxbaxxxdcxxx" }, 4 },
|
||||||
|
{ "Reversal case"_case, { "123456789", "987654321" }, 8 },
|
||||||
|
};
|
||||||
|
|
||||||
|
"Optimal string distance examples."_test <=TableTest< optimalStringDistance >::Cases
|
||||||
|
{
|
||||||
|
{ "Simple example of equality"_case, { "Hello World", "Hello World" }, 0 },
|
||||||
|
{ "Simple example of single substitution"_case, { "Hello Worrd", "Hello World" }, 1 },
|
||||||
|
{ "Simple example of single addition"_case, { "Hello Worlda", "Hello World" }, 1 },
|
||||||
|
{ "Simple example of single removal"_case, { "Hello Worl", "Hello World" }, 1 },
|
||||||
|
{ "Simple example of single transposition"_case, { "Hello Wolrd", "Hello World" }, 1 },
|
||||||
|
{ "Two transposition"_case, { "xxxabxxxcdxxx", "xxxbaxxxdcxxx" }, 2 },
|
||||||
|
{ "Reversal case"_case, { "123456789", "987654321" }, 8 },
|
||||||
|
};
|
||||||
|
};
|
1
Algorithm/string_distance.test/CMakeLists.txt
Normal file
1
Algorithm/string_distance.test/CMakeLists.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
# NOTE(review): presumably registers `0.cc` in this directory as a unit test -- confirm against the project's `unit_test` macro.
unit_test( 0 )
|
@ -24,6 +24,7 @@ add_subdirectory( Atomic )
|
|||||||
add_subdirectory( Proof )
|
add_subdirectory( Proof )
|
||||||
add_subdirectory( IOStreams )
|
add_subdirectory( IOStreams )
|
||||||
add_subdirectory( Reflection )
|
add_subdirectory( Reflection )
|
||||||
|
add_subdirectory( Algorithm )
|
||||||
add_subdirectory( Testing )
|
add_subdirectory( Testing )
|
||||||
add_subdirectory( Utility )
|
add_subdirectory( Utility )
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user