Note that there are some explanatory texts on larger screens.

plurals
  1. PO
    primarykey
    data
    text
    <p>You could use the <a href="http://en.wikipedia.org/wiki/Levenshtein_distance" rel="nofollow">Levenshtein distance</a></p> <p><strong>EDIT</strong></p> <p>Since I finally needed something similar myself, and this question remains open. Here is some code I played around with. Both straight up string distance and also applying the Levenshtein algorithm to the path tokens.</p> <p><strong>C++ Code</strong></p> <pre><code>/* ----- String based Levenshtein ---- Matching : this/is/a/strange/path 0 : this/is/a/strange/path 2 : this/is/a/strong/path 2 : that/is/a/strange/path 4 : is/this/a/strange/path 5 : this/is/a/strange 13 : this/is/a 15 : this/is 16 : that/was 18 : this 24 : completely/different/folder ----- Path based Levenshtein ---- Matching : this/is/a/strange/path 0 : this/is/a/strange/path 1 : this/is/a/strange 2 : this/is/a/strong/path 2 : that/is/a/strange/path 2 : this/is/a 2 : is/this/a/strange/path 3 : this/is 4 : this 7 : that/was 8 : completely/different/folder */ #include &lt;string&gt; #include &lt;vector&gt; #include &lt;algorithm&gt; #include &lt;stdio.h&gt; /// returns the smaller of two parameters template&lt; typename T &gt; T levmin( T v1, T v2 ) { return ( v1 &lt; v2 ) ? v1 : v2; } /// Returns the Levenshtein distance between the specified strings template &lt; typename T, typename T_STR &gt; typename T_STR::size_type levstr(const T_STR &amp;s1, const T_STR &amp;s2) { typename T_STR::size_type l1 = s1.length(), l2 = s2.length(); std::vector&lt; typename T_STR::size_type &gt; d( ( l1 + 1 ) * ( l2 + 1 ) ); typename T_STR::size_type i, j; for ( i = 0; i &lt;= l1; i++ ) d[ i * l2 ] = i; for ( i = 0; i &lt;= l2; i++ ) d[ i ] = i; for ( i = 1; i &lt;= l1; i++ ) for ( j = 1; j &lt;= l2; j++ ) d[ i * l2 + j ] = levmin( levmin( d[ ( i - 1 ) * l2 + j ] + 1, d[ i * l2 + ( j - 1 ) ] + 1 ), d[ ( i - 1 ) * l2 + ( j - 1 ) ] + ( s1[ i - 1 ] == s2[ j - 1 ] ? 0 : 1 ) ); return d[ ( l1 * l2 ) + l2 ]; } /// Returns the Levenshtein distance between the specified split paths template &lt; typename T, typename T_STR, typename T_LST &gt; typename T_STR::size_type levpath(const T_LST &amp;lst1, const T_LST &amp;lst2) { typename T_STR::size_type l1 = lst1.size(), l2 = lst2.size(); std::vector&lt; typename T_STR::size_type &gt; d( ( l1 + 1 ) * ( l2 + 1 ) ); typename T_STR::size_type i, j; for ( i = 0; i &lt;= l1; i++ ) d[ i * l2 ] = i; for ( i = 0; i &lt;= l2; i++ ) d[ i ] = i; for ( i = 1; i &lt;= l1; i++ ) for ( j = 1; j &lt;= l2; j++ ) d[ i * l2 + j ] = levmin( levmin( d[ ( i - 1 ) * l2 + j ] + 1, d[ i * l2 + ( j - 1 ) ] + 1 ), d[ ( i - 1 ) * l2 + ( j - 1 ) ] + levstr&lt; T, T_STR&gt;( lst1[ i - 1 ], lst2[ j - 1 ] ) ); return d[ ( l1 * l2 ) + l2 ]; } /// Generic split path function /* Returns a vector of path tokens */ template &lt; typename T, typename T_STR, typename T_LST &gt; T_LST splitpath( const T_STR &amp;s, const T sep ) { T_LST lst; typename T_STR::size_type i = 0, l = 0; while( T_STR::npos != ( i = s.find_first_of( sep, i ) ) ) { if ( l &lt; i ) lst.push_back( T_STR( s, l, i - l ) ); l = ++i; } // end while if ( l &lt; s.length() ) lst.push_back( T_STR( s, l, s.length() - l ) ); return lst; } /// Generic join path function /* Returns a string of joined vector tokens */ template &lt; typename T, typename T_STR, typename T_LST &gt; T_STR joinpath( const T_LST &amp;p, const T sep ) { T_STR s; for ( typename T_LST::const_iterator it = p.begin(); it != p.end(); it++ ) { if ( s.length() ) s += sep; s += *it; } return s; } // String types typedef char t_levchar; typedef std::basic_string&lt; t_levchar &gt; t_levstr; typedef std::vector&lt; t_levstr &gt; t_levpath; typedef std::vector&lt; t_levpath &gt; t_levpathlist; // Sort compare for split paths template&lt; typename T, typename T_STR, typename T_LST &gt; struct levcmp { levcmp( const T_LST &amp;p ) { m_p = p; } bool operator() ( const T_LST &amp;i, const T_LST &amp;j ) { return levpath&lt; T, T_STR, T_LST &gt;( i, m_p ) &lt; levpath&lt; T, T_STR, T_LST &gt;( j, m_p ); } T_LST m_p; }; // Sort compare for strings template&lt; typename T, typename T_STR &gt; struct levcmp_str { levcmp_str( const T_STR &amp;s ) { m_s = s; } bool operator() ( const T_STR &amp;i, const T_STR &amp;j ) { return levstr&lt; T, T_STR &gt;( i, m_s ) &lt; levstr&lt; T, T_STR &gt;( j, m_s ); } T_STR m_s; }; int main( int argc, char* argv[] ) { // Path to compare with const t_levchar* compare_path = "this/is/a/strange/path"; // Paths to sort const t_levchar* path_list[] = { "this/is/a/strong/path", "that/is/a/strange/path", "this/is/a/strange", "this/is", "this/is/a", "this", "this/is/a/strange/path", "is/this/a/strange/path", "that/was", "completely/different/folder", 0 }; printf( "\n----- String based Levenshtein ----\n" "Matching : %s\n\n", compare_path ); // Create vector from paths std::vector&lt; t_levstr &gt; paths; for( int i = 0; path_list[ i ]; i++ ) paths.push_back( path_list[ i ] ); // Sort the paths std::sort( paths.begin(), paths.end(), levcmp_str&lt; t_levchar, t_levstr &gt;( compare_path ) ); // Show the result for ( std::vector&lt; t_levstr &gt;::iterator it = paths.begin(); it != paths.end(); it++ ) printf( "%d : %s\n", (int)levstr&lt; t_levchar, t_levstr &gt;( *it, compare_path ), (const char*)it-&gt;c_str() ); printf( "\n----- Path based Levenshtein ----\n" "Matching : %s\n\n", compare_path ); // Create vector from split paths t_levpath splitcompare = splitpath&lt; t_levchar, t_levstr, t_levpath &gt;( compare_path, '/' ); t_levpathlist splitpaths; for ( int i = 0; path_list[ i ]; i++ ) splitpaths.push_back( splitpath&lt; t_levchar, t_levstr, t_levpath &gt;( path_list[ i ], '/' ) ); // Sort the paths std::sort( splitpaths.begin(), splitpaths.end(), levcmp&lt; t_levchar, t_levstr, t_levpath &gt;( splitcompare ) ); // Show the results for ( t_levpathlist::iterator it = splitpaths.begin(); it != splitpaths.end(); it++ ) printf( "%d : %s\n", (int)levpath&lt; t_levchar, t_levstr, t_levpath &gt;( *it, splitcompare ), (const char*)joinpath&lt; t_levchar, t_levstr, t_levpath &gt;( *it, '/' ).c_str() ); return 0; } </code></pre>
    singulars
    1. This table or related slice is empty.
    plurals
    1. This table or related slice is empty.
    1. This table or related slice is empty.
    1. This table or related slice is empty.
    1. This table or related slice is empty.
    1. VO
      singulars
      1. This table or related slice is empty.
 

Querying!

 
Guidance

SQuiL has stopped working due to an internal error.

If you are curious you may find further information in the browser console, which is accessible through the devtools (F12).

Reload