Skip to content

Commit

Permalink
improving fuzzy match
Browse files Browse the repository at this point in the history
  • Loading branch information
nticaric committed Jun 29, 2018
1 parent 8d95869 commit 8447a36
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 18 deletions.
10 changes: 8 additions & 2 deletions src/TNTFuzzyMatch.php
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ public function fuzzyMatchFromFile($pattern, $path)
if ($lines) {
while (!feof($lines)) {
$line = fgets($lines, 4096);
$line = str_replace("\r", "", $line);

This comment has been minimized.

Copy link
@staabm

staabm Jul 18, 2018

Contributor

I guess you actually want rtrim instead

This comment has been minimized.

Copy link
@nticaric

nticaric Jul 18, 2018

Author Contributor

feel free to submit a PR

$line = str_replace("\n", "", $line);
if ($this->hasCommonSubsequence($pattern, $line)) {
$res[] = $line;
}
Expand All @@ -118,7 +120,6 @@ public function fuzzyMatchFromFile($pattern, $path)
}

arsort($sorted);

return $sorted;
}

Expand All @@ -140,7 +141,12 @@ public function fuzzyMatch($pattern, $items)
$wordVector = $this->wordToVector($word);
$normalizedPaternVector = $this->makeVectorSameLength($wordVector, $paternVector);

$angle = $this->angleBetweenVectors($wordVector, $normalizedPaternVector);
$angle = $this->angleBetweenVectors($wordVector, $normalizedPaternVector);

if (strpos($word, $pattern) !== false) {
$angle += 0.2;
}

$sorted[$word] = $angle;
}

Expand Down
36 changes: 20 additions & 16 deletions tests/TNTFuzzyMatchTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -72,39 +72,43 @@ public function testFuzzyMatchFromFile()
{
$res = $this->fm->fuzzyMatchFromFile('search', __DIR__.'/_files/english_wordlist_2k.txt');

$this->assertEquals([
'search' => 1,
'research' => 0.86602345529065
], $res);
$equal = bccomp($res['search'], 1.2, 2);
$this->assertEquals(0, $equal);

$equal = bccomp($res['research'], 1.06, 2);
$this->assertEquals(0, $equal);
}

/**
 * Exercises the global fuzzyMatchFromFile() helper with the pattern 'search'
 * against the bundled english_wordlist_2k.txt fixture and checks the scores
 * reported for 'search' and 'research'.
 *
 * NOTE(review): this listing comes from a diff view with the +/- markers
 * stripped. The exact-array assertion below looks like the removed
 * (pre-change) side and the bccomp() checks like the added side; the two
 * cannot both hold for the same result. Confirm against the real file.
 */
public function testFuzzyMatchFromFileFunction()
{
$res = fuzzyMatchFromFile('search', __DIR__.'/_files/english_wordlist_2k.txt');

// Expects exactly two matches, keyed by word, with these precise scores.
$this->assertEquals([
'search' => 1,
'research' => 0.86602345529065
], $res);
// bccomp() with scale 2 compares only the first two decimal places and
// returns 0 when the operands are equal at that precision.
// NOTE(review): bccomp() is documented for numeric strings; the float
// arguments here rely on implicit conversion - confirm this is intended.
$equal = bccomp($res['search'], 1.2, 2);
$this->assertEquals(0, $equal);

// Presumably the previous 0.866... score plus the 0.2 substring bonus
// introduced in this commit, compared at two decimals - TODO confirm.
$equal = bccomp($res['research'], 1.06, 2);
$this->assertEquals(0, $equal);
}

/**
 * Calls fuzzyMatch() on the $this->fm fixture with the pattern 'search' and
 * the candidates 'search', 'research' and 'something', then checks the
 * scores reported for 'search' and 'research'.
 *
 * NOTE(review): this listing comes from a diff view with the +/- markers
 * stripped. The exact-array assertion below looks like the removed
 * (pre-change) side and the bccomp() checks like the added side; the two
 * cannot both hold for the same result. Confirm against the real file.
 */
public function testFuzzyMatch()
{
$res = $this->fm->fuzzyMatch('search', ['search', 'research', 'something']);

// Expects exactly two entries keyed by word ('something' is not expected
// in the result) with these precise scores.
$this->assertEquals([
'search' => 1,
'research' => 0.86602345529065
], $res);
// bccomp() with scale 2 compares only the first two decimal places and
// returns 0 when the operands are equal at that precision.
// NOTE(review): bccomp() is documented for numeric strings; the float
// arguments here rely on implicit conversion - confirm this is intended.
$equal = bccomp($res['search'], 1.2, 2);
$this->assertEquals(0, $equal);

// Presumably the previous 0.866... score plus the 0.2 substring bonus
// introduced in this commit, compared at two decimals - TODO confirm.
$equal = bccomp($res['research'], 1.06, 2);
$this->assertEquals(0, $equal);
}

/**
 * Exercises the global fuzzyMatch() helper with the pattern 'search' and the
 * candidates 'search', 'research' and 'something', then checks the scores
 * reported for 'search' and 'research'.
 *
 * NOTE(review): this listing comes from a diff view with the +/- markers
 * stripped. The exact-array assertion below looks like the removed
 * (pre-change) side and the bccomp() checks like the added side; the two
 * cannot both hold for the same result. Confirm against the real file.
 */
public function testFuzzyMatchFunction()
{
$res = fuzzyMatch('search', ['search', 'research', 'something']);

// Expects exactly two entries keyed by word ('something' is not expected
// in the result) with these precise scores.
$this->assertEquals([
'search' => 1,
'research' => 0.86602345529065
], $res);
// bccomp() with scale 2 compares only the first two decimal places and
// returns 0 when the operands are equal at that precision.
// NOTE(review): bccomp() is documented for numeric strings; the float
// arguments here rely on implicit conversion - confirm this is intended.
$equal = bccomp($res['search'], 1.2, 2);
$this->assertEquals(0, $equal);

// Presumably the previous 0.866... score plus the 0.2 substring bonus
// introduced in this commit, compared at two decimals - TODO confirm.
$equal = bccomp($res['research'], 1.06, 2);
$this->assertEquals(0, $equal);
}
}

0 comments on commit 8447a36

Please sign in to comment.