Skip to content

Commit

Permalink
exclude the primary key from index by default
Browse files Browse the repository at this point in the history
  • Loading branch information
nticaric committed Dec 8, 2017
1 parent 053c0a0 commit b81ce82
Show file tree
Hide file tree
Showing 6 changed files with 113 additions and 55 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,14 @@ Note: If your primary key is different than `id` set it like:
$indexer->setPrimaryKey('article_id');
```

### Making the primary key searchable

By default, the primary key is not searchable. To make it searchable, simply run:

```php
$indexer->includePrimaryKey();
```

### Searching

Searching for a phrase or keyword is trivial
Expand Down
24 changes: 21 additions & 3 deletions src/Indexer/TNTIndexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class TNTIndexer
protected $index = null;
protected $dbh = null;
protected $primaryKey = null;
protected $excludePrimaryKey = true;
public $stemmer = null;
public $tokenizer = null;
public $filereader = null;
Expand Down Expand Up @@ -96,6 +97,16 @@ public function setPrimaryKey($primaryKey)
$this->primaryKey = $primaryKey;
}

/**
 * Exclude the primary key from the indexed content.
 *
 * Sets the $excludePrimaryKey flag to true, which is also the property's
 * declared default. While the flag is true, processDocument() removes the
 * primary key entry from the row before the remaining columns are stemmed.
 */
public function excludePrimaryKey()
{
$this->excludePrimaryKey = true;
}

/**
 * Include the primary key in the indexed content, making it searchable.
 *
 * Sets the $excludePrimaryKey flag to false, so processDocument() leaves the
 * primary key entry in the row and it is stemmed like every other column.
 */
public function includePrimaryKey()
{
$this->excludePrimaryKey = false;
}

public function setStemmer($stemmer)
{
$this->stemmer = $stemmer;
Expand Down Expand Up @@ -311,10 +322,17 @@ public function readDocumentsFromFileSystem()

public function processDocument($row)
{
$stems = $row->map(function ($column, $name) {
return $this->stemText($column);
$documentId = $row->get($this->getPrimaryKey());

if ($this->excludePrimaryKey) {
$row->forget($this->getPrimaryKey());
}

$stems = $row->map(function ($columnContent, $columnName) use ($row) {
return $this->stemText($columnContent);
});
$this->saveToIndex($stems, $row->get($this->getPrimaryKey()));

$this->saveToIndex($stems, $documentId);
}

public function insert($document)
Expand Down
93 changes: 49 additions & 44 deletions src/Support/Collection.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,16 @@ public function __construct($items = [])
$this->items = $items;
}

/**
* @param callable $callback
*
* @return Collection
*/
/**
 * Remove the item stored under the given key from this collection.
 *
 * The underlying items array is modified in place and nothing is returned.
 *
 * @param string|int $key Key of the item to remove.
 */
public function forget($key)
{
unset($this->items[$key]);
}

/**
* @param callable $callback
*
* @return Collection
*/
public function each(callable $callback)
{
foreach ($this->items as $key => $item) {
Expand All @@ -32,11 +37,11 @@ public function each(callable $callback)
return $this;
}

/**
* @param callable|null $callback
*
* @return static
*/
/**
* @param callable|null $callback
*
* @return static
*/
public function filter(callable $callback = null)
{
if ($callback) {
Expand All @@ -54,19 +59,19 @@ public function filter(callable $callback = null)
return new static(array_filter($this->items));
}

/**
* @return bool
*/
/**
* @return bool
*/
public function isEmpty()
{
return empty($this->items);
}

/**
* @param callable $callback
*
* @return static
*/
/**
* @param callable $callback
*
* @return static
*/
public function map(callable $callback)
{
$keys = array_keys($this->items);
Expand All @@ -76,12 +81,12 @@ public function map(callable $callback)
return new static(array_combine($keys, $items));
}

/**
* @param callable $callback
* @param null $initial
*
* @return mixed
*/
/**
* @param callable $callback
* @param null $initial
*
* @return mixed
*/
public function reduce(callable $callback, $initial = null)
{
return array_reduce($this->items, $callback, $initial);
Expand All @@ -92,46 +97,46 @@ public function get($key)
return $this->items[$key];
}

/**
* @param $value
* @param null $key
*
* @return array
*/
/**
* @param $value
* @param null $key
*
* @return array
*/
public function pluck($value, $key = null)
{
return array_column($this->items, $value, $key);
}

/**
* @param $glue
*
* @return string
*/
/**
* @param $glue
*
* @return string
*/
public function implode($glue)
{
return implode($glue, $this->items);
}

/**
* @return int
*/
/**
* @return int
*/
public function count()
{
return count($this->items);
}

/**
* @return ArrayIterator
*/
/**
* @return ArrayIterator
*/
public function getIterator()
{
return new ArrayIterator($this->items);
}

/**
* @return array
*/
/**
* @return array
*/
public function toArray()
{
return $this->items;
Expand Down
39 changes: 33 additions & 6 deletions tests/TNTSearchTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ class TNTSearchTest extends PHPUnit_Framework_TestCase

protected $config = [
'driver' => 'sqlite',
'database' => __DIR__ . '/_files/articles.sqlite',
'database' => __DIR__.'/_files/articles.sqlite',
'host' => 'localhost',
'username' => 'testUser',
'password' => 'testPass',
'storage' => __DIR__ . '/_files/',
'storage' => __DIR__.'/_files/'
];

public function testLoadConfig()
Expand All @@ -35,7 +35,7 @@ public function testCreateIndex()
$indexer = $tnt->createIndex($this->indexName);

$this->assertInstanceOf('TeamTNT\TNTSearch\Indexer\TNTIndexer', $indexer);
$this->assertFileExists($indexer->getStoragePath() . $this->indexName);
$this->assertFileExists($indexer->getStoragePath().$this->indexName);
}

public function testSearchBoolean()
Expand Down Expand Up @@ -107,10 +107,37 @@ public function testTotalDocumentCountOnIndexUpdate()

// now we try with a document that does not exist; the total number should increase by 1
$index->update(1234, ['id' => '1234', 'title' => 'updated title', 'article' => 'updated article']);

$this->assertEquals(12, $tnt->totalDocumentsInCollection());
}

/**
 * Checks that the primary key is only searchable when the indexer has been
 * told to include it.
 *
 * The index is built twice from the same query: once after calling
 * includePrimaryKey(), once without it. Searching for 3 is expected to
 * return document id 3 in the first case and no ids in the second.
 */
public function testRemovePrimaryKeyFromIndex()
{
    $search = new TNTSearch();
    $search->loadConfig($this->config);

    // First build: the primary key is explicitly included.
    $keyedIndexer = $search->createIndex($this->indexName);
    $keyedIndexer->disableOutput = true;
    $keyedIndexer->query('SELECT id, title, article FROM articles;');
    $keyedIndexer->includePrimaryKey();
    $keyedIndexer->run();

    $search->selectIndex($this->indexName);
    $withKey = $search->search(3);
    $this->assertEquals([3], $withKey['ids']);

    // Second build: includePrimaryKey() is not called, so the indexer's
    // default setting applies.
    $plainIndexer = $search->createIndex($this->indexName);
    $plainIndexer->disableOutput = true;
    $plainIndexer->query('SELECT id, title, article FROM articles;');
    $plainIndexer->run();

    $search->selectIndex($this->indexName);
    $withoutKey = $search->search(3);
    $this->assertEquals([], $withoutKey['ids']);
}

public function testIndexUpdate()
{
$tnt = new TNTSearch;
Expand Down Expand Up @@ -281,8 +308,8 @@ public function testIndexDoesNotExistException()

public function tearDown()
{
if (file_exists(__DIR__ . "/" . $this->indexName)) {
unlink(__DIR__ . "/" . $this->indexName);
if (file_exists(__DIR__."/".$this->indexName)) {
unlink(__DIR__."/".$this->indexName);
}

}
Expand Down
Binary file removed tests/_files/testIndex
Binary file not shown.
4 changes: 2 additions & 2 deletions tests/classifier/TNTClassifierTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ public function testPredictSpamHam()
}

}

echo "\nSuccess rate is: ".number_format(($guessCount * 100 / $counter), 4)."%";
$precision = number_format(($guessCount * 100 / $counter), 4);
$this->assertGreaterThanOrEqual(98, $precision);
}

public function testPredictClass()
Expand Down

0 comments on commit b81ce82

Please sign in to comment.