diff --git a/src/Database/Adapter.php b/src/Database/Adapter.php index 62a8eb7fe..811539aef 100644 --- a/src/Database/Adapter.php +++ b/src/Database/Adapter.php @@ -1442,4 +1442,38 @@ public function enableAlterLocks(bool $enable): self return $this; } + + /** + * Does the adapter support trigram index? + * + * @return bool + */ + abstract public function getSupportForTrigramIndex(): bool; + + /** + * Is PCRE regex supported? + * PCRE (Perl Compatible Regular Expressions) supports \b for word boundaries + * + * @return bool + */ + abstract public function getSupportForPCRERegex(): bool; + + /** + * Is POSIX regex supported? + * POSIX regex uses \y for word boundaries instead of \b + * + * @return bool + */ + abstract public function getSupportForPOSIXRegex(): bool; + + /** + * Is regex supported at all? + * Returns true if either PCRE or POSIX regex is supported + * + * @return bool + */ + public function getSupportForRegex(): bool + { + return $this->getSupportForPCRERegex() || $this->getSupportForPOSIXRegex(); + } } diff --git a/src/Database/Adapter/MariaDB.php b/src/Database/Adapter/MariaDB.php index 2876139f7..2201ecc09 100644 --- a/src/Database/Adapter/MariaDB.php +++ b/src/Database/Adapter/MariaDB.php @@ -2230,4 +2230,19 @@ public function getSupportForAlterLocks(): bool { return true; } + + public function getSupportForTrigramIndex(): bool + { + return false; + } + + public function getSupportForPCRERegex(): bool + { + return true; + } + + public function getSupportForPOSIXRegex(): bool + { + return false; + } } diff --git a/src/Database/Adapter/Mongo.php b/src/Database/Adapter/Mongo.php index 009ad1f7c..1e0fa6930 100644 --- a/src/Database/Adapter/Mongo.php +++ b/src/Database/Adapter/Mongo.php @@ -2469,7 +2469,8 @@ protected function getQueryOperator(string $operator): string Query::TYPE_STARTS_WITH, Query::TYPE_NOT_STARTS_WITH, Query::TYPE_ENDS_WITH, - Query::TYPE_NOT_ENDS_WITH => '$regex', + Query::TYPE_NOT_ENDS_WITH, + Query::TYPE_REGEX => '$regex', Query::TYPE_OR => '$or', Query::TYPE_AND => '$and', default => throw new DatabaseException('Unknown operator:' . $operator . '. Must be one of ' . Query::TYPE_EQUAL . ', ' . Query::TYPE_NOT_EQUAL . ', ' . Query::TYPE_LESSER . ', ' . Query::TYPE_LESSER_EQUAL . ', ' . Query::TYPE_GREATER . ', ' . Query::TYPE_GREATER_EQUAL . ', ' . Query::TYPE_IS_NULL . ', ' . Query::TYPE_IS_NOT_NULL . ', ' . Query::TYPE_BETWEEN . ', ' . Query::TYPE_NOT_BETWEEN . ', ' . Query::TYPE_STARTS_WITH . ', ' . Query::TYPE_NOT_STARTS_WITH . ', ' . Query::TYPE_ENDS_WITH . ', ' . Query::TYPE_NOT_ENDS_WITH . ', ' . Query::TYPE_CONTAINS . ', ' . Query::TYPE_NOT_CONTAINS . ', ' . Query::TYPE_SEARCH . ', ' . Query::TYPE_NOT_SEARCH . ', ' . Query::TYPE_SELECT), @@ -2740,6 +2741,26 @@ public function getSupportForGetConnectionId(): bool return false; } + /** + * Is PCRE regex supported? + * + * @return bool + */ + public function getSupportForPCRERegex(): bool + { + return true; + } + + /** + * Is POSIX regex supported? + * + * @return bool + */ + public function getSupportForPOSIXRegex(): bool + { + return false; + } + /** * Is cache fallback supported? * @@ -3221,4 +3242,9 @@ public function getSupportForAlterLocks(): bool { return false; } + + public function getSupportForTrigramIndex(): bool + { + return false; + } } diff --git a/src/Database/Adapter/Pool.php b/src/Database/Adapter/Pool.php index 76c98e8b2..1e61004a9 100644 --- a/src/Database/Adapter/Pool.php +++ b/src/Database/Adapter/Pool.php @@ -365,6 +365,21 @@ public function getSupportForFulltextWildcardIndex(): bool return $this->delegate(__FUNCTION__, \func_get_args()); } + public function getSupportForPCRERegex(): bool + { + return $this->delegate(__FUNCTION__, \func_get_args()); + } + + public function getSupportForPOSIXRegex(): bool + { + return $this->delegate(__FUNCTION__, \func_get_args()); + } + + public function getSupportForTrigramIndex(): bool + { + return $this->delegate(__FUNCTION__, \func_get_args()); + } + public function getSupportForCasting(): bool { return $this->delegate(__FUNCTION__, \func_get_args()); diff --git a/src/Database/Adapter/Postgres.php b/src/Database/Adapter/Postgres.php index 86da09a58..2d5b9ff3b 100644 --- a/src/Database/Adapter/Postgres.php +++ b/src/Database/Adapter/Postgres.php @@ -154,6 +154,7 @@ public function create(string $name): bool // Enable extensions $this->getPDO()->prepare('CREATE EXTENSION IF NOT EXISTS postgis')->execute(); $this->getPDO()->prepare('CREATE EXTENSION IF NOT EXISTS vector')->execute(); + $this->getPDO()->prepare('CREATE EXTENSION IF NOT EXISTS pg_trgm')->execute(); $collation = " CREATE COLLATION IF NOT EXISTS utf8_ci_ai ( @@ -899,9 +900,10 @@ public function createIndex(string $collection, string $id, string $type, array Database::INDEX_SPATIAL, Database::INDEX_HNSW_EUCLIDEAN, Database::INDEX_HNSW_COSINE, - Database::INDEX_HNSW_DOT => 'INDEX', + Database::INDEX_HNSW_DOT, + Database::INDEX_OBJECT, + Database::INDEX_TRIGRAM => 'INDEX', Database::INDEX_UNIQUE => 'UNIQUE INDEX', - Database::INDEX_OBJECT => 'INDEX', default => throw new DatabaseException('Unknown index type: ' . $type . '. Must be one of ' . Database::INDEX_KEY . ', ' . Database::INDEX_UNIQUE . ', ' . Database::INDEX_FULLTEXT . ', ' . Database::INDEX_SPATIAL . ', ' . Database::INDEX_OBJECT . ', ' . Database::INDEX_HNSW_EUCLIDEAN . ', ' . Database::INDEX_HNSW_COSINE . ', ' . Database::INDEX_HNSW_DOT), }; @@ -922,6 +924,11 @@ public function createIndex(string $collection, string $id, string $type, array Database::INDEX_HNSW_COSINE => " USING HNSW ({$attributes} vector_cosine_ops)", Database::INDEX_HNSW_DOT => " USING HNSW ({$attributes} vector_ip_ops)", Database::INDEX_OBJECT => " USING GIN ({$attributes})", + Database::INDEX_TRIGRAM => + " USING GIN (" . implode(', ', array_map( + fn ($a) => "$a gin_trgm_ops", + array_map('trim', explode(',', $attributes)) + )) . ")", default => " ({$attributes})", }; @@ -2112,6 +2119,21 @@ public function getSupportForVectors(): bool return true; } + public function getSupportForPCRERegex(): bool + { + return false; + } + + public function getSupportForPOSIXRegex(): bool + { + return true; + } + + public function getSupportForTrigramIndex(): bool + { + return true; + } + /** * @return string */ @@ -2120,6 +2142,14 @@ public function getLikeOperator(): string return 'ILIKE'; } + /** + * @return string + */ + public function getRegexOperator(): string + { + return '~'; + } + protected function processException(PDOException $e): \Exception { // Timeout diff --git a/src/Database/Adapter/SQL.php b/src/Database/Adapter/SQL.php index 4bd0bb653..bac50e476 100644 --- a/src/Database/Adapter/SQL.php +++ b/src/Database/Adapter/SQL.php @@ -1794,6 +1794,8 @@ protected function getSQLOperator(string $method): string case Query::TYPE_NOT_ENDS_WITH: case Query::TYPE_NOT_CONTAINS: return $this->getLikeOperator(); + case Query::TYPE_REGEX: + return $this->getRegexOperator(); case Query::TYPE_VECTOR_DOT: case Query::TYPE_VECTOR_COSINE: case Query::TYPE_VECTOR_EUCLIDEAN: @@ -2284,6 +2286,14 @@ public function getLikeOperator(): string return 'LIKE'; } + /** + * @return string + */ + public function getRegexOperator(): string + { + return 'REGEXP'; + } + public function getInternalIndexesKeys(): array { return []; diff --git a/src/Database/Adapter/SQLite.php b/src/Database/Adapter/SQLite.php index a3d31db68..6d00bb90a 100644 --- a/src/Database/Adapter/SQLite.php +++ b/src/Database/Adapter/SQLite.php @@ -1876,4 +1876,26 @@ public function getSupportForAlterLocks(): bool { return false; } + + /** + * Is PCRE regex supported? + * SQLite does not have native REGEXP support - it requires compile-time option or user-defined function + * + * @return bool + */ + public function getSupportForPCRERegex(): bool + { + return false; + } + + /** + * Is POSIX regex supported? + * SQLite does not have native REGEXP support - it requires compile-time option or user-defined function + * + * @return bool + */ + public function getSupportForPOSIXRegex(): bool + { + return false; + } } diff --git a/src/Database/Database.php b/src/Database/Database.php index d5595df38..cdd4491e1 100644 --- a/src/Database/Database.php +++ b/src/Database/Database.php @@ -85,6 +85,7 @@ class Database public const INDEX_HNSW_EUCLIDEAN = 'hnsw_euclidean'; public const INDEX_HNSW_COSINE = 'hnsw_cosine'; public const INDEX_HNSW_DOT = 'hnsw_dot'; + public const INDEX_TRIGRAM = 'trigram'; // Max limits public const MAX_INT = 2147483647; @@ -1641,6 +1642,7 @@ public function createCollection(string $id, array $attributes = [], array $inde $this->adapter->getSupportForMultipleFulltextIndexes(), $this->adapter->getSupportForIdenticalIndexes(), $this->adapter->getSupportForObject(), + $this->adapter->getSupportForTrigramIndex(), ); foreach ($indexes as $index) { if (!$validator->isValid($index)) { @@ -2785,7 +2787,8 @@ public function updateAttribute(string $collection, string $id, ?string $type = $this->adapter->getSupportForAttributes(), $this->adapter->getSupportForMultipleFulltextIndexes(), $this->adapter->getSupportForIdenticalIndexes(), - $this->adapter->getSupportForObject() + $this->adapter->getSupportForObject(), + $this->adapter->getSupportForTrigramIndex() ); foreach ($indexes as $index) { @@ -3665,8 +3668,14 @@ public function createIndex(string $collection, string $id, string $type, array } break; + case self::INDEX_TRIGRAM: + if (!$this->adapter->getSupportForTrigramIndex()) { + throw new DatabaseException('Trigram indexes are not supported'); + } + break; + default: - throw new DatabaseException('Unknown index type: ' . $type . '. Must be one of ' . Database::INDEX_KEY . ', ' . Database::INDEX_UNIQUE . ', ' . Database::INDEX_FULLTEXT . ', ' . Database::INDEX_SPATIAL . ', ' . Database::INDEX_OBJECT . ', ' . Database::INDEX_HNSW_EUCLIDEAN . ', ' . Database::INDEX_HNSW_COSINE . ', ' . Database::INDEX_HNSW_DOT); + throw new DatabaseException('Unknown index type: ' . $type . '. Must be one of ' . Database::INDEX_KEY . ', ' . Database::INDEX_UNIQUE . ', ' . Database::INDEX_FULLTEXT . ', ' . Database::INDEX_SPATIAL . ', ' . Database::INDEX_OBJECT . ', ' . Database::INDEX_HNSW_EUCLIDEAN . ', ' . Database::INDEX_HNSW_COSINE . ', ' . Database::INDEX_HNSW_DOT . ', '.Database::INDEX_TRIGRAM); } /** @var array $collectionAttributes */ @@ -3722,6 +3731,7 @@ public function createIndex(string $collection, string $id, string $type, array $this->adapter->getSupportForMultipleFulltextIndexes(), $this->adapter->getSupportForIdenticalIndexes(), $this->adapter->getSupportForObject(), + $this->adapter->getSupportForTrigramIndex(), ); if (!$validator->isValid($index)) { throw new IndexException($validator->getDescription()); diff --git a/src/Database/Query.php b/src/Database/Query.php index 60ec1d712..1c77439d3 100644 --- a/src/Database/Query.php +++ b/src/Database/Query.php @@ -26,6 +26,7 @@ class Query public const TYPE_NOT_STARTS_WITH = 'notStartsWith'; public const TYPE_ENDS_WITH = 'endsWith'; public const TYPE_NOT_ENDS_WITH = 'notEndsWith'; + public const TYPE_REGEX = 'regex'; // Spatial methods public const TYPE_CROSSES = 'crosses'; @@ -109,6 +110,7 @@ class Query self::TYPE_CURSOR_BEFORE, self::TYPE_AND, self::TYPE_OR, + self::TYPE_REGEX ]; public const VECTOR_TYPES = [ @@ -1178,4 +1180,16 @@ public static function vectorEuclidean(string $attribute, array $vector): self { return new self(self::TYPE_VECTOR_EUCLIDEAN, $attribute, [$vector]); } + + /** + * Helper method to create Query with regex method + * + * @param string $attribute + * @param string $pattern + * @return Query + */ + public static function regex(string $attribute, string $pattern): self + { + return new self(self::TYPE_REGEX, $attribute, [$pattern]); + } } diff --git a/src/Database/Validator/Index.php b/src/Database/Validator/Index.php index 33648feeb..8d6ca22c5 100644 --- a/src/Database/Validator/Index.php +++ b/src/Database/Validator/Index.php @@ -29,6 +29,7 @@ class Index extends Validator * @param bool $supportForMultipleFulltextIndexes * @param bool $supportForIdenticalIndexes * @param bool $supportForObjectIndexes + * @param bool $supportForTrigramIndexes * @throws DatabaseException */ public function __construct( @@ -43,7 +44,8 @@ public function __construct( protected bool $supportForAttributes = true, protected bool $supportForMultipleFulltextIndexes = true, protected bool $supportForIdenticalIndexes = true, - protected bool $supportForObjectIndexes = false + protected bool $supportForObjectIndexes = false, + protected bool $supportForTrigramIndexes = false ) { foreach ($attributes as $attribute) { $key = \strtolower($attribute->getAttribute('key', $attribute->getAttribute('$id'))); @@ -137,6 +139,9 @@ public function isValid($value): bool if (!$this->checkObjectIndexes($value)) { return false; } + if (!$this->checkTrigramIndexes($value)) { + return false; + } return true; } @@ -462,6 +467,44 @@ public function checkVectorIndexes(Document $index): bool return true; } + /** + * @param Document $index + * @return bool + * @throws DatabaseException + */ + public function checkTrigramIndexes(Document $index): bool + { + $type = $index->getAttribute('type'); + + if ($type !== Database::INDEX_TRIGRAM) { + return true; + } + + if ($this->supportForTrigramIndexes === false) { + $this->message = 'Trigram indexes are not supported'; + return false; + } + + $attributes = $index->getAttribute('attributes', []); + + foreach ($attributes as $attributeName) { + $attribute = $this->attributes[\strtolower($attributeName)] ?? new Document(); + if ($attribute->getAttribute('type', '') !== Database::VAR_STRING) { + $this->message = 'Trigram index can only be created on string type attributes'; + return false; + } + } + + $orders = $index->getAttribute('orders', []); + $lengths = $index->getAttribute('lengths', []); + if (!empty($orders) || \count(\array_filter($lengths)) > 0) { + $this->message = 'Trigram indexes do not support orders or lengths'; + return false; + } + + return true; + } + /** * @param Document $index * @return bool diff --git a/src/Database/Validator/Queries.php b/src/Database/Validator/Queries.php index 8066228e3..97b3f5824 100644 --- a/src/Database/Validator/Queries.php +++ b/src/Database/Validator/Queries.php @@ -121,7 +121,8 @@ public function isValid($value): bool Query::TYPE_NOT_TOUCHES, Query::TYPE_VECTOR_DOT, Query::TYPE_VECTOR_COSINE, - Query::TYPE_VECTOR_EUCLIDEAN => Base::METHOD_TYPE_FILTER, + Query::TYPE_VECTOR_EUCLIDEAN, + Query::TYPE_REGEX => Base::METHOD_TYPE_FILTER, default => '', }; diff --git a/src/Database/Validator/Query/Filter.php b/src/Database/Validator/Query/Filter.php index 11053f14c..4c47872a8 100644 --- a/src/Database/Validator/Query/Filter.php +++ b/src/Database/Validator/Query/Filter.php @@ -334,6 +334,7 @@ public function isValid($value): bool case Query::TYPE_NOT_STARTS_WITH: case Query::TYPE_ENDS_WITH: case Query::TYPE_NOT_ENDS_WITH: + case Query::TYPE_REGEX: if (count($value->getValues()) != 1) { $this->message = \ucfirst($method) . ' queries require exactly one value.'; return false; diff --git a/tests/e2e/Adapter/Scopes/DocumentTests.php b/tests/e2e/Adapter/Scopes/DocumentTests.php index 151a5ae26..a70bc39f4 100644 --- a/tests/e2e/Adapter/Scopes/DocumentTests.php +++ b/tests/e2e/Adapter/Scopes/DocumentTests.php @@ -6549,4 +6549,487 @@ public function testUpsertWithJSONFilters(): void // Cleanup $database->deleteCollection($collection); } + + public function testFindRegex(): void + { + Authorization::setRole(Role::any()->toString()); + + /** @var Database $database */ + $database = static::getDatabase(); + + // Skip test if regex is not supported + if (!$database->getAdapter()->getSupportForRegex()) { + $this->expectNotToPerformAssertions(); + return; + } + + // Determine regex support type + $supportsPCRE = $database->getAdapter()->getSupportForPCRERegex(); + $supportsPOSIX = $database->getAdapter()->getSupportForPOSIXRegex(); + + // Determine word boundary pattern based on support + $wordBoundaryPattern = null; + $wordBoundaryPatternPHP = null; + if ($supportsPCRE) { + $wordBoundaryPattern = '\\b'; // PCRE uses \b + $wordBoundaryPatternPHP = '\\b'; // PHP preg_match uses \b + } elseif ($supportsPOSIX) { + $wordBoundaryPattern = '\\y'; // POSIX uses \y + $wordBoundaryPatternPHP = '\\b'; // PHP preg_match still uses \b for verification + } + + $database->createCollection('moviesRegex', permissions: [ + Permission::create(Role::any()), + Permission::read(Role::any()), + Permission::update(Role::any()), + Permission::delete(Role::any()), + ]); + + if ($database->getAdapter()->getSupportForAttributes()) { + $this->assertEquals(true, $database->createAttribute('moviesRegex', 'name', Database::VAR_STRING, 128, true)); + $this->assertEquals(true, $database->createAttribute('moviesRegex', 'director', Database::VAR_STRING, 128, true)); + $this->assertEquals(true, $database->createAttribute('moviesRegex', 'year', Database::VAR_INTEGER, 0, true)); + } + + if ($database->getAdapter()->getSupportForTrigramIndex()) { + $database->createIndex('moviesRegex', 'trigram_name', Database::INDEX_TRIGRAM, ['name']); + $database->createIndex('moviesRegex', 'trigram_director', Database::INDEX_TRIGRAM, ['director']); + } + + // Create test documents + $database->createDocument('moviesRegex', new Document([ + '$permissions' => [ + Permission::read(Role::any()), + Permission::create(Role::any()), + Permission::update(Role::any()), + Permission::delete(Role::any()), + ], + 'name' => 'Frozen', + 'director' => 'Chris Buck & Jennifer Lee', + 'year' => 2013, + ])); + + $database->createDocument('moviesRegex', new Document([ + '$permissions' => [ + Permission::read(Role::any()), + Permission::create(Role::any()), + Permission::update(Role::any()), + Permission::delete(Role::any()), + ], + 'name' => 'Frozen II', + 'director' => 'Chris Buck & Jennifer Lee', + 'year' => 2019, + ])); + + $database->createDocument('moviesRegex', new Document([ + '$permissions' => [ + Permission::read(Role::any()), + Permission::create(Role::any()), + Permission::update(Role::any()), + Permission::delete(Role::any()), + ], + 'name' => 'Captain America: The First Avenger', + 'director' => 'Joe Johnston', + 'year' => 2011, + ])); + + $database->createDocument('moviesRegex', new Document([ + '$permissions' => [ + Permission::read(Role::any()), + Permission::create(Role::any()), + Permission::update(Role::any()), + Permission::delete(Role::any()), + ], + 'name' => 'Captain Marvel', + 'director' => 'Anna Boden & Ryan Fleck', + 'year' => 2019, + ])); + + $database->createDocument('moviesRegex', new Document([ + '$permissions' => [ + Permission::read(Role::any()), + Permission::create(Role::any()), + Permission::update(Role::any()), + Permission::delete(Role::any()), + ], + 'name' => 'Work in Progress', + 'director' => 'TBD', + 'year' => 2025, + ])); + + $database->createDocument('moviesRegex', new Document([ + '$permissions' => [ + Permission::read(Role::any()), + Permission::create(Role::any()), + Permission::update(Role::any()), + Permission::delete(Role::any()), + ], + 'name' => 'Work in Progress 2', + 'director' => 'TBD', + 'year' => 2026, + ])); + + // Helper function to verify regex query completeness + $verifyRegexQuery = function (string $attribute, string $regexPattern, array $queryResults) use ($database) { + // Convert regex pattern to PHP regex format + $phpPattern = '/' . str_replace('/', '\/', $regexPattern) . '/'; + + // Get all documents to manually verify + $allDocuments = $database->find('moviesRegex'); + + // Manually filter documents that match the pattern + $expectedMatches = []; + foreach ($allDocuments as $doc) { + $value = $doc->getAttribute($attribute); + if (preg_match($phpPattern, $value)) { + $expectedMatches[] = $doc->getId(); + } + } + + // Get IDs from query results + $actualMatches = array_map(fn ($doc) => $doc->getId(), $queryResults); + + // Verify no extra documents are returned + foreach ($queryResults as $doc) { + $value = $doc->getAttribute($attribute); + $this->assertTrue( + (bool) preg_match($phpPattern, $value), + "Document '{$doc->getId()}' with {$attribute}='{$value}' should match pattern '{$regexPattern}'" + ); + } + + // Verify all expected documents are returned (no missing) + sort($expectedMatches); + sort($actualMatches); + $this->assertEquals( + $expectedMatches, + $actualMatches, + "Query should return exactly the documents matching pattern '{$regexPattern}' on attribute '{$attribute}'" + ); + }; + + // Test basic regex pattern - match movies starting with 'Captain' + // Note: Pattern format may vary by adapter (MongoDB uses regex strings, SQL uses REGEXP) + $pattern = '/^Captain/'; + $documents = $database->find('moviesRegex', [ + Query::regex('name', '^Captain'), + ]); + + // Verify completeness: all matching documents returned, no extra documents + $verifyRegexQuery('name', '^Captain', $documents); + + // Verify expected documents are included + $names = array_map(fn ($doc) => $doc->getAttribute('name'), $documents); + $this->assertTrue(in_array('Captain America: The First Avenger', $names)); + $this->assertTrue(in_array('Captain Marvel', $names)); + + // Test regex pattern - match movies containing 'Frozen' + $pattern = '/Frozen/'; + $documents = $database->find('moviesRegex', [ + Query::regex('name', 'Frozen'), + ]); + + // Verify completeness: all matching documents returned, no extra documents + $verifyRegexQuery('name', 'Frozen', $documents); + + // Verify expected documents are included + $names = array_map(fn ($doc) => $doc->getAttribute('name'), $documents); + $this->assertTrue(in_array('Frozen', $names)); + $this->assertTrue(in_array('Frozen II', $names)); + + // Test regex pattern - match movies ending with 'Marvel' + $pattern = '/Marvel$/'; + $documents = $database->find('moviesRegex', [ + Query::regex('name', 'Marvel$'), + ]); + + // Verify completeness: all matching documents returned, no extra documents + $verifyRegexQuery('name', 'Marvel$', $documents); + + $this->assertEquals(1, count($documents)); // Only Captain Marvel + $this->assertEquals('Captain Marvel', $documents[0]->getAttribute('name')); + + // Test regex pattern - match movies with 'Work' in the name + $pattern = '/.*Work.*/'; + $documents = $database->find('moviesRegex', [ + Query::regex('name', '.*Work.*'), + ]); + + // Verify completeness: all matching documents returned, no extra documents + $verifyRegexQuery('name', '.*Work.*', $documents); + + // Verify expected documents are included + $names = array_map(fn ($doc) => $doc->getAttribute('name'), $documents); + $this->assertTrue(in_array('Work in Progress', $names)); + $this->assertTrue(in_array('Work in Progress 2', $names)); + + // Test regex pattern - match movies with 'Buck' in director + $pattern = '/.*Buck.*/'; + $documents = $database->find('moviesRegex', [ + Query::regex('director', '.*Buck.*'), + ]); + + // Verify completeness: all matching documents returned, no extra documents + $verifyRegexQuery('director', '.*Buck.*', $documents); + + // Verify expected documents are included + $names = array_map(fn ($doc) => $doc->getAttribute('name'), $documents); + $this->assertTrue(in_array('Frozen', $names)); + $this->assertTrue(in_array('Frozen II', $names)); + + // Test regex with case pattern - adapters may be case-sensitive or case-insensitive + // MySQL/MariaDB REGEXP is case-insensitive by default, MongoDB is case-sensitive + $patternCaseSensitive = '/captain/'; + $patternCaseInsensitive = '/captain/i'; + $documents = $database->find('moviesRegex', [ + Query::regex('name', 'captain'), // lowercase + ]); + + // Verify all returned documents match the pattern (case-insensitive check for verification) + foreach ($documents as $doc) { + $name = $doc->getAttribute('name'); + // Verify that returned documents contain 'captain' (case-insensitive check) + $this->assertTrue( + (bool) preg_match($patternCaseInsensitive, $name), + "Document '{$name}' should match pattern 'captain' (case-insensitive check)" + ); + } + + // Verify completeness: Check what the database actually returns + // Some adapters (MongoDB) are case-sensitive, others (MySQL/MariaDB) are case-insensitive + // We'll determine expected matches based on case-sensitive matching (pure regex behavior) + // If the adapter is case-insensitive, it will return more documents, which is fine + $allDocuments = $database->find('moviesRegex'); + $expectedMatchesCaseSensitive = []; + $expectedMatchesCaseInsensitive = []; + foreach ($allDocuments as $doc) { + $name = $doc->getAttribute('name'); + if (preg_match($patternCaseSensitive, $name)) { + $expectedMatchesCaseSensitive[] = $doc->getId(); + } + if (preg_match($patternCaseInsensitive, $name)) { + $expectedMatchesCaseInsensitive[] = $doc->getId(); + } + } + + $actualMatches = array_map(fn ($doc) => $doc->getId(), $documents); + sort($actualMatches); + + // The database might be case-sensitive (MongoDB) or case-insensitive (MySQL/MariaDB) + // Check which one matches the actual results + sort($expectedMatchesCaseSensitive); + sort($expectedMatchesCaseInsensitive); + + // Verify that actual results match either case-sensitive or case-insensitive expectations + $matchesCaseSensitive = ($expectedMatchesCaseSensitive === $actualMatches); + $matchesCaseInsensitive = ($expectedMatchesCaseInsensitive === $actualMatches); + + $this->assertTrue( + $matchesCaseSensitive || $matchesCaseInsensitive, + "Query results should match either case-sensitive (" . count($expectedMatchesCaseSensitive) . " docs) or case-insensitive (" . count($expectedMatchesCaseInsensitive) . " docs) expectations. Got " . count($actualMatches) . " documents." + ); + + // Test regex with case-insensitive pattern (if adapter supports it via flags) + // Test with uppercase to verify case sensitivity + $pattern = '/Captain/'; + $documents = $database->find('moviesRegex', [ + Query::regex('name', 'Captain'), // uppercase + ]); + + // Verify all returned documents match the pattern + foreach ($documents as $doc) { + $name = $doc->getAttribute('name'); + $this->assertTrue( + (bool) preg_match($pattern, $name), + "Document '{$name}' should match pattern 'Captain'" + ); + } + + // Verify completeness + $allDocuments = $database->find('moviesRegex'); + $expectedMatches = []; + foreach ($allDocuments as $doc) { + $name = $doc->getAttribute('name'); + if (preg_match($pattern, $name)) { + $expectedMatches[] = $doc->getId(); + } + } + $actualMatches = array_map(fn ($doc) => $doc->getId(), $documents); + sort($expectedMatches); + sort($actualMatches); + $this->assertEquals( + $expectedMatches, + $actualMatches, + "Query should return exactly the documents matching pattern 'Captain'" + ); + + // Test regex combined with other queries + $pattern = '/^Captain/'; + $documents = $database->find('moviesRegex', [ + Query::regex('name', '^Captain'), + Query::greaterThan('year', 2010), + ]); + + // Verify all returned documents match both conditions + foreach ($documents as $doc) { + $name = $doc->getAttribute('name'); + $year = $doc->getAttribute('year'); + $this->assertTrue( + (bool) preg_match($pattern, $name), + "Document '{$name}' should match pattern '{$pattern}'" + ); + $this->assertGreaterThan(2010, $year, "Document '{$name}' should have year > 2010"); + } + + // Verify completeness: manually check all documents that match both conditions + $allDocuments = $database->find('moviesRegex'); + $expectedMatches = []; + foreach ($allDocuments as $doc) { + $name = $doc->getAttribute('name'); + $year = $doc->getAttribute('year'); + if (preg_match($pattern, $name) && $year > 2010) { + $expectedMatches[] = $doc->getId(); + } + } + $actualMatches = array_map(fn ($doc) => $doc->getId(), $documents); + sort($expectedMatches); + sort($actualMatches); + $this->assertEquals( + $expectedMatches, + $actualMatches, + "Query should return exactly the documents matching both regex '^Captain' and year > 2010" + ); + + // Test regex with limit + $pattern = '/.*/'; + $documents = $database->find('moviesRegex', [ + Query::regex('name', '.*'), // Match all + Query::limit(3), + ]); + + $this->assertEquals(3, count($documents)); + + // Verify all returned documents match the pattern (should match all) + foreach ($documents as $doc) { + $name = $doc->getAttribute('name'); + $this->assertTrue( + (bool) preg_match($pattern, $name), + "Document '{$name}' should match pattern '{$pattern}'" + ); + } + + // Note: With limit, we can't verify completeness, but we can verify all returned match + + // Test regex with non-matching pattern + $pattern = '/^NonExistentPattern$/'; + $documents = $database->find('moviesRegex', [ + Query::regex('name', '^NonExistentPattern$'), + ]); + + $this->assertEquals(0, count($documents)); + + // Verify no documents match (double-check by getting all and filtering) + $allDocuments = $database->find('moviesRegex'); + $matchingCount = 0; + foreach ($allDocuments as $doc) { + $name = $doc->getAttribute('name'); + if (preg_match($pattern, $name)) { + $matchingCount++; + } + } + $this->assertEquals(0, $matchingCount, "No documents should match pattern '{$pattern}'"); + + // Verify completeness: no documents should be returned + $this->assertEquals([], array_map(fn ($doc) => $doc->getId(), $documents)); + + // Test regex with special characters (should be escaped or handled properly) + $pattern = '/.*:.*/'; + $documents = $database->find('moviesRegex', [ + Query::regex('name', '.*:.*'), // Match movies with colon + ]); + + // Verify completeness: all matching documents returned, no extra documents + $verifyRegexQuery('name', '.*:.*', $documents); + + // Verify expected document is included + $names = array_map(fn ($doc) => $doc->getAttribute('name'), $documents); + $this->assertTrue(in_array('Captain America: The First Avenger', $names)); + + // Test regex search pattern - match movies with word boundaries + // Only test if word boundaries are supported (PCRE or POSIX) + if ($wordBoundaryPattern !== null) { + $dbPattern = $wordBoundaryPattern . 'Work' . $wordBoundaryPattern; + $phpPattern = '/' . $wordBoundaryPatternPHP . 'Work' . $wordBoundaryPatternPHP . '/'; + $documents = $database->find('moviesRegex', [ + Query::regex('name', $dbPattern), + ]); + + // Verify all returned documents match the pattern + foreach ($documents as $doc) { + $name = $doc->getAttribute('name'); + $this->assertTrue( + (bool) preg_match($phpPattern, $name), + "Document '{$name}' should match pattern '{$dbPattern}'" + ); + } + + // Verify completeness: manually check all documents + $allDocuments = $database->find('moviesRegex'); + $expectedMatches = []; + foreach ($allDocuments as $doc) { + $name = $doc->getAttribute('name'); + if (preg_match($phpPattern, $name)) { + $expectedMatches[] = $doc->getId(); + } + } + $actualMatches = array_map(fn ($doc) => $doc->getId(), $documents); + sort($expectedMatches); + sort($actualMatches); + $this->assertEquals( + $expectedMatches, + $actualMatches, + "Query should return exactly the documents matching pattern '{$dbPattern}'" + ); + } + + // Test regex search with multiple patterns - match movies containing 'Captain' or 'Frozen' + $pattern1 = '/Captain/'; + $pattern2 = '/Frozen/'; + $documents = $database->find('moviesRegex', [ + Query::or([ + Query::regex('name', 'Captain'), + Query::regex('name', 'Frozen'), + ]), + ]); + + // Verify all returned documents match at least one pattern + foreach ($documents as $doc) { + $name = $doc->getAttribute('name'); + $matchesPattern1 = (bool) preg_match($pattern1, $name); + $matchesPattern2 = (bool) preg_match($pattern2, $name); + $this->assertTrue( + $matchesPattern1 || $matchesPattern2, + "Document '{$name}' should match either pattern 'Captain' or 'Frozen'" + ); + } + + // Verify completeness: manually check all documents + $allDocuments = $database->find('moviesRegex'); + $expectedMatches = []; + foreach ($allDocuments as $doc) { + $name = $doc->getAttribute('name'); + if (preg_match($pattern1, $name) || preg_match($pattern2, $name)) { + $expectedMatches[] = $doc->getId(); + } + } + $actualMatches = array_map(fn ($doc) => $doc->getId(), $documents); + sort($expectedMatches); + sort($actualMatches); + $this->assertEquals( + $expectedMatches, + $actualMatches, + "Query should return exactly the documents matching pattern 'Captain' OR 'Frozen'" + ); + $database->deleteCollection('moviesRegex'); + } } diff --git a/tests/e2e/Adapter/Scopes/IndexTests.php b/tests/e2e/Adapter/Scopes/IndexTests.php index 77f276cd6..ef700658f 100644 --- a/tests/e2e/Adapter/Scopes/IndexTests.php +++ b/tests/e2e/Adapter/Scopes/IndexTests.php @@ -173,7 +173,9 @@ public function testIndexValidation(): void $database->getAdapter()->getSupportForVectors(), $database->getAdapter()->getSupportForAttributes(), $database->getAdapter()->getSupportForMultipleFulltextIndexes(), - $database->getAdapter()->getSupportForIdenticalIndexes() + $database->getAdapter()->getSupportForIdenticalIndexes(), + false, + $database->getAdapter()->getSupportForTrigramIndex() ); if ($database->getAdapter()->getSupportForIdenticalIndexes()) { $errorMessage = 'Index length 701 is larger than the size for title1: 700"'; @@ -264,7 +266,9 @@ public function testIndexValidation(): void $database->getAdapter()->getSupportForVectors(), $database->getAdapter()->getSupportForAttributes(), $database->getAdapter()->getSupportForMultipleFulltextIndexes(), - $database->getAdapter()->getSupportForIdenticalIndexes() + $database->getAdapter()->getSupportForIdenticalIndexes(), + false, + $database->getAdapter()->getSupportForTrigramIndex() ); $this->assertFalse($validator->isValid($newIndex)); @@ -644,4 +648,126 @@ public function testIdenticalIndexValidation(): void $database->deleteCollection($collectionId); } } + + public function testTrigramIndex(): void + { + $trigramSupport = $this->getDatabase()->getAdapter()->getSupportForTrigramIndex(); + if (!$trigramSupport) { + $this->expectNotToPerformAssertions(); + return; + } + + /** @var Database $database */ + $database = static::getDatabase(); + + $collectionId = 'trigram_test'; + try { + $database->createCollection($collectionId); + + $database->createAttribute($collectionId, 'name', Database::VAR_STRING, 256, false); + $database->createAttribute($collectionId, 'description', Database::VAR_STRING, 512, false); + + // Create trigram index on name attribute + $this->assertEquals(true, $database->createIndex($collectionId, 'trigram_name', Database::INDEX_TRIGRAM, ['name'])); + + $collection = $database->getCollection($collectionId); + $indexes = $collection->getAttribute('indexes'); + $this->assertCount(1, $indexes); + $this->assertEquals('trigram_name', $indexes[0]['$id']); + $this->assertEquals(Database::INDEX_TRIGRAM, $indexes[0]['type']); + $this->assertEquals(['name'], $indexes[0]['attributes']); + + // Create another trigram index on description + $this->assertEquals(true, $database->createIndex($collectionId, 'trigram_description', Database::INDEX_TRIGRAM, ['description'])); + + $collection = $database->getCollection($collectionId); + $indexes = $collection->getAttribute('indexes'); + $this->assertCount(2, $indexes); + + // Test that trigram index can be deleted + $this->assertEquals(true, $database->deleteIndex($collectionId, 'trigram_name')); + $this->assertEquals(true, $database->deleteIndex($collectionId, 'trigram_description')); + + $collection = $database->getCollection($collectionId); + $indexes = $collection->getAttribute('indexes'); + $this->assertCount(0, $indexes); + + } finally { + // Clean up + $database->deleteCollection($collectionId); + } + } + + public function testTrigramIndexValidation(): void + { + $trigramSupport = $this->getDatabase()->getAdapter()->getSupportForTrigramIndex(); + if (!$trigramSupport) { + $this->expectNotToPerformAssertions(); + return; + } + + /** @var Database $database */ + $database = static::getDatabase(); + + $collectionId = 'trigram_validation_test'; + try { + $database->createCollection($collectionId); + + $database->createAttribute($collectionId, 'name', Database::VAR_STRING, 256, false); + $database->createAttribute($collectionId, 'description', Database::VAR_STRING, 412, false); + $database->createAttribute($collectionId, 'age', Database::VAR_INTEGER, 8, false); + + // Test: Trigram index on non-string attribute should fail + try { + $database->createIndex($collectionId, 'trigram_invalid', Database::INDEX_TRIGRAM, ['age']); + $this->fail('Expected exception when creating trigram index on non-string attribute'); + } catch (Exception $e) { + $this->assertStringContainsString('Trigram index can only be created on string type attributes', $e->getMessage()); + } + + // Test: Trigram index with multiple string attributes should succeed + $this->assertEquals(true, $database->createIndex($collectionId, 'trigram_multi', Database::INDEX_TRIGRAM, ['name', 'description'])); + + $collection = $database->getCollection($collectionId); + $indexes = $collection->getAttribute('indexes'); + $trigramMultiIndex = null; + foreach ($indexes as $idx) { + if ($idx['$id'] === 'trigram_multi') { + $trigramMultiIndex = $idx; + break; + } + } + $this->assertNotNull($trigramMultiIndex); + $this->assertEquals(Database::INDEX_TRIGRAM, $trigramMultiIndex['type']); + $this->assertEquals(['name', 'description'], $trigramMultiIndex['attributes']); + + // Test: Trigram index with mixed string and non-string attributes should fail + try { + $database->createIndex($collectionId, 'trigram_mixed', Database::INDEX_TRIGRAM, ['name', 'age']); + $this->fail('Expected exception when creating trigram index with mixed attribute types'); + } catch (Exception $e) { + $this->assertStringContainsString('Trigram index can only be created on string type attributes', $e->getMessage()); + } + + // Test: Trigram index with orders should fail + try { + $database->createIndex($collectionId, 'trigram_order', Database::INDEX_TRIGRAM, ['name'], [], [Database::ORDER_ASC]); + $this->fail('Expected exception when creating trigram index with orders'); + } catch (Exception $e) { + $this->assertStringContainsString('Trigram indexes do not support orders or lengths', $e->getMessage()); + } + + // Test: Trigram index with lengths should fail + try { + $database->createIndex($collectionId, 'trigram_length', Database::INDEX_TRIGRAM, ['name'], [128]); + $this->fail('Expected exception when creating trigram index with lengths'); + } catch (Exception $e) { + $this->assertStringContainsString('Trigram indexes do not support orders or lengths', $e->getMessage()); + } + + } finally { + // Clean up + $database->deleteCollection($collectionId); + } + } } diff --git a/tests/unit/Validator/IndexTest.php b/tests/unit/Validator/IndexTest.php index 608a65d2b..5dfe80e4e 100644 --- a/tests/unit/Validator/IndexTest.php +++ b/tests/unit/Validator/IndexTest.php @@ -477,4 +477,123 @@ public function testIndexWithNoAttributeSupport(): void $index = $collection->getAttribute('indexes')[0]; $this->assertTrue($validator->isValid($index)); } + + /** + * @throws Exception + */ + public function testTrigramIndexValidation(): void + { + $collection = new Document([ + '$id' => ID::custom('test'), + 'name' => 'test', + 'attributes' => [ + new Document([ + '$id' => ID::custom('name'), + 'type' => Database::VAR_STRING, + 'format' => '', + 'size' => 255, + 'signed' => true, + 'required' => false, + 'default' => null, + 'array' => false, + 'filters' => [], + ]), + new Document([ + '$id' => ID::custom('description'), + 'type' => Database::VAR_STRING, + 'format' => '', + 'size' => 512, + 'signed' => true, + 'required' => false, + 'default' => null, + 'array' => false, + 'filters' => [], + ]), + new Document([ + '$id' => ID::custom('age'), + 'type' => Database::VAR_INTEGER, + 'format' => '', + 'size' => 0, + 'signed' => true, + 'required' => false, + 'default' => null, + 'array' => false, + 'filters' => [], + ]), + ], + 'indexes' => [] + ]); + + // Validator with supportForTrigramIndexes enabled + $validator = new Index($collection->getAttribute('attributes'), $collection->getAttribute('indexes', []), 768, [], false, false, false, false, false, false, false, false, supportForTrigramIndexes: true); + + // Valid: Trigram index on single VAR_STRING attribute + $validIndex = new Document([ + '$id' => ID::custom('idx_trigram_valid'), + 'type' => Database::INDEX_TRIGRAM, + 'attributes' => ['name'], + 'lengths' => [], + 'orders' => [], + ]); + $this->assertTrue($validator->isValid($validIndex)); + + // Valid: Trigram index on multiple string attributes + $validIndexMulti = new Document([ + '$id' => ID::custom('idx_trigram_multi_valid'), + 'type' => Database::INDEX_TRIGRAM, + 'attributes' => ['name', 'description'], + 'lengths' => [], + 'orders' => [], + ]); + $this->assertTrue($validator->isValid($validIndexMulti)); + + // Invalid: Trigram index on non-string attribute + $invalidIndexType = new Document([ + '$id' => ID::custom('idx_trigram_invalid_type'), + 'type' => Database::INDEX_TRIGRAM, + 'attributes' => ['age'], + 'lengths' => [], + 'orders' => [], + ]); + $this->assertFalse($validator->isValid($invalidIndexType)); + $this->assertStringContainsString('Trigram index can only be created on string type attributes', $validator->getDescription()); + + // Invalid: Trigram index with mixed string and non-string attributes + $invalidIndexMixed = new Document([ + '$id' => ID::custom('idx_trigram_mixed'), + 'type' => Database::INDEX_TRIGRAM, + 'attributes' => ['name', 'age'], + 'lengths' => [], + 'orders' => [], + ]); + $this->assertFalse($validator->isValid($invalidIndexMixed)); + $this->assertStringContainsString('Trigram index can only be created on string type attributes', $validator->getDescription()); + + // Invalid: Trigram index with orders + $invalidIndexOrder = new Document([ + '$id' => ID::custom('idx_trigram_order'), + 'type' => Database::INDEX_TRIGRAM, + 'attributes' => ['name'], + 'lengths' => [], + 'orders' => ['asc'], + ]); + $this->assertFalse($validator->isValid($invalidIndexOrder)); + $this->assertStringContainsString('Trigram indexes do not support orders or lengths', $validator->getDescription()); + + // Invalid: Trigram index with lengths + $invalidIndexLength = new Document([ + '$id' => ID::custom('idx_trigram_length'), + 'type' => Database::INDEX_TRIGRAM, + 'attributes' => ['name'], + 'lengths' => [128], + 'orders' => [], + ]); + $this->assertFalse($validator->isValid($invalidIndexLength)); + $this->assertStringContainsString('Trigram indexes do not support orders or lengths', $validator->getDescription()); + + // Validator with supportForTrigramIndexes disabled should reject trigram + $validatorNoSupport = new Index($collection->getAttribute('attributes'), $collection->getAttribute('indexes', []), 768, [], false, false, false, false, false, false, false, false, false); + $this->assertFalse($validatorNoSupport->isValid($validIndex)); + $this->assertEquals('Trigram indexes are not supported', $validatorNoSupport->getDescription()); + } }