-
Notifications
You must be signed in to change notification settings - Fork 54
Non null arrays to allow index usage #819
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
f76a0de
4f02d63
31d3db1
1ada153
6cdbe98
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -892,6 +892,26 @@ public function createIndex(string $collection, string $id, string $type, array | |
| $collection = $this->filter($collection); | ||
| $id = $this->filter($id); | ||
|
|
||
| // JSONB array columns need GIN indexes for containment queries (@>). | ||
| $isArrayIndex = false; | ||
| if ($type === Database::INDEX_KEY) { | ||
| $metadataCollection = new Document(['$id' => Database::METADATA]); | ||
| $collectionDoc = $this->getDocument($metadataCollection, $collection); | ||
| if (!$collectionDoc->isEmpty()) { | ||
| $collectionAttributes = \json_decode($collectionDoc->getAttribute('attributes', '[]'), true); | ||
| $arrayCount = 0; | ||
| foreach ($attributes as $attr) { | ||
| foreach ($collectionAttributes as $collectionAttribute) { | ||
| if (\strtolower($collectionAttribute['$id']) === \strtolower($attr) && !empty($collectionAttribute['array'])) { | ||
| $arrayCount++; | ||
| break; | ||
| } | ||
| } | ||
| } | ||
| $isArrayIndex = $arrayCount > 0 && $arrayCount === \count($attributes); | ||
| } | ||
| } | ||
|
|
||
| foreach ($attributes as $i => $attr) { | ||
| $order = empty($orders[$i]) || Database::INDEX_FULLTEXT === $type ? '' : $orders[$i]; | ||
| $isNestedPath = isset($indexAttributeTypes[$attr]) && \str_contains($attr, '.') && $indexAttributeTypes[$attr] === Database::VAR_OBJECT; | ||
|
|
@@ -933,13 +953,13 @@ public function createIndex(string $collection, string $id, string $type, array | |
| $sql = "CREATE {$sqlType} \"{$keyName}\" ON {$this->getSQLTable($collection)}"; | ||
|
|
||
| // Add USING clause for special index types | ||
| $sql .= match ($type) { | ||
| Database::INDEX_SPATIAL => " USING GIST ({$attributes})", | ||
| Database::INDEX_HNSW_EUCLIDEAN => " USING HNSW ({$attributes} vector_l2_ops)", | ||
| Database::INDEX_HNSW_COSINE => " USING HNSW ({$attributes} vector_cosine_ops)", | ||
| Database::INDEX_HNSW_DOT => " USING HNSW ({$attributes} vector_ip_ops)", | ||
| Database::INDEX_OBJECT => " USING GIN ({$attributes})", | ||
| Database::INDEX_TRIGRAM => | ||
| $sql .= match (true) { | ||
| $type === Database::INDEX_SPATIAL => " USING GIST ({$attributes})", | ||
| $type === Database::INDEX_HNSW_EUCLIDEAN => " USING HNSW ({$attributes} vector_l2_ops)", | ||
| $type === Database::INDEX_HNSW_COSINE => " USING HNSW ({$attributes} vector_cosine_ops)", | ||
| $type === Database::INDEX_HNSW_DOT => " USING HNSW ({$attributes} vector_ip_ops)", | ||
| $type === Database::INDEX_OBJECT, $isArrayIndex => " USING GIN ({$attributes})", | ||
| $type === Database::INDEX_TRIGRAM => | ||
| " USING GIN (" . implode(', ', array_map( | ||
| fn ($attr) => "$attr gin_trgm_ops", | ||
| array_map(fn ($attr) => trim($attr), explode(',', $attributes)) | ||
|
|
@@ -1937,7 +1957,7 @@ protected function getFulltextValue(string $value): string | |
| protected function getSQLType(string $type, int $size, bool $signed = true, bool $array = false, bool $required = false): string | ||
| { | ||
| if ($array === true) { | ||
| return 'JSONB'; | ||
| return "JSONB NOT NULL DEFAULT '[]'::jsonb"; | ||
| } | ||
|
Comment on lines
1957
to
1961
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: # Search for the getSQLType method implementation to confirm the behavior with arrays
rg -A 30 "protected function getSQLType" src/Database/Adapter/Postgres.phpRepository: utopia-php/database Length of output: 1197 🏁 Script executed: # Search for updateAttribute method to see how it uses getSQLType
rg -A 50 "protected function updateAttribute" src/Database/Adapter/Postgres.php | head -80Repository: utopia-php/database Length of output: 45 🌐 Web query:
💡 Result:
To change those properties, use separate subcommands: ALTER TABLE t
ALTER COLUMN c TYPE new_type USING ...,
ALTER COLUMN c SET DEFAULT new_default,
ALTER COLUMN c SET NOT NULL;PostgreSQL will also try to cast the existing column default (if any) and constraints that reference the column when you change the type, which is why doing Sources: Citations:
🏁 Script executed: # Search for updateAttribute method with more context
rg -A 100 "protected function updateAttribute" src/Database/Adapter/Postgres.php | head -120Repository: utopia-php/database Length of output: 45 🏁 Script executed: # Also search for where getSQLType is called within updateAttribute context
rg -B 5 -A 15 "getSQLType.*\$array" src/Database/Adapter/Postgres.phpRepository: utopia-php/database Length of output: 45 🏁 Script executed: # Search for ALTER COLUMN in Postgres.php to find where updateAttribute uses the type
rg -B 10 "ALTER COLUMN" src/Database/Adapter/Postgres.php | head -100Repository: utopia-php/database Length of output: 299 🏁 Script executed: # Try to find updateAttribute by searching for the method name more broadly
rg "updateAttribute" src/Database/Adapter/Postgres.phpRepository: utopia-php/database Length of output: 251 🏁 Script executed: # Get the full updateAttribute implementation
rg -A 150 "public function updateAttribute" src/Database/Adapter/Postgres.php | head -200Repository: utopia-php/database Length of output: 5027
ALTER TABLE schema.table ALTER COLUMN "col" TYPE JSONB NOT NULL DEFAULT '[]'::jsonbPostgreSQL's This is a regression: the previous return value ( Proposed fix in
|
||
|
|
||
| switch ($type) { | ||
|
|
@@ -2117,6 +2137,16 @@ public function getSupportForTimeouts(): bool | |
| return true; | ||
| } | ||
|
|
||
| public function getSupportForCastIndexArray(): bool | ||
| { | ||
| return true; | ||
| } | ||
|
|
||
| public function getSupportForGinIndex(): bool | ||
| { | ||
| return true; | ||
| } | ||
|
|
||
| /** | ||
| * Does the adapter handle Query Array Overlaps? | ||
| * | ||
|
|
@@ -2688,12 +2718,12 @@ protected function getOperatorSQL(string $column, Operator $operator, int &$bind | |
| case Operator::TYPE_ARRAY_APPEND: | ||
| $bindKey = "op_{$bindIndex}"; | ||
| $bindIndex++; | ||
| return "{$quotedColumn} = COALESCE({$columnRef}, '[]'::jsonb) || :$bindKey::jsonb"; | ||
| return "{$quotedColumn} = {$columnRef} || :$bindKey::jsonb"; | ||
|
|
||
| case Operator::TYPE_ARRAY_PREPEND: | ||
| $bindKey = "op_{$bindIndex}"; | ||
| $bindIndex++; | ||
| return "{$quotedColumn} = :$bindKey::jsonb || COALESCE({$columnRef}, '[]'::jsonb)"; | ||
| return "{$quotedColumn} = :$bindKey::jsonb || {$columnRef}"; | ||
|
|
||
| case Operator::TYPE_ARRAY_UNIQUE: | ||
| return "{$quotedColumn} = COALESCE(( | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1545,6 +1545,11 @@ public function getSupportForCastIndexArray(): bool | |
| return false; | ||
| } | ||
|
|
||
| public function getSupportForGinIndex(): bool | ||
| { | ||
| return false; | ||
| } | ||
|
|
||
| public function getSupportForRelationships(): bool | ||
| { | ||
| return true; | ||
|
|
@@ -2599,8 +2604,12 @@ public function upsertDocuments( | |
| $spatialAttributes = $this->getSpatialAttributes($collection); | ||
|
|
||
| $attributeDefaults = []; | ||
| $arrayAttributes = []; | ||
| foreach ($collection->getAttribute('attributes', []) as $attr) { | ||
| $attributeDefaults[$attr['$id']] = $attr['default'] ?? null; | ||
| if ($attr['array'] ?? false) { | ||
| $arrayAttributes[$attr['$id']] = true; | ||
| } | ||
| } | ||
|
|
||
| $collection = $collection->getId(); | ||
|
|
@@ -2671,6 +2680,11 @@ public function upsertDocuments( | |
| foreach ($allColumnNames as $attributeKey) { | ||
| $attrValue = $currentRegularAttributes[$attributeKey] ?? null; | ||
|
|
||
| // Array columns are NOT NULL DEFAULT '[]', so coerce null to empty array | ||
| if ($attrValue === null && isset($arrayAttributes[$attributeKey])) { | ||
| $attrValue = '[]'; | ||
| } | ||
|
Comment on lines
+2683
to
+2686
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: # First, check the file structure and locate the relevant methods
fd "SQL.php" --type f | head -5Repository: utopia-php/database Length of output: 122 🏁 Script executed: # Get line count for SQL.php to understand the file size
wc -l src/Database/Adapter/SQL.phpRepository: utopia-php/database Length of output: 96 🏁 Script executed: # Check the createDocuments method around line 2451-2586
sed -n '2451,2500p' src/Database/Adapter/SQL.php | cat -nRepository: utopia-php/database Length of output: 2189 🏁 Script executed: # Check the $arrayAttributes collection and coercion in upsertDocuments (lines 2607-2686)
sed -n '2607,2700p' src/Database/Adapter/SQL.php | cat -nRepository: utopia-php/database Length of output: 4999 🏁 Script executed: # Check the second coercion block mentioned (lines 2815-2818)
sed -n '2800,2830p' src/Database/Adapter/SQL.php | cat -nRepository: utopia-php/database Length of output: 1895 🏁 Script executed: # Search for encode method in Database.php to check for upstream coercion
rg -n "public function encode|private function encode" src/Database/Database.php -A 15 | head -80Repository: utopia-php/database Length of output: 899 🏁 Script executed: # See the rest of createDocuments to find the value binding logic
sed -n '2500,2586p' src/Database/Adapter/SQL.php | cat -nRepository: utopia-php/database Length of output: 4231 🏁 Script executed: # See the full Database::encode() method to check for array coercion
sed -n '8177,8250p' src/Database/Database.php | cat -nRepository: utopia-php/database Length of output: 3322 🏁 Script executed: # Verify that createDocuments is called in context where encode() may not be applied first
rg -n "createDocuments" src/Database/Database.php -B 3 -A 3 | head -50Repository: utopia-php/database Length of output: 662 🏁 Script executed: # Check if Database::createDocuments calls encode() before adapter->createDocuments()
sed -n '5118,5200p' src/Database/Database.php | cat -nRepository: utopia-php/database Length of output: 4233
🛡️ Proposed fix in
|
||
|
|
||
| if (\is_array($attrValue)) { | ||
| $attrValue = \json_encode($attrValue); | ||
| } | ||
|
|
@@ -2798,6 +2812,11 @@ public function upsertDocuments( | |
| foreach ($allColumnNames as $attributeKey) { | ||
| $attrValue = $currentRegularAttributes[$attributeKey] ?? null; | ||
|
|
||
| // Array columns are NOT NULL DEFAULT '[]', so coerce null to empty array | ||
| if ($attrValue === null && isset($arrayAttributes[$attributeKey])) { | ||
| $attrValue = '[]'; | ||
| } | ||
|
|
||
| if (\is_array($attrValue)) { | ||
| $attrValue = \json_encode($attrValue); | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
Repository: utopia-php/database
Length of output: 2845
🏁 Script executed:
Repository: utopia-php/database
Length of output: 1486
🏁 Script executed:
Repository: utopia-php/database
Length of output: 3009
🏁 Script executed:
Repository: utopia-php/database
Length of output: 16758
🏁 Script executed:
Repository: utopia-php/database
Length of output: 45
🏁 Script executed:
Repository: utopia-php/database
Length of output: 16616
🏁 Script executed:
Repository: utopia-php/database
Length of output: 45
🏁 Script executed:
Repository: utopia-php/database
Length of output: 45
🏁 Script executed:
Repository: utopia-php/database
Length of output: 45
🏁 Script executed:
Repository: utopia-php/database
Length of output: 707
🏁 Script executed:
Repository: utopia-php/database
Length of output: 279
🏁 Script executed:
Repository: utopia-php/database
Length of output: 1068
🏁 Script executed:
Repository: utopia-php/database
Length of output: 4513
🏁 Script executed:
Repository: utopia-php/database
Length of output: 45
🏁 Script executed:
Repository: utopia-php/database
Length of output: 860
🏁 Script executed:
Repository: utopia-php/database
Length of output: 45
🏁 Script executed:
Repository: utopia-php/database
Length of output: 3855
🏁 Script executed:
Repository: utopia-php/database
Length of output: 45
🏁 Script executed:
Repository: utopia-php/database
Length of output: 45
🏁 Script executed:
# Check if there's a difference between initial creation and update for NOT NULL handling git log --oneline -20 -- src/Database/Adapter/MariaDB.phpRepository: utopia-php/database
Length of output: 159
🏁 Script executed:
Repository: utopia-php/database
Length of output: 2426
🏁 Script executed:
Repository: utopia-php/database
Length of output: 45
🏁 Script executed:
Repository: utopia-php/database
Length of output: 5879
🏁 Script executed:
Repository: utopia-php/database
Length of output: 1608
🏁 Script executed:
Repository: utopia-php/database
Length of output: 364
🏁 Script executed:
Repository: utopia-php/database
Length of output: 221
🏁 Script executed:
Repository: utopia-php/database
Length of output: 45
🏁 Script executed:
Repository: utopia-php/database
Length of output: 1508
🏁 Script executed:
Repository: utopia-php/database
Length of output: 45
🏁 Script executed:
Repository: utopia-php/database
Length of output: 45
🏁 Script executed:
Repository: utopia-php/database
Length of output: 1706
🏁 Script executed:
Repository: utopia-php/database
Length of output: 1632
🏁 Script executed:
Repository: utopia-php/database
Length of output: 1066
🏁 Script executed:
Repository: utopia-php/database
Length of output: 45
🏁 Script executed:
Repository: utopia-php/database
Length of output: 45
🏁 Script executed:
Repository: utopia-php/database
Length of output: 5648
🏁 Script executed:
Repository: utopia-php/database
Length of output: 598
🏁 Script executed:
Repository: utopia-php/database
Length of output: 539
MODIFY/CHANGE COLUMN … NOT NULLwill fail for existing tables with any NULL array values.updateAttribute()(Database.php, line 2915) passesgetSQLType()output directly to the adapter'supdateAttribute()method, which executesALTER TABLE … MODIFY/CHANGE COLUMNwith no preceding UPDATE to backfill NULLs. Any production collection with existing NULL values in an array column will cause theALTER TABLEto fail with a data-integrity error when this code path is triggered.The PHP-side null-to-
[]coercion only governs future writes and does not retroactively fix existing DB NULLs.A safe approach is to issue an
UPDATEto coerce NULLs toJSON_ARRAY()before theALTER TABLE:🛡️ Proposed fix: backfill NULLs before MODIFY
public function updateAttribute(string $collection, string $id, string $type, int $size, bool $signed = true, bool $array = false, ?string $newKey = null, bool $required = false): bool { $name = $this->filter($collection); $id = $this->filter($id); $newKey = empty($newKey) ? null : $this->filter($newKey); $type = $this->getSQLType($type, $size, $signed, $array, $required); + + // Backfill NULLs before adding NOT NULL constraint on array columns + if ($array) { + $backfill = "UPDATE {$this->getSQLTable($name)} SET `{$id}` = JSON_ARRAY() WHERE `{$id}` IS NULL"; + $this->getPDO()->prepare($backfill)->execute(); + } + if (!empty($newKey)) { $sql = "ALTER TABLE {$this->getSQLTable($name)} CHANGE COLUMN `{$id}` `{$newKey}` {$type};"; } else { $sql = "ALTER TABLE {$this->getSQLTable($name)} MODIFY `{$id}` {$type};"; }Additionally,
TYPE_ARRAY_APPENDandTYPE_ARRAY_PREPEND(lines 2085, 2090) dropped theirIFNULLguards, whileTYPE_ARRAY_REMOVE,TYPE_ARRAY_UNIQUE,TYPE_ARRAY_INTERSECT,TYPE_ARRAY_DIFF, andTYPE_ARRAY_FILTER(lines 2103–2161) retain them. This asymmetry is harmless with theNOT NULLconstraint but creates inconsistency that will mislead future maintainers. Consider applyingIFNULLconsistently across all array operators for clarity.🤖 Prompt for AI Agents