diff --git a/Cargo.lock b/Cargo.lock index 758356029b890..1689d76fa0ee8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11686,7 +11686,7 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] name = "ownedbytes" version = "0.9.0" -source = "git+https://github.com/datafuse-extras/tantivy?rev=9065a4d#9065a4de248d7b077560dd3602e0ced82471d8b5" +source = "git+https://github.com/b41sh/tantivy?rev=48fb0f7f3c393de67a0b46d8a3be50f085312fd4#48fb0f7f3c393de67a0b46d8a3be50f085312fd4" dependencies = [ "stable_deref_trait", ] @@ -15453,7 +15453,7 @@ checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" [[package]] name = "tantivy" version = "0.25.0" -source = "git+https://github.com/datafuse-extras/tantivy?rev=9065a4d#9065a4de248d7b077560dd3602e0ced82471d8b5" +source = "git+https://github.com/b41sh/tantivy?rev=48fb0f7f3c393de67a0b46d8a3be50f085312fd4#48fb0f7f3c393de67a0b46d8a3be50f085312fd4" dependencies = [ "aho-corasick", "arc-swap", @@ -15504,7 +15504,7 @@ dependencies = [ [[package]] name = "tantivy-bitpacker" version = "0.9.0" -source = "git+https://github.com/datafuse-extras/tantivy?rev=9065a4d#9065a4de248d7b077560dd3602e0ced82471d8b5" +source = "git+https://github.com/b41sh/tantivy?rev=48fb0f7f3c393de67a0b46d8a3be50f085312fd4#48fb0f7f3c393de67a0b46d8a3be50f085312fd4" dependencies = [ "bitpacking 0.9.2", ] @@ -15512,7 +15512,7 @@ dependencies = [ [[package]] name = "tantivy-columnar" version = "0.6.0" -source = "git+https://github.com/datafuse-extras/tantivy?rev=9065a4d#9065a4de248d7b077560dd3602e0ced82471d8b5" +source = "git+https://github.com/b41sh/tantivy?rev=48fb0f7f3c393de67a0b46d8a3be50f085312fd4#48fb0f7f3c393de67a0b46d8a3be50f085312fd4" dependencies = [ "downcast-rs", "fastdivide", @@ -15527,7 +15527,7 @@ dependencies = [ [[package]] name = "tantivy-common" version = "0.10.0" -source = "git+https://github.com/datafuse-extras/tantivy?rev=9065a4d#9065a4de248d7b077560dd3602e0ced82471d8b5" +source = "git+https://github.com/b41sh/tantivy?rev=48fb0f7f3c393de67a0b46d8a3be50f085312fd4#48fb0f7f3c393de67a0b46d8a3be50f085312fd4" dependencies = [ "async-trait", "byteorder", @@ -15550,7 +15550,7 @@ dependencies = [ [[package]] name = "tantivy-jieba" version = "0.17.0" -source = "git+https://github.com/datafuse-extras/tantivy-jieba?rev=ac27464#ac27464d5d2f35320b83cd7cb66df68052d9bc18" +source = "git+https://github.com/b41sh/tantivy-jieba?rev=de314415bdcbf01ab10ab3f76b8eef7cdb6fadf1#de314415bdcbf01ab10ab3f76b8eef7cdb6fadf1" dependencies = [ "jieba-rs", "lazy_static", @@ -15560,7 +15560,7 @@ dependencies = [ [[package]] name = "tantivy-query-grammar" version = "0.25.0" -source = "git+https://github.com/datafuse-extras/tantivy?rev=9065a4d#9065a4de248d7b077560dd3602e0ced82471d8b5" +source = "git+https://github.com/b41sh/tantivy?rev=48fb0f7f3c393de67a0b46d8a3be50f085312fd4#48fb0f7f3c393de67a0b46d8a3be50f085312fd4" dependencies = [ "fnv", "nom 7.1.3", @@ -15572,7 +15572,7 @@ dependencies = [ [[package]] name = "tantivy-sstable" version = "0.6.0" -source = "git+https://github.com/datafuse-extras/tantivy?rev=9065a4d#9065a4de248d7b077560dd3602e0ced82471d8b5" +source = "git+https://github.com/b41sh/tantivy?rev=48fb0f7f3c393de67a0b46d8a3be50f085312fd4#48fb0f7f3c393de67a0b46d8a3be50f085312fd4" dependencies = [ "futures-util", "itertools 0.14.0", @@ -15585,7 +15585,7 @@ dependencies = [ [[package]] name = "tantivy-stacker" version = "0.6.0" -source = "git+https://github.com/datafuse-extras/tantivy?rev=9065a4d#9065a4de248d7b077560dd3602e0ced82471d8b5" +source = "git+https://github.com/b41sh/tantivy?rev=48fb0f7f3c393de67a0b46d8a3be50f085312fd4#48fb0f7f3c393de67a0b46d8a3be50f085312fd4" dependencies = [ "murmurhash32", "rand_distr", @@ -15595,7 +15595,7 @@ dependencies = [ [[package]] name = "tantivy-tokenizer-api" version = "0.6.0" -source = "git+https://github.com/datafuse-extras/tantivy?rev=9065a4d#9065a4de248d7b077560dd3602e0ced82471d8b5" +source = "git+https://github.com/b41sh/tantivy?rev=48fb0f7f3c393de67a0b46d8a3be50f085312fd4#48fb0f7f3c393de67a0b46d8a3be50f085312fd4" dependencies = [ "serde", ] diff --git a/Cargo.toml b/Cargo.toml index 8143f848e0d7e..929fd0472419c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -661,9 +661,9 @@ recursive = { git = "https://github.com/datafuse-extras/recursive.git", rev = "1 sled = { git = "https://github.com/datafuse-extras/sled", tag = "v0.34.7-datafuse.1" } state-machine-api = { git = "https://github.com/databendlabs/state-machine-api.git", tag = "v0.3.4" } sub-cache = { git = "https://github.com/databendlabs/sub-cache", tag = "v0.2.1" } -tantivy = { git = "https://github.com/datafuse-extras/tantivy", rev = "9065a4d" } -tantivy-common = { git = "https://github.com/datafuse-extras/tantivy", rev = "9065a4d", package = "tantivy-common" } -tantivy-jieba = { git = "https://github.com/datafuse-extras/tantivy-jieba", rev = "ac27464" } -tantivy-query-grammar = { git = "https://github.com/datafuse-extras/tantivy", rev = "9065a4d", package = "tantivy-query-grammar" } +tantivy = { git = "https://github.com/b41sh/tantivy", rev = "48fb0f7f3c393de67a0b46d8a3be50f085312fd4" } +tantivy-common = { git = "https://github.com/b41sh/tantivy", rev = "48fb0f7f3c393de67a0b46d8a3be50f085312fd4", package = "tantivy-common" } +tantivy-jieba = { git = "https://github.com/b41sh/tantivy-jieba", rev = "de314415bdcbf01ab10ab3f76b8eef7cdb6fadf1" } +tantivy-query-grammar = { git = "https://github.com/b41sh/tantivy", rev = "48fb0f7f3c393de67a0b46d8a3be50f085312fd4", package = "tantivy-query-grammar" } watcher = { git = "https://github.com/databendlabs/watcher", tag = "v0.4.2" } xorfilter-rs = { git = "https://github.com/datafuse-extras/xorfilter", tag = "databend-alpha.4" } diff --git a/src/query/storages/fuse/src/pruning/inverted_index_pruner.rs b/src/query/storages/fuse/src/pruning/inverted_index_pruner.rs index 703ec1c65dd71..df571d03ab5d4 100644 --- a/src/query/storages/fuse/src/pruning/inverted_index_pruner.rs +++ b/src/query/storages/fuse/src/pruning/inverted_index_pruner.rs @@ -18,6 +18,7 @@ use std::sync::Arc; use databend_common_catalog::plan::InvertedIndexInfo; use databend_common_catalog::plan::PushDownInfo; use databend_common_exception::Result; +use databend_common_expression::types::DataType; use databend_common_expression::types::F32; use databend_storages_common_io::ReadSettings; use opendal::Operator; @@ -95,6 +96,14 @@ impl InvertedIndexPruner { need_position = true; } }); + for field_id in &field_ids { + let field = inverted_index_info.index_schema.field(*field_id as usize); + let data_type = field.data_type().remove_nullable(); + if data_type == DataType::Variant { + need_position = true; + break; + } + } // whether need to generate score internl column let has_score = inverted_index_info.has_score; diff --git a/tests/sqllogictests/suites/query/index/04_inverted_index/04_0000_inverted_index_base.test b/tests/sqllogictests/suites/query/index/04_inverted_index/04_0000_inverted_index_base.test index 56275d246e699..6a2c632acdea0 100644 --- a/tests/sqllogictests/suites/query/index/04_inverted_index/04_0000_inverted_index_base.test +++ b/tests/sqllogictests/suites/query/index/04_inverted_index/04_0000_inverted_index_base.test @@ -500,6 +500,33 @@ query IT select * from t2 where query('body:test'); ---- + +statement ok +CREATE TABLE t3 (id int, body variant, INVERTED INDEX idx (body)) + +statement ok +INSERT INTO t3 VALUES +(1, '{"videoInfo":{"extraData":[{ "name": "codecA", "type": "mp4" },{ "name": "codecB", "type": "jpg" }]}}'), +(2, '{"videoInfo":{"extraData":[{ "name": "codecA", "type": "jpg" },{ "name": "codecA", "type": "mp4" }]}}'), +(3, '{"videoInfo":{"extraData":[{ "name": "codecA", "type": "jpg" },{ "name": "codecB", "type": "mp4" }]}}'); + +query IT +select * from t3 where query('body.videoInfo.extraData.name:codecA AND body.videoInfo.extraData.type:jpg'); +---- +2 {"videoInfo":{"extraData":[{"name":"codecA","type":"jpg"},{"name":"codecA","type":"mp4"}]}} +3 {"videoInfo":{"extraData":[{"name":"codecA","type":"jpg"},{"name":"codecB","type":"mp4"}]}} + +query IT +select * from t3 where query('body.videoInfo.extraData.name:codecA AND body.videoInfo.extraData.type:mp4'); +---- +1 {"videoInfo":{"extraData":[{"name":"codecA","type":"mp4"},{"name":"codecB","type":"jpg"}]}} +2 {"videoInfo":{"extraData":[{"name":"codecA","type":"jpg"},{"name":"codecA","type":"mp4"}]}} + +query IT +select * from t3 where query('body.videoInfo.extraData.name:codecB AND body.videoInfo.extraData.type:jpg'); +---- +1 {"videoInfo":{"extraData":[{"name":"codecA","type":"mp4"},{"name":"codecB","type":"jpg"}]}} + statement ok CREATE TABLE t_native (id int, content string, INVERTED INDEX idx1 (content)) storage_format = 'native' row_per_page = 2; @@ -528,3 +555,4 @@ use default statement ok drop database test_inverted_index +