Skip to content

Commit dc9132b

Browse files
committed
feat(query): Enhance inverted index for VARIANT type to precisely match objects within arrays
1 parent b66c492 commit dc9132b

File tree

4 files changed

+51
-14
lines changed

4 files changed

+51
-14
lines changed

Cargo.lock

Lines changed: 10 additions & 10 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -661,9 +661,9 @@ recursive = { git = "https://github.com/datafuse-extras/recursive.git", rev = "1
661661
sled = { git = "https://github.com/datafuse-extras/sled", tag = "v0.34.7-datafuse.1" }
662662
state-machine-api = { git = "https://github.com/databendlabs/state-machine-api.git", tag = "v0.3.4" }
663663
sub-cache = { git = "https://github.com/databendlabs/sub-cache", tag = "v0.2.1" }
664-
tantivy = { git = "https://github.com/datafuse-extras/tantivy", rev = "9065a4d" }
665-
tantivy-common = { git = "https://github.com/datafuse-extras/tantivy", rev = "9065a4d", package = "tantivy-common" }
666-
tantivy-jieba = { git = "https://github.com/datafuse-extras/tantivy-jieba", rev = "ac27464" }
667-
tantivy-query-grammar = { git = "https://github.com/datafuse-extras/tantivy", rev = "9065a4d", package = "tantivy-query-grammar" }
664+
tantivy = { git = "https://github.com/b41sh/tantivy", rev = "edadb8a6bc167723219d60d11f509fa4882e93b7" }
665+
tantivy-common = { git = "https://github.com/b41sh/tantivy", rev = "edadb8a6bc167723219d60d11f509fa4882e93b7", package = "tantivy-common" }
666+
tantivy-jieba = { git = "https://github.com/b41sh/tantivy-jieba", rev = "4486c42aa2933fe28c4bcd40273193f59c4e3182" }
667+
tantivy-query-grammar = { git = "https://github.com/b41sh/tantivy", rev = "edadb8a6bc167723219d60d11f509fa4882e93b7", package = "tantivy-query-grammar" }
668668
watcher = { git = "https://github.com/databendlabs/watcher", tag = "v0.4.2" }
669669
xorfilter-rs = { git = "https://github.com/datafuse-extras/xorfilter", tag = "databend-alpha.4" }

src/query/storages/fuse/src/pruning/inverted_index_pruner.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use std::sync::Arc;
1818
use databend_common_catalog::plan::InvertedIndexInfo;
1919
use databend_common_catalog::plan::PushDownInfo;
2020
use databend_common_exception::Result;
21+
use databend_common_expression::types::DataType;
2122
use databend_common_expression::types::F32;
2223
use databend_storages_common_io::ReadSettings;
2324
use opendal::Operator;
@@ -95,6 +96,14 @@ impl InvertedIndexPruner {
9596
need_position = true;
9697
}
9798
});
99+
for field_id in &field_ids {
100+
let field = inverted_index_info.index_schema.field(*field_id as usize);
101+
let data_type = field.data_type().remove_nullable();
102+
if data_type == DataType::Variant {
103+
need_position = true;
104+
break;
105+
}
106+
}
98107

99108
// whether we need to generate the internal score column
100109
let has_score = inverted_index_info.has_score;

tests/sqllogictests/suites/query/index/04_inverted_index/04_0000_inverted_index_base.test

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,33 @@ query IT
500500
select * from t2 where query('body:test');
501501
----
502502

503+
504+
statement ok
505+
CREATE TABLE t3 (id int, body variant, INVERTED INDEX idx (body))
506+
507+
statement ok
508+
INSERT INTO t3 VALUES
509+
(1, '{"videoInfo":{"extraData":[{ "name": "codecA", "type": "mp4" },{ "name": "codecB", "type": "jpg" }]}}'),
510+
(2, '{"videoInfo":{"extraData":[{ "name": "codecA", "type": "jpg" },{ "name": "codecA", "type": "mp4" }]}}'),
511+
(3, '{"videoInfo":{"extraData":[{ "name": "codecA", "type": "jpg" },{ "name": "codecB", "type": "mp4" }]}}');
512+
513+
query IT
514+
select * from t3 where query('body.videoInfo.extraData.name:codecA AND body.videoInfo.extraData.type:jpg');
515+
----
516+
2 {"videoInfo":{"extraData":[{"name":"codecA","type":"jpg"},{"name":"codecA","type":"mp4"}]}}
517+
3 {"videoInfo":{"extraData":[{"name":"codecA","type":"jpg"},{"name":"codecB","type":"mp4"}]}}
518+
519+
query IT
520+
select * from t3 where query('body.videoInfo.extraData.name:codecA AND body.videoInfo.extraData.type:mp4');
521+
----
522+
1 {"videoInfo":{"extraData":[{"name":"codecA","type":"mp4"},{"name":"codecB","type":"jpg"}]}}
523+
2 {"videoInfo":{"extraData":[{"name":"codecA","type":"jpg"},{"name":"codecA","type":"mp4"}]}}
524+
525+
query IT
526+
select * from t3 where query('body.videoInfo.extraData.name:codecB AND body.videoInfo.extraData.type:jpg');
527+
----
528+
1 {"videoInfo":{"extraData":[{"name":"codecA","type":"mp4"},{"name":"codecB","type":"jpg"}]}}
529+
503530
statement ok
504531
CREATE TABLE t_native (id int, content string, INVERTED INDEX idx1 (content)) storage_format = 'native' row_per_page = 2;
505532

@@ -528,3 +555,4 @@ use default
528555
statement ok
529556
drop database test_inverted_index
530557

558+

0 commit comments

Comments
 (0)