Skip to content

Commit 85a7a7b

Browse files
authored
fix: skip json path index if the query path includes number (#46200)
issue: #45511 Our Tantivy inverted index currently does not include the item index when the value is an array, so we can't do `a[0] == 'b'`-style lookups in the inverted index. For such queries, we need to skip the index and fall back to brute-force search. We may improve our index in the future, so this is a temporary solution. Signed-off-by: Buqian Zheng <[email protected]>
1 parent bb486c0 commit 85a7a7b

File tree

2 files changed

+53
-2
lines changed

2 files changed

+53
-2
lines changed

internal/core/src/exec/expression/Expr.h

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1431,7 +1431,59 @@ class SegmentExpr : public Expr {
14311431
CanUseIndex() const {
14321432
// Ngram index should be used in specific execution path (CanUseNgramIndex -> ExecNgramMatch).
14331433
// TODO: if multiple indexes are supported, this logic should be changed
1434-
return num_index_chunk_ != 0 && !CanUseNgramIndex();
1434+
if (num_index_chunk_ == 0 || CanUseNgramIndex()) {
1435+
return false;
1436+
}
1437+
1438+
// For JSON fields with JsonFlatIndex, check if prefix matching is valid.
1439+
// Tantivy JSON index can handle nested object paths (e.g., "a.b") but NOT
1440+
// numeric array indices (e.g., "a.0"). Per RFC 6901, JSON Pointer doesn't
1441+
// distinguish between array indices and object keys syntactically. Since
1442+
// Tantivy doesn't store array index information, we must fall back to
1443+
// brute-force search when the relative path contains numeric segments.
1444+
if (field_type_ != DataType::JSON || pinned_index_.empty()) {
1445+
return true;
1446+
}
1447+
1448+
auto json_flat_index =
1449+
dynamic_cast<const index::JsonFlatIndex*>(pinned_index_[0].get());
1450+
if (json_flat_index == nullptr) {
1451+
return true;
1452+
}
1453+
1454+
auto index_path = json_flat_index->GetNestedPath();
1455+
auto query_path = milvus::Json::pointer(nested_path_);
1456+
1457+
// Exact match - safe to use index
1458+
if (index_path == query_path) {
1459+
return true;
1460+
}
1461+
1462+
// PinJsonIndex guarantees index_path is a prefix of query_path
1463+
1464+
// Get relative path (e.g., if index_path="/a" and query_path="/a/0/b",
1465+
// relative_path="/0/b")
1466+
auto relative_path = query_path.substr(index_path.length());
1467+
1468+
// Check if any path segment is numeric (potential array index)
1469+
size_t pos = 0;
1470+
while (pos < relative_path.length()) {
1471+
if (relative_path[pos] == '/') {
1472+
pos++;
1473+
continue;
1474+
}
1475+
size_t end = relative_path.find('/', pos);
1476+
if (end == std::string::npos) {
1477+
end = relative_path.length();
1478+
}
1479+
auto segment = relative_path.substr(pos, end - pos);
1480+
if (!segment.empty() && milvus::IsInteger(segment)) {
1481+
return false;
1482+
}
1483+
pos = end;
1484+
}
1485+
1486+
return true;
14351487
}
14361488

14371489
template <typename T>

tests/python_client/milvus_client/test_milvus_client_search.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3633,7 +3633,6 @@ def test_milvus_client_search_null_expr_array(self, nullable, null_expr_op, is_f
36333633
"limit": limit})
36343634

36353635

3636-
@pytest.mark.skip(reason="issue #45511")
36373636
class TestMilvusClientSearchJsonPathIndex(TestMilvusClientV2Base):
36383637
""" Test case of search interface """
36393638

0 commit comments

Comments
 (0)