Skip to content

Commit da14480

Browse files
authored
Add num dimensions and metadata_mappings to ElasticsearchStore with testing (#75)
* add num_dimensions and metadata_mappings to async vectorstore * add num_dimensions and metadata_mappings to sync vectorstore * add async integration tests * add sync integration tests * add async unit tests * add sync unit tests * Fix num_dimensions integration tests to use ConsistentFakeEmbeddings and proper assertions * make match test use new ConsistentFakeEmbeddings * check expected dimensionality for tests * fix lint for #3.9 * Edit code comment * adjust test_metadata_mappings_integration to use * fix import for PEP8 recommendations * lint
1 parent 9131ea1 commit da14480

File tree

6 files changed

+344
-2
lines changed

6 files changed

+344
-2
lines changed

libs/elasticsearch/langchain_elasticsearch/_async/vectorstores.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,8 @@ def __init__(
320320
] = ApproxRetrievalStrategy(),
321321
es_params: Optional[Dict[str, Any]] = None,
322322
custom_index_settings: Optional[Dict[str, Any]] = None,
323+
num_dimensions: Optional[int] = None,
324+
metadata_mappings: Optional[Dict[str, Any]] = None,
323325
):
324326
if isinstance(strategy, BaseRetrievalStrategy):
325327
strategy = _convert_retrieval_strategy(
@@ -345,8 +347,10 @@ def __init__(
345347
index=index_name,
346348
retrieval_strategy=strategy,
347349
embedding_service=embedding_service,
350+
num_dimensions=num_dimensions,
348351
text_field=query_field,
349352
vector_field=vector_query_field,
353+
metadata_mappings=metadata_mappings,
350354
user_agent=user_agent("langchain-py-vs"),
351355
custom_index_settings=custom_index_settings,
352356
)

libs/elasticsearch/langchain_elasticsearch/_sync/vectorstores.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,8 @@ def __init__(
320320
] = ApproxRetrievalStrategy(),
321321
es_params: Optional[Dict[str, Any]] = None,
322322
custom_index_settings: Optional[Dict[str, Any]] = None,
323+
num_dimensions: Optional[int] = None,
324+
metadata_mappings: Optional[Dict[str, Any]] = None,
323325
):
324326
if isinstance(strategy, BaseRetrievalStrategy):
325327
strategy = _convert_retrieval_strategy(
@@ -345,8 +347,10 @@ def __init__(
345347
index=index_name,
346348
retrieval_strategy=strategy,
347349
embedding_service=embedding_service,
350+
num_dimensions=num_dimensions,
348351
text_field=query_field,
349352
vector_field=vector_query_field,
353+
metadata_mappings=metadata_mappings,
350354
user_agent=user_agent("langchain-py-vs"),
351355
custom_index_settings=custom_index_settings,
352356
)

libs/elasticsearch/tests/integration_tests/_async/test_vectorstores.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1051,3 +1051,85 @@ async def test_elasticsearch_delete_ids(
10511051
await docsearch.adelete([ids[3]])
10521052
output = await docsearch.asimilarity_search("gni", k=10)
10531053
assert len(output) == 0
1054+
1055+
@pytest.mark.asyncio
async def test_num_dimensions_mismatch_and_match(
    self, es_params: dict, index_name: str
) -> None:
    """Check ``num_dimensions`` against the embedding size, in both directions.

    A declared dimensionality that disagrees with the vectors produced by the
    embedding model must make indexing fail; a declared dimensionality that
    agrees must index the texts and allow them to be searched.

    Args:
        es_params: Connection keyword arguments forwarded to the store.
        index_name: Base index name; each case appends its own suffix so the
            two cases never share an index.
    """
    texts = ["foo", "bar"]

    # Case 1: mismatch should fail. The call is expected to raise, so its
    # result is deliberately not bound to a name.
    # NOTE(review): the concrete exception type is not visible from here;
    # narrow ``Exception`` once it has been confirmed.
    with pytest.raises(Exception):  # Should fail when trying to add documents
        await AsyncElasticsearchStore.afrom_texts(
            texts,
            AsyncConsistentFakeEmbeddings(),  # Creates 16-dimensional vectors
            num_dimensions=5,  # Mismatch: 5 vs 16
            **es_params,
            index_name=f"{index_name}_mismatch",  # Use separate index
        )

    # Case 2: match should work.
    docsearch = await AsyncElasticsearchStore.afrom_texts(
        texts,
        AsyncConsistentFakeEmbeddings(),  # Creates 16-dimensional vectors
        num_dimensions=16,  # Match: 16 vs 16
        **es_params,
        index_name=f"{index_name}_match",  # Use separate index
    )

    try:
        # Verify it works by doing a search.
        results = await docsearch.asimilarity_search("foo", k=1)
        assert results == [Document(page_content="foo")]
    finally:
        # Close the store even when the assertion above fails.
        await docsearch.aclose()
1086+
1087+
@pytest.mark.asyncio
async def test_metadata_mappings_integration(
    self, es_params: dict, index_name: str
) -> None:
    """Test that the ``metadata_mappings`` parameter works correctly.

    Builds a store with explicit metadata field mappings, reads the index
    mapping back from Elasticsearch, and checks that every requested field
    appears under ``metadata`` with exactly the requested definition. A term
    filter on one of the mapped fields is then used to confirm the metadata
    is searchable.

    Args:
        es_params: Connection keyword arguments forwarded to the store.
        index_name: Name of the index created for this test.
    """
    metadata_mappings = {
        "category": {"type": "keyword"},
        "score": {"type": "float"},
        "tags": {"type": "text"},
    }

    texts = ["Document about cats", "Document about dogs", "Document about birds"]
    metadatas = [
        {"category": "animals", "score": 0.9, "tags": "some tag about cats"},
        {"category": "animals", "score": 0.8, "tags": "some tag about dogs"},
        {"category": "animals", "score": 0.7, "tags": "some tag about birds"},
    ]

    docsearch = await AsyncElasticsearchStore.afrom_texts(
        texts,
        AsyncConsistentFakeEmbeddings(),
        metadatas=metadatas,
        metadata_mappings=metadata_mappings,
        num_dimensions=16,
        **es_params,
        index_name=index_name,
    )

    try:
        mapping_response = await docsearch.client.indices.get_mapping(
            index=index_name
        )
        mapping_properties = mapping_response[index_name]["mappings"]["properties"]

        assert "metadata" in mapping_properties
        metadata_props = mapping_properties["metadata"]["properties"]

        # Compare against the requested mappings rather than repeating the
        # literals, so the expectation cannot drift from the input.
        for field_name, expected_mapping in metadata_mappings.items():
            assert metadata_props[field_name] == expected_mapping

        # Every document carries category "animals", so all should match.
        results = await docsearch.asimilarity_search(
            "pets",
            k=len(texts),
            filter=[{"term": {"metadata.category": "animals"}}],
        )

        assert len(results) == len(texts)
    finally:
        # Close the store even when one of the assertions above fails.
        await docsearch.aclose()

libs/elasticsearch/tests/integration_tests/_sync/test_vectorstores.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1029,3 +1029,85 @@ def test_elasticsearch_delete_ids(self, es_params: dict, index_name: str) -> Non
10291029
docsearch.delete([ids[3]])
10301030
output = docsearch.similarity_search("gni", k=10)
10311031
assert len(output) == 0
1032+
1033+
@pytest.mark.sync
def test_num_dimensions_mismatch_and_match(
    self, es_params: dict, index_name: str
) -> None:
    """Check ``num_dimensions`` against the embedding size, in both directions.

    A declared dimensionality that disagrees with the vectors produced by the
    embedding model must make indexing fail; a declared dimensionality that
    agrees must index the texts and allow them to be searched.

    Args:
        es_params: Connection keyword arguments forwarded to the store.
        index_name: Base index name; each case appends its own suffix so the
            two cases never share an index.
    """
    texts = ["foo", "bar"]

    # Case 1: mismatch should fail. The call is expected to raise, so its
    # result is deliberately not bound to a name.
    # NOTE(review): the concrete exception type is not visible from here;
    # narrow ``Exception`` once it has been confirmed.
    with pytest.raises(Exception):  # Should fail when trying to add documents
        ElasticsearchStore.from_texts(
            texts,
            ConsistentFakeEmbeddings(),  # Creates 16-dimensional vectors
            num_dimensions=5,  # Mismatch: 5 vs 16
            **es_params,
            index_name=f"{index_name}_mismatch",  # Use separate index
        )

    # Case 2: match should work.
    docsearch = ElasticsearchStore.from_texts(
        texts,
        ConsistentFakeEmbeddings(),  # Creates 16-dimensional vectors
        num_dimensions=16,  # Match: 16 vs 16
        **es_params,
        index_name=f"{index_name}_match",  # Use separate index
    )

    try:
        # Verify it works by doing a search.
        results = docsearch.similarity_search("foo", k=1)
        assert results == [Document(page_content="foo")]
    finally:
        # Close the store even when the assertion above fails.
        docsearch.close()
1064+
1065+
@pytest.mark.sync
def test_metadata_mappings_integration(
    self, es_params: dict, index_name: str
) -> None:
    """Test that the ``metadata_mappings`` parameter works correctly.

    Builds a store with explicit metadata field mappings, reads the index
    mapping back from Elasticsearch, and checks that every requested field
    appears under ``metadata`` with exactly the requested definition. A term
    filter on one of the mapped fields is then used to confirm the metadata
    is searchable.

    Args:
        es_params: Connection keyword arguments forwarded to the store.
        index_name: Name of the index created for this test.
    """
    metadata_mappings = {
        "category": {"type": "keyword"},
        "score": {"type": "float"},
        "tags": {"type": "text"},
    }

    texts = ["Document about cats", "Document about dogs", "Document about birds"]
    metadatas = [
        {"category": "animals", "score": 0.9, "tags": "some tag about cats"},
        {"category": "animals", "score": 0.8, "tags": "some tag about dogs"},
        {"category": "animals", "score": 0.7, "tags": "some tag about birds"},
    ]

    docsearch = ElasticsearchStore.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        metadata_mappings=metadata_mappings,
        num_dimensions=16,
        **es_params,
        index_name=index_name,
    )

    try:
        mapping_response = docsearch.client.indices.get_mapping(index=index_name)
        mapping_properties = mapping_response[index_name]["mappings"]["properties"]

        assert "metadata" in mapping_properties
        metadata_props = mapping_properties["metadata"]["properties"]

        # Compare against the requested mappings rather than repeating the
        # literals, so the expectation cannot drift from the input.
        for field_name, expected_mapping in metadata_mappings.items():
            assert metadata_props[field_name] == expected_mapping

        # Every document carries category "animals", so all should match.
        results = docsearch.similarity_search(
            "pets",
            k=len(texts),
            filter=[{"term": {"metadata.category": "animals"}}],
        )

        assert len(results) == len(texts)
    finally:
        # Close the store even when one of the assertions above fails.
        docsearch.close()

libs/elasticsearch/tests/unit_tests/_async/test_vectorstores.py

Lines changed: 87 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
"""Test Elasticsearch functionality."""
22

3+
import inspect
34
import re
45
from typing import Any, AsyncGenerator, Dict, List, Optional
5-
from unittest.mock import AsyncMock
6+
from unittest.mock import AsyncMock, patch
67

78
import pytest
89
from elasticsearch import AsyncElasticsearch
10+
from elasticsearch.helpers.vectorstore import VectorStore as EVectorStore
911
from langchain_core.documents import Document
1012

1113
from langchain_elasticsearch._async.vectorstores import (
@@ -417,3 +419,87 @@ async def test_elasticsearch_hybrid_scores_guard(
417419
await hybrid_store.asimilarity_search_by_vector_with_relevance_scores(
418420
[1, 2, 3]
419421
)
422+
423+
@pytest.mark.asyncio
async def test_parameter_forwarding_to_evectorstore(self) -> None:
    """Test to catch missing AsyncEVectorStore parameters.

    Compares the parameter names of ``EVectorStore.__init__`` with the
    keyword arguments AsyncElasticsearchStore passes when it constructs the
    (patched) EVectorStore. The test fails if a constructor parameter is not
    forwarded, or if a keyword is forwarded that the constructor does not
    declare, so that a new EVectorStore parameter alerts us to update
    AsyncElasticsearchStore.
    """
    client = AsyncElasticsearch(hosts=["http://dummy:9200"])

    # Names of all EVectorStore constructor parameters, without ``self``.
    evectorstore_sig = inspect.signature(EVectorStore.__init__)
    evectorstore_params = set(evectorstore_sig.parameters) - {"self"}

    with patch(
        "langchain_elasticsearch._async.vectorstores.EVectorStore"
    ) as mock_evectorstore:
        # Mock the close method to be async so the final aclose() can be
        # awaited (presumably aclose() awaits the wrapped store's close()).
        mock_evectorstore.return_value.close = AsyncMock()

        store = AsyncElasticsearchStore(
            index_name="test_index",
            es_connection=client,
            num_dimensions=1536,
        )

        try:
            # Collect the keyword names actually passed to EVectorStore.
            mock_evectorstore.assert_called_once()
            forwarded_params = set(mock_evectorstore.call_args.kwargs)

            # Check for missing parameters.
            missing_params = evectorstore_params - forwarded_params
            if missing_params:
                pytest.fail(
                    "AsyncElasticsearchStore is missing these EVectorStore "
                    f"parameters: {missing_params}. Please add them to "
                    "AsyncElasticsearchStore and forward them to EVectorStore."
                )

            # Check for unexpected parameters.
            unexpected_params = forwarded_params - evectorstore_params
            if unexpected_params:
                pytest.fail(
                    "AsyncElasticsearchStore is forwarding unexpected "
                    f"parameters to EVectorStore: {unexpected_params}. These "
                    "parameters don't exist in EVectorStore.__init__."
                )
        finally:
            # Close the store even when one of the checks above fails.
            await store.aclose()
475+
476+
@pytest.mark.asyncio
async def test_parameter_forwarding_defaults(self) -> None:
    """Test that default parameter values are properly forwarded to
    AsyncEVectorStore.

    Constructs AsyncElasticsearchStore with only ``index_name`` and
    ``es_connection`` while EVectorStore is patched, then inspects the
    keyword arguments the patched class received.
    """
    client = AsyncElasticsearch(hosts=["http://dummy:9200"])

    with patch(
        "langchain_elasticsearch._async.vectorstores.EVectorStore"
    ) as mock_evectorstore:
        # Mock the close method to be async so the final aclose() can be
        # awaited (presumably aclose() awaits the wrapped store's close()).
        mock_evectorstore.return_value.close = AsyncMock()

        # Test with minimal parameters (should use defaults).
        store = AsyncElasticsearchStore(
            index_name="test_index", es_connection=client
        )

        try:
            # Verify EVectorStore was constructed exactly once and grab the
            # keyword arguments it was given.
            mock_evectorstore.assert_called_once()
            forwarded = mock_evectorstore.call_args.kwargs

            # Explicitly supplied values.
            assert forwarded["index"] == "test_index"
            assert forwarded["client"] == client
            # Default values.
            assert forwarded["vector_field"] == "vector"
            assert forwarded["text_field"] == "text"
            assert forwarded["num_dimensions"] is None
        finally:
            # Close the store even when one of the assertions above fails.
            await store.aclose()

0 commit comments

Comments
 (0)