Skip to content

Commit fb9de59

Browse files
ssh-esh and ssh-esh authored
Update testing suite (#73)
* update python versions for testing
* create testing matrix for ES versions
* update python versions for _lint workflow
* update python versions for unit test workflow
* make fail fast false in integration and unit test workflows
* make fake_embeddings less uniform for quantization
* introduce stable hash embeddings
* allow test_similarity_search_without_metadata to use stable hash embeddings
* use ruff to fix lint errors
* fix some more linting errors
* use stable hash embeddings on test_add_embeddings
* make relevance score tests use stable hash embeddings
* create a tolerance on score assertion for test_elasticsearch_with_relevance_score
* fix linting errors
* use stableHashEmbeddings on test_similarity_search_approx_by_vector
* create tolerance on score assertion for test_similarity_search_approx_by_vector
* debug: rank_window_size breaking change
* decide window_key based on ES version for async rrf hybrid search
* decide window_key based on ES version for sync rrf hybrid search
* fix lint errors
* Add comment for StableHashEmbeddings
* improve code comments
* fix lint
* add type annotation for stable hash embeddings
* update comments
* review comment: update consistent embeddings to hash based
* fix lint
* review comment: change to 16-dim vector, 2 d.p and assert full query body
* review comment: min ES version is 8.15 in test matrix
* review comment: update rank_window_size
* linting
* review comments: update comments
---------
Co-authored-by: ssh-esh <[email protected]>
1 parent 83ef833 commit fb9de59

File tree

8 files changed

+140
-61
lines changed

8 files changed

+140
-61
lines changed

.github/workflows/_integration_test.yml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,21 @@ jobs:
2525
working-directory: ${{ inputs.working-directory }}
2626
runs-on: ubuntu-latest
2727
strategy:
28+
fail-fast: false
2829
matrix:
2930
python-version:
3031
- "3.9"
3132
- "3.10"
3233
- "3.11"
34+
- "3.12"
35+
- "3.13"
36+
elasticsearch-version:
37+
- "8.15.0"
38+
- "8.19.0"
39+
- "9.1.2"
3340
services:
3441
elasticsearch:
35-
image: elasticsearch:8.13.0
42+
image: elasticsearch:${{ matrix.elasticsearch-version }}
3643
env:
3744
discovery.type: single-node
3845
xpack.license.self_generated.type: trial

.github/workflows/_lint.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ jobs:
2929
# Starting new jobs is also relatively slow,
3030
# so linting on fewer versions makes CI faster.
3131
python-version:
32-
- "3.8"
33-
- "3.11"
32+
- "3.9"
33+
- "3.13"
3434
steps:
3535
- uses: actions/checkout@v4
3636

.github/workflows/_test.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,14 @@ jobs:
1818
working-directory: ${{ inputs.working-directory }}
1919
runs-on: ubuntu-latest
2020
strategy:
21+
fail-fast: false
2122
matrix:
2223
python-version:
23-
- "3.8"
2424
- "3.9"
2525
- "3.10"
2626
- "3.11"
27+
- "3.12"
28+
- "3.13"
2729
name: "make test #${{ matrix.python-version }}"
2830
steps:
2931
- uses: actions/checkout@v4
Lines changed: 27 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Fake Embedding class for testing purposes."""
22

3+
import hashlib
34
from typing import List
45

56
from langchain_core.embeddings import Embeddings
@@ -24,26 +25,33 @@ async def aembed_query(self, text: str) -> List[float]:
2425

2526

2627
class AsyncConsistentFakeEmbeddings(AsyncFakeEmbeddings):
27-
"""Fake embeddings which remember all the texts seen so far to return consistent
28-
vectors for the same texts."""
29-
30-
def __init__(self, dimensionality: int = 10) -> None:
31-
self.known_texts: List[str] = []
32-
self.dimensionality = dimensionality
28+
"""Deterministic hash-based embeddings for robust testing (async version).
29+
30+
Why:
31+
- Elasticsearch 8.14+ indexes dense vectors with int8_hnsw by default.
32+
Quantization (int8) + HNSW ANN can slightly disturb scores/ranking
33+
especially when vectors are nearly identical.
34+
- Tests need deterministic separation so small quantization/ANN
35+
effects do not flip top-1 results or break strict assertions.
36+
37+
What:
38+
- Produce a 16-dim vector from md5(text), convert to floats, then L1-normalize
39+
so values sum to 1.0, then round each component to 2 decimal places for precision stability (after rounding, the sum is only approximately 1.0).
40+
This gives stable, well-separated but deterministic vectors which will work
41+
across ES versions.
42+
"""
43+
44+
@staticmethod
45+
def _encode(text: str) -> List[float]:
46+
digest = hashlib.md5(text.encode("utf-8")).digest()
47+
total = sum(digest)
48+
# Round to 2 decimal places to avoid precision issues
49+
return [round(float(v) / float(total), 2) for v in digest]
3350

3451
async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
35-
"""Return consistent embeddings for each text seen so far."""
36-
out_vectors = []
37-
for text in texts:
38-
if text not in self.known_texts:
39-
self.known_texts.append(text)
40-
vector = [float(1.0)] * (self.dimensionality - 1) + [
41-
float(self.known_texts.index(text))
42-
]
43-
out_vectors.append(vector)
44-
return out_vectors
52+
"""Return stable hash-based embeddings for each text."""
53+
return [self._encode(text) for text in texts]
4554

4655
async def aembed_query(self, text: str) -> List[float]:
47-
"""Return consistent embeddings for the text, if seen before, or a constant
48-
one if the text is unknown."""
49-
return (await self.aembed_documents([text]))[0]
56+
"""Return stable hash-based embeddings for the text."""
57+
return self._encode(text)
Lines changed: 27 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Fake Embedding class for testing purposes."""
22

3+
import hashlib
34
from typing import List
45

56
from langchain_core.embeddings import Embeddings
@@ -24,26 +25,33 @@ def embed_query(self, text: str) -> List[float]:
2425

2526

2627
class ConsistentFakeEmbeddings(FakeEmbeddings):
27-
"""Fake embeddings which remember all the texts seen so far to return consistent
28-
vectors for the same texts."""
29-
30-
def __init__(self, dimensionality: int = 10) -> None:
31-
self.known_texts: List[str] = []
32-
self.dimensionality = dimensionality
28+
"""Deterministic hash-based embeddings for robust testing (sync version).
29+
30+
Why:
31+
- Elasticsearch 8.14+ indexes dense vectors with int8_hnsw by default.
32+
Quantization (int8) + HNSW ANN can slightly disturb scores/ranking
33+
especially when vectors are nearly identical.
34+
- Tests need deterministic separation so small quantization/ANN
35+
effects do not flip top-1 results or break strict assertions.
36+
37+
What:
38+
- Produce a 16-dim vector from md5(text), convert to floats, then L1-normalize
39+
so values sum to 1.0, then round each component to 2 decimal places for precision stability (after rounding, the sum is only approximately 1.0).
40+
This gives stable, well-separated but deterministic vectors which will work
41+
across ES versions.
42+
"""
43+
44+
@staticmethod
45+
def _encode(text: str) -> List[float]:
46+
digest = hashlib.md5(text.encode("utf-8")).digest()
47+
total = sum(digest)
48+
# Round to 2 decimal places to avoid precision issues
49+
return [round(float(v) / float(total), 2) for v in digest]
3350

3451
def embed_documents(self, texts: List[str]) -> List[List[float]]:
35-
"""Return consistent embeddings for each text seen so far."""
36-
out_vectors = []
37-
for text in texts:
38-
if text not in self.known_texts:
39-
self.known_texts.append(text)
40-
vector = [float(1.0)] * (self.dimensionality - 1) + [
41-
float(self.known_texts.index(text))
42-
]
43-
out_vectors.append(vector)
44-
return out_vectors
52+
"""Return stable hash-based embeddings for each text."""
53+
return [self._encode(text) for text in texts]
4554

4655
def embed_query(self, text: str) -> List[float]:
47-
"""Return consistent embeddings for the text, if seen before, or a constant
48-
one if the text is unknown."""
49-
return (self.embed_documents([text]))[0]
56+
"""Return stable hash-based embeddings for the text."""
57+
return self._encode(text)

libs/elasticsearch/tests/fake_embeddings.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
from ._async.fake_embeddings import (
66
AsyncConsistentFakeEmbeddings as _AsyncConsistentFakeEmbeddings,
77
)
8-
from ._async.fake_embeddings import AsyncFakeEmbeddings as _AsyncFakeEmbeddings
8+
from ._async.fake_embeddings import (
9+
AsyncFakeEmbeddings as _AsyncFakeEmbeddings,
10+
)
911
from ._sync.fake_embeddings import ( # noqa: F401
1012
ConsistentFakeEmbeddings,
1113
FakeEmbeddings,

libs/elasticsearch/tests/integration_tests/_async/test_vectorstores.py

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@
1010

1111
from langchain_elasticsearch.vectorstores import AsyncElasticsearchStore
1212

13-
from ...fake_embeddings import AsyncConsistentFakeEmbeddings, AsyncFakeEmbeddings
13+
from ...fake_embeddings import (
14+
AsyncConsistentFakeEmbeddings,
15+
AsyncFakeEmbeddings,
16+
)
1417
from ._test_utilities import clear_test_indices, create_es_client, read_env
1518

1619
logging.basicConfig(level=logging.DEBUG)
@@ -172,15 +175,32 @@ def assert_query(
172175
"filter": [],
173176
"k": 1,
174177
"num_candidates": 50,
175-
"query_vector": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0],
178+
"query_vector": [
179+
0.06,
180+
0.07,
181+
0.01,
182+
0.08,
183+
0.03,
184+
0.07,
185+
0.09,
186+
0.03,
187+
0.09,
188+
0.09,
189+
0.04,
190+
0.03,
191+
0.08,
192+
0.07,
193+
0.06,
194+
0.08,
195+
],
176196
}
177197
}
178198
return query_body
179199

180200
texts = ["foo", "bar", "baz"]
181201
docsearch = await AsyncElasticsearchStore.afrom_texts(
182202
texts,
183-
AsyncFakeEmbeddings(),
203+
AsyncConsistentFakeEmbeddings(),
184204
**es_params,
185205
index_name=index_name,
186206
)
@@ -597,7 +617,10 @@ def assert_query(
597617
k=1,
598618
custom_query=assert_query,
599619
)
600-
assert output == [(Document(page_content="foo"), 1.0)]
620+
doc, score = output[0]
621+
622+
assert doc == Document(page_content="foo")
623+
assert score == pytest.approx(1.0, rel=0.05)
601624

602625
@pytest.mark.asyncio
603626
async def test_similarity_search_approx_with_hybrid_search_rrf(
@@ -610,7 +633,7 @@ async def test_similarity_search_approx_with_hybrid_search_rrf(
610633
rrf_test_cases: List[Optional[Union[dict, bool]]] = [
611634
True,
612635
False,
613-
{"rank_constant": 1, "window_size": 5},
636+
{"rank_constant": 1, "rank_window_size": 5},
614637
]
615638
for rrf_test_case in rrf_test_cases:
616639
texts = ["foo", "bar", "baz"]
@@ -687,7 +710,7 @@ def assert_query(
687710
"query_vector": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0],
688711
},
689712
size=3,
690-
rank={"rrf": {"rank_constant": 1, "window_size": 5}},
713+
rank={"rrf": {"rank_constant": 1, "rank_window_size": 5}},
691714
)
692715

693716
assert [o.page_content for o in output] == [
@@ -748,7 +771,7 @@ async def test_deployed_model_check_fails_approx(
748771
with pytest.raises(NotFoundError):
749772
await AsyncElasticsearchStore.afrom_texts(
750773
texts=["foo", "bar", "baz"],
751-
embedding=AsyncConsistentFakeEmbeddings(10),
774+
embedding=AsyncConsistentFakeEmbeddings(),
752775
**es_params,
753776
index_name=index_name,
754777
strategy=AsyncElasticsearchStore.ApproxRetrievalStrategy(
@@ -778,7 +801,7 @@ async def test_elasticsearch_with_relevance_score(
778801
"""Test to make sure the relevance score is scaled to 0-1."""
779802
texts = ["foo", "bar", "baz"]
780803
metadatas = [{"page": str(i)} for i in range(len(texts))]
781-
embeddings = AsyncFakeEmbeddings()
804+
embeddings = AsyncConsistentFakeEmbeddings()
782805

783806
docsearch = await AsyncElasticsearchStore.afrom_texts(
784807
index_name=index_name,
@@ -792,7 +815,10 @@ async def test_elasticsearch_with_relevance_score(
792815
output = await docsearch.asimilarity_search_by_vector_with_relevance_scores(
793816
embedding=embedded_query, k=1
794817
)
795-
assert output == [(Document(page_content="foo", metadata={"page": "0"}), 1.0)]
818+
doc, score = output[0]
819+
820+
assert doc == Document(page_content="foo", metadata={"page": "0"})
821+
assert score == pytest.approx(1.0, rel=0.05)
796822

797823
@pytest.mark.asyncio
798824
async def test_similarity_search_bm25_search(

libs/elasticsearch/tests/integration_tests/_sync/test_vectorstores.py

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@
1010

1111
from langchain_elasticsearch.vectorstores import ElasticsearchStore
1212

13-
from ...fake_embeddings import ConsistentFakeEmbeddings, FakeEmbeddings
13+
from ...fake_embeddings import (
14+
ConsistentFakeEmbeddings,
15+
FakeEmbeddings,
16+
)
1417
from ._test_utilities import clear_test_indices, create_es_client, read_env
1518

1619
logging.basicConfig(level=logging.DEBUG)
@@ -172,15 +175,32 @@ def assert_query(
172175
"filter": [],
173176
"k": 1,
174177
"num_candidates": 50,
175-
"query_vector": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0],
178+
"query_vector": [
179+
0.06,
180+
0.07,
181+
0.01,
182+
0.08,
183+
0.03,
184+
0.07,
185+
0.09,
186+
0.03,
187+
0.09,
188+
0.09,
189+
0.04,
190+
0.03,
191+
0.08,
192+
0.07,
193+
0.06,
194+
0.08,
195+
],
176196
}
177197
}
178198
return query_body
179199

180200
texts = ["foo", "bar", "baz"]
181201
docsearch = ElasticsearchStore.from_texts(
182202
texts,
183-
FakeEmbeddings(),
203+
ConsistentFakeEmbeddings(),
184204
**es_params,
185205
index_name=index_name,
186206
)
@@ -581,7 +601,10 @@ def assert_query(
581601
k=1,
582602
custom_query=assert_query,
583603
)
584-
assert output == [(Document(page_content="foo"), 1.0)]
604+
doc, score = output[0]
605+
606+
assert doc == Document(page_content="foo")
607+
assert score == pytest.approx(1.0, rel=0.05)
585608

586609
@pytest.mark.sync
587610
def test_similarity_search_approx_with_hybrid_search_rrf(
@@ -594,7 +617,7 @@ def test_similarity_search_approx_with_hybrid_search_rrf(
594617
rrf_test_cases: List[Optional[Union[dict, bool]]] = [
595618
True,
596619
False,
597-
{"rank_constant": 1, "window_size": 5},
620+
{"rank_constant": 1, "rank_window_size": 5},
598621
]
599622
for rrf_test_case in rrf_test_cases:
600623
texts = ["foo", "bar", "baz"]
@@ -671,7 +694,7 @@ def assert_query(
671694
"query_vector": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0],
672695
},
673696
size=3,
674-
rank={"rrf": {"rank_constant": 1, "window_size": 5}},
697+
rank={"rrf": {"rank_constant": 1, "rank_window_size": 5}},
675698
)
676699

677700
assert [o.page_content for o in output] == [
@@ -730,7 +753,7 @@ def test_deployed_model_check_fails_approx(
730753
with pytest.raises(NotFoundError):
731754
ElasticsearchStore.from_texts(
732755
texts=["foo", "bar", "baz"],
733-
embedding=ConsistentFakeEmbeddings(10),
756+
embedding=ConsistentFakeEmbeddings(),
734757
**es_params,
735758
index_name=index_name,
736759
strategy=ElasticsearchStore.ApproxRetrievalStrategy(
@@ -760,7 +783,7 @@ def test_elasticsearch_with_relevance_score(
760783
"""Test to make sure the relevance score is scaled to 0-1."""
761784
texts = ["foo", "bar", "baz"]
762785
metadatas = [{"page": str(i)} for i in range(len(texts))]
763-
embeddings = FakeEmbeddings()
786+
embeddings = ConsistentFakeEmbeddings()
764787

765788
docsearch = ElasticsearchStore.from_texts(
766789
index_name=index_name,
@@ -774,7 +797,10 @@ def test_elasticsearch_with_relevance_score(
774797
output = docsearch.similarity_search_by_vector_with_relevance_scores(
775798
embedding=embedded_query, k=1
776799
)
777-
assert output == [(Document(page_content="foo", metadata={"page": "0"}), 1.0)]
800+
doc, score = output[0]
801+
802+
assert doc == Document(page_content="foo", metadata={"page": "0"})
803+
assert score == pytest.approx(1.0, rel=0.05)
778804

779805
@pytest.mark.sync
780806
def test_similarity_search_bm25_search(

0 commit comments

Comments
 (0)