Skip to content

Commit 7d87ca2

Browse files
Add Hybrid Cardinality collector to prioritize Ordinals Collector (#19524) (#20208)
* Add Hybrid Cardinality collector to prioritize Ordinals Collector Current cardinality aggregator logic selects DirectCollector over OrdinalsCollector when relative memory overhead due to OrdinalsCollector (compared to DirectCollector) is higher. Because of this relative memory consumption logic, DirectCollector is selected for high cardinality aggregation queries. DirectCollector is slower compared to OrdinalsCollector. This default selection leads to higher search latency even when Opensearch process have available memory to use ordinals collector for faster query performance. There is no way to figure out memory requirement for nested aggregation because number of buckets are dynamically created as we traverse through all the matching document ids. To overcome this limitation, this change create a hybrid collector which will first use Ordinals Collector and will switch to DirectCollector if memory usage for Ordinals Collector Increase beyond certain threshold. When Hybrid collector switch from Ordinals Collector to Direct Collector, it will utilize already computed aggregation data from Ordinals Collector so that we do not have to rebuild aggregation result using Direct Collector. * Address PR comments * Address PR comments * Fix UTs --------- (cherry picked from commit 710d02f) Signed-off-by: Anand Pravinbhai Patel <[email protected]> Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 3e83e79 commit 7d87ca2

File tree

10 files changed

+484
-11
lines changed

10 files changed

+484
-11
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
3737
- Add search API tracker ([#18601](https://github.com/opensearch-project/OpenSearch/pull/18601))
3838
- Support dynamic consumer configuration update in pull-based ingestion ([#19963](https://github.com/opensearch-project/OpenSearch/pull/19963))
3939
- Cache the `StoredFieldsReader` for scroll query optimization ([#20112](https://github.com/opensearch-project/OpenSearch/pull/20112))
40+
- Add Hybrid Cardinality collector to prioritize Ordinals Collector ([#19524](https://github.com/opensearch-project/OpenSearch/pull/19524))
4041

4142
### Changed
4243
- Combining filter rewrite and skip list to optimize sub aggregation([#19573](https://github.com/opensearch-project/OpenSearch/pull/19573))

rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/170_cardinality_metric.yml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -268,8 +268,8 @@ setup:
268268
---
269269
"profiler string":
270270
- skip:
271-
version: " - 7.99.99"
272-
reason: new info added in 8.0.0 to be backported to 7.10.0
271+
version: " - 3.3.99"
272+
reason: hybrid collector added in 3.4.0
273273
- do:
274274
search:
275275
body:
@@ -287,6 +287,7 @@ setup:
287287
- gt: { profile.shards.0.aggregations.0.breakdown.post_collection: 0 }
288288
- match: { profile.shards.0.aggregations.0.debug.empty_collectors_used: 0 }
289289
- match: { profile.shards.0.aggregations.0.debug.numeric_collectors_used: 0 }
290-
- gt: { profile.shards.0.aggregations.0.debug.ordinals_collectors_used: 0 }
290+
- match: { profile.shards.0.aggregations.0.debug.ordinals_collectors_used: 0 }
291291
- match: { profile.shards.0.aggregations.0.debug.ordinals_collectors_overhead_too_high: 0 }
292292
- match: { profile.shards.0.aggregations.0.debug.string_hashing_collectors_used: 0 }
293+
- gt: { profile.shards.0.aggregations.0.debug.hybrid_collectors_used: 0 }

rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/170_cardinality_metric_unsigned.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,9 @@ setup:
265265

266266
---
267267
"profiler string":
268+
- skip:
269+
version: " - 3.3.99"
270+
reason: hybrid collector added in 3.4.0
268271
- do:
269272
search:
270273
body:
@@ -282,6 +285,7 @@ setup:
282285
- gt: { profile.shards.0.aggregations.0.breakdown.post_collection: 0 }
283286
- match: { profile.shards.0.aggregations.0.debug.empty_collectors_used: 0 }
284287
- match: { profile.shards.0.aggregations.0.debug.numeric_collectors_used: 0 }
285-
- gt: { profile.shards.0.aggregations.0.debug.ordinals_collectors_used: 0 }
288+
- match: { profile.shards.0.aggregations.0.debug.ordinals_collectors_used: 0 }
286289
- match: { profile.shards.0.aggregations.0.debug.ordinals_collectors_overhead_too_high: 0 }
287290
- match: { profile.shards.0.aggregations.0.debug.string_hashing_collectors_used: 0 }
291+
- gt: { profile.shards.0.aggregations.0.debug.hybrid_collectors_used: 0 }

server/src/main/java/org/opensearch/common/settings/ClusterSettings.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@
162162
import org.opensearch.script.ScriptService;
163163
import org.opensearch.search.SearchService;
164164
import org.opensearch.search.aggregations.MultiBucketConsumerService;
165+
import org.opensearch.search.aggregations.metrics.CardinalityAggregator;
165166
import org.opensearch.search.backpressure.settings.NodeDuressSettings;
166167
import org.opensearch.search.backpressure.settings.SearchBackpressureSettings;
167168
import org.opensearch.search.backpressure.settings.SearchShardTaskSettings;
@@ -589,6 +590,8 @@ public void apply(Settings value, Settings current, Settings previous) {
589590
SearchService.SEARCH_MAX_QUERY_STRING_LENGTH,
590591
SearchService.SEARCH_MAX_QUERY_STRING_LENGTH_MONITOR_ONLY,
591592
SearchService.CARDINALITY_AGGREGATION_PRUNING_THRESHOLD,
593+
CardinalityAggregator.CARDINALITY_AGGREGATION_HYBRID_COLLECTOR_ENABLED,
594+
CardinalityAggregator.CARDINALITY_AGGREGATION_HYBRID_COLLECTOR_MEMORY_THRESHOLD,
592595
SearchService.KEYWORD_INDEX_OR_DOC_VALUES_ENABLED,
593596
CreatePitController.PIT_INIT_KEEP_ALIVE,
594597
Node.WRITE_PORTS_FILE_SETTING,

server/src/main/java/org/opensearch/search/DefaultSearchContext.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
import org.opensearch.common.settings.Settings;
5656
import org.opensearch.common.unit.TimeValue;
5757
import org.opensearch.common.util.BigArrays;
58+
import org.opensearch.core.common.unit.ByteSizeValue;
5859
import org.opensearch.core.concurrency.OpenSearchRejectedExecutionException;
5960
import org.opensearch.index.IndexService;
6061
import org.opensearch.index.IndexSettings;
@@ -75,6 +76,8 @@
7576
import org.opensearch.search.aggregations.BucketCollectorProcessor;
7677
import org.opensearch.search.aggregations.InternalAggregation;
7778
import org.opensearch.search.aggregations.SearchContextAggregations;
79+
import org.opensearch.search.aggregations.metrics.CardinalityAggregationContext;
80+
import org.opensearch.search.aggregations.metrics.CardinalityAggregator;
7881
import org.opensearch.search.builder.SearchSourceBuilder;
7982
import org.opensearch.search.collapse.CollapseContext;
8083
import org.opensearch.search.deciders.ConcurrentSearchDecision;
@@ -220,6 +223,7 @@ final class DefaultSearchContext extends SearchContext {
220223
private final int maxAggRewriteFilters;
221224
private final int filterRewriteSegmentThreshold;
222225
private final int cardinalityAggregationPruningThreshold;
226+
private final CardinalityAggregationContext cardinalityAggregationContext;
223227
private final int bucketSelectionStrategyFactor;
224228
private final boolean keywordIndexOrDocValuesEnabled;
225229

@@ -287,6 +291,7 @@ final class DefaultSearchContext extends SearchContext {
287291
this.maxAggRewriteFilters = evaluateFilterRewriteSetting();
288292
this.filterRewriteSegmentThreshold = evaluateAggRewriteFilterSegThreshold();
289293
this.cardinalityAggregationPruningThreshold = evaluateCardinalityAggregationPruningThreshold();
294+
this.cardinalityAggregationContext = evaluateCardinalityAggregationContext();
290295
this.bucketSelectionStrategyFactor = evaluateBucketSelectionStrategyFactor();
291296
this.concurrentSearchDeciderFactories = concurrentSearchDeciderFactories;
292297
this.keywordIndexOrDocValuesEnabled = evaluateKeywordIndexOrDocValuesEnabled();
@@ -1238,6 +1243,11 @@ public int cardinalityAggregationPruningThreshold() {
12381243
return cardinalityAggregationPruningThreshold;
12391244
}
12401245

1246+
@Override
1247+
public CardinalityAggregationContext cardinalityAggregationContext() {
1248+
return cardinalityAggregationContext;
1249+
}
1250+
12411251
@Override
12421252
public int bucketSelectionStrategyFactor() {
12431253
return bucketSelectionStrategyFactor;
@@ -1255,6 +1265,17 @@ private int evaluateCardinalityAggregationPruningThreshold() {
12551265
return 0;
12561266
}
12571267

1268+
private CardinalityAggregationContext evaluateCardinalityAggregationContext() {
1269+
if (clusterService != null) {
1270+
boolean hybridCollectorEnabled = clusterService.getClusterSettings()
1271+
.get(CardinalityAggregator.CARDINALITY_AGGREGATION_HYBRID_COLLECTOR_ENABLED);
1272+
ByteSizeValue memoryThreshold = clusterService.getClusterSettings()
1273+
.get(CardinalityAggregator.CARDINALITY_AGGREGATION_HYBRID_COLLECTOR_MEMORY_THRESHOLD);
1274+
return CardinalityAggregationContext.from(hybridCollectorEnabled, memoryThreshold);
1275+
}
1276+
return new CardinalityAggregationContext(false, Runtime.getRuntime().maxMemory() / 100);
1277+
}
1278+
12581279
private int evaluateBucketSelectionStrategyFactor() {
12591280
if (clusterService != null) {
12601281
return clusterService.getClusterSettings().get(BUCKET_SELECTION_STRATEGY_FACTOR_SETTING);
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.search.aggregations.metrics;
10+
11+
import org.opensearch.common.annotation.PublicApi;
12+
import org.opensearch.core.common.unit.ByteSizeValue;
13+
14+
/**
15+
* Context object that encapsulates all cardinality aggregation related settings and configurations.
16+
* This helps keep cardinality-specific settings scoped properly and reduces SearchContext bloat.
17+
*/
18+
@PublicApi(since = "3.4.0")
19+
public class CardinalityAggregationContext {
20+
private final boolean hybridCollectorEnabled;
21+
private final long memoryThreshold;
22+
23+
public CardinalityAggregationContext(boolean hybridCollectorEnabled, long memoryThreshold) {
24+
this.hybridCollectorEnabled = hybridCollectorEnabled;
25+
this.memoryThreshold = memoryThreshold;
26+
}
27+
28+
public boolean isHybridCollectorEnabled() {
29+
return hybridCollectorEnabled;
30+
}
31+
32+
public long getMemoryThreshold() {
33+
return memoryThreshold;
34+
}
35+
36+
/**
37+
* Creates a CardinalityAggregationContext from cluster settings
38+
*/
39+
public static CardinalityAggregationContext from(boolean hybridCollectorEnabled, ByteSizeValue memoryThreshold) {
40+
return new CardinalityAggregationContext(hybridCollectorEnabled, memoryThreshold.getBytes());
41+
}
42+
}

0 commit comments

Comments
 (0)