Skip to content

Commit 95cf866

Browse files
committed
enhance: support configurable estimation on variable length field
Signed-off-by: xiaofanluan <[email protected]>
1 parent 9b4b0cb commit 95cf866

File tree

5 files changed

+213
-9
lines changed

5 files changed

+213
-9
lines changed

configs/milvus.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -846,6 +846,10 @@ common:
846846
defaultPartitionName: _default # Name of the default partition when a collection is created
847847
defaultIndexName: _default_idx # Name of the index when it is created with name unspecified
848848
entityExpiration: -1 # Entity expiration in seconds, CAUTION -1 means never expire
849+
estimate:
850+
varCharLengthLimit: 256 # Maximum length considered per VarChar value when estimating record size
851+
dynamicFieldLengthLimit: 512 # Maximum length considered per JSON/Array/Geometry value when estimating record size
852+
sparseFloatVectorSize: 1200 # Fallback size (bytes) for estimating SparseFloatVector fields
849853
indexSliceSize: 16 # Index slice size in MB
850854
threadCoreCoefficient:
851855
highPriority: 10 # This parameter specify how many times the number of threads is the number of cores in high priority pool

pkg/util/paramtable/component_param.go

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import (
3232
"github.com/milvus-io/milvus/pkg/v2/log"
3333
"github.com/milvus-io/milvus/pkg/v2/util/hardware"
3434
"github.com/milvus-io/milvus/pkg/v2/util/metricsinfo"
35+
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
3536
)
3637

3738
const (
@@ -269,7 +270,10 @@ type commonConfig struct {
269270
TopicNames ParamItem `refreshable:"true"`
270271
TimeTicker ParamItem `refreshable:"true"`
271272

272-
JSONMaxLength ParamItem `refreshable:"false"`
273+
JSONMaxLength ParamItem `refreshable:"false"`
274+
DynamicFieldMaxLength ParamItem `refreshable:"true"`
275+
SparseFloatVectorEstimateSize ParamItem `refreshable:"true"`
276+
VarCharEstimateLengthLimit ParamItem `refreshable:"true"`
273277

274278
MetricsPort ParamItem `refreshable:"false"`
275279

@@ -875,6 +879,45 @@ Large numeric passwords require double quotes to avoid yaml parsing precision is
875879
}
876880
p.JSONMaxLength.Init(base.mgr)
877881

882+
p.DynamicFieldMaxLength = ParamItem{
883+
Key: "common.estimate.dynamicFieldLengthLimit",
884+
Version: "2.5.0",
885+
DefaultValue: fmt.Sprint(typeutil.GetDynamicFieldEstimateLength()),
886+
Doc: "maximum length used when estimating JSON/Array/Geometry field size",
887+
Export: true,
888+
}
889+
p.DynamicFieldMaxLength.Init(base.mgr)
890+
typeutil.SetDynamicFieldEstimateLength(p.DynamicFieldMaxLength.GetAsInt())
891+
base.mgr.Dispatcher.Register(p.DynamicFieldMaxLength.Key, config.NewHandler(p.DynamicFieldMaxLength.Key, func(event *config.Event) {
892+
typeutil.SetDynamicFieldEstimateLength(p.DynamicFieldMaxLength.GetAsInt())
893+
}))
894+
895+
p.SparseFloatVectorEstimateSize = ParamItem{
896+
Key: "common.estimate.sparseFloatVectorSize",
897+
Version: "2.5.0",
898+
DefaultValue: fmt.Sprint(typeutil.GetSparseFloatVectorEstimateLength()),
899+
Doc: "fallback size (bytes) used when estimating sparse float vector fields",
900+
Export: true,
901+
}
902+
p.SparseFloatVectorEstimateSize.Init(base.mgr)
903+
typeutil.SetSparseFloatVectorEstimateLength(p.SparseFloatVectorEstimateSize.GetAsInt())
904+
base.mgr.Dispatcher.Register(p.SparseFloatVectorEstimateSize.Key, config.NewHandler(p.SparseFloatVectorEstimateSize.Key, func(event *config.Event) {
905+
typeutil.SetSparseFloatVectorEstimateLength(p.SparseFloatVectorEstimateSize.GetAsInt())
906+
}))
907+
908+
p.VarCharEstimateLengthLimit = ParamItem{
909+
Key: "common.estimate.varCharLengthLimit",
910+
Version: "2.5.0",
911+
DefaultValue: fmt.Sprint(typeutil.GetVarCharEstimateLength()),
912+
Doc: "maximum length used when estimating VarChar field size",
913+
Export: true,
914+
}
915+
p.VarCharEstimateLengthLimit.Init(base.mgr)
916+
typeutil.SetVarCharEstimateLength(p.VarCharEstimateLengthLimit.GetAsInt())
917+
base.mgr.Dispatcher.Register(p.VarCharEstimateLengthLimit.Key, config.NewHandler(p.VarCharEstimateLengthLimit.Key, func(event *config.Event) {
918+
typeutil.SetVarCharEstimateLength(p.VarCharEstimateLengthLimit.GetAsInt())
919+
}))
920+
878921
p.MetricsPort = ParamItem{
879922
Key: "common.MetricsPort",
880923
Version: "2.3.0",
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
// Licensed to the LF AI & Data foundation under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing, software
12+
// distributed under the License is distributed on an "AS IS" BASIS,
13+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
// See the License for the specific language governing permissions and
15+
// limitations under the License.
16+
17+
package typeutil
18+
19+
import "sync/atomic"
20+
21+
// Built-in fallback values for the estimate lengths. The Set*/Get* functions
// in this file substitute them whenever a non-positive value is supplied or stored.
const (
22+
// Fallback cap used for VarChar fields.
defaultVarCharEstimateLength = 256
23+
// Fallback estimate length used for dynamic fields.
defaultDynamicFieldEstimateLength = 512
24+
// Fallback estimate size used for sparse float vector fields.
defaultSparseFloatEstimateLength = 1200
25+
)
26+
27+
// Package-level estimate lengths held in atomic.Int64 values. They are written
// only through the Set* functions and read through the Get* functions in this file.
var (
28+
// Current cap for VarChar fields.
varCharEstimateLength atomic.Int64
29+
// Current estimate length for dynamic fields.
dynamicFieldEstimateLength atomic.Int64
30+
// Current fallback size for sparse float vector fields.
sparseEstimateLength atomic.Int64
31+
)
32+
33+
// init seeds every estimate length with its built-in default by going through
// the corresponding setter.
func init() {
34+
SetVarCharEstimateLength(defaultVarCharEstimateLength)
35+
SetDynamicFieldEstimateLength(defaultDynamicFieldEstimateLength)
36+
SetSparseFloatVectorEstimateLength(defaultSparseFloatEstimateLength)
37+
}
38+
39+
// SetVarCharEstimateLength updates the global cap applied when estimating record sizes for VarChar fields.
// A non-positive length is not stored as-is; defaultVarCharEstimateLength (256) is stored instead.
40+
func SetVarCharEstimateLength(length int) {
41+
// Reject zero and negative values by falling back to the built-in default.
if length <= 0 {
42+
length = defaultVarCharEstimateLength
43+
}
44+
varCharEstimateLength.Store(int64(length))
45+
}
46+
47+
// GetVarCharEstimateLength returns the current cap used when estimating VarChar field sizes.
// If the stored value is non-positive, defaultVarCharEstimateLength (256) is returned instead.
48+
func GetVarCharEstimateLength() int {
49+
length := int(varCharEstimateLength.Load())
50+
// Guards against the zero value of the atomic (0), which is what Load yields
// until a Set call has stored a positive length.
if length <= 0 {
51+
return defaultVarCharEstimateLength
52+
}
53+
return length
54+
}
55+
56+
// SetDynamicFieldEstimateLength updates the global estimate length used for dynamic fields (JSON/Array/Geometry).
// A non-positive length is not stored as-is; defaultDynamicFieldEstimateLength (512) is stored instead.
57+
func SetDynamicFieldEstimateLength(length int) {
58+
// Reject zero and negative values by falling back to the built-in default.
if length <= 0 {
59+
length = defaultDynamicFieldEstimateLength
60+
}
61+
dynamicFieldEstimateLength.Store(int64(length))
62+
}
63+
64+
// GetDynamicFieldEstimateLength returns the current estimate length for dynamic fields.
// If the stored value is non-positive, defaultDynamicFieldEstimateLength (512) is returned instead.
65+
func GetDynamicFieldEstimateLength() int {
66+
length := int(dynamicFieldEstimateLength.Load())
67+
// Guards against the zero value of the atomic (0), which is what Load yields
// until a Set call has stored a positive length.
if length <= 0 {
68+
return defaultDynamicFieldEstimateLength
69+
}
70+
return length
71+
}
72+
73+
// SetSparseFloatVectorEstimateLength updates the fallback size used when estimating sparse float vector fields.
// A non-positive length is not stored as-is; defaultSparseFloatEstimateLength (1200) is stored instead.
74+
func SetSparseFloatVectorEstimateLength(length int) {
75+
// Reject zero and negative values by falling back to the built-in default.
if length <= 0 {
76+
length = defaultSparseFloatEstimateLength
77+
}
78+
sparseEstimateLength.Store(int64(length))
79+
}
80+
81+
// GetSparseFloatVectorEstimateLength returns the current fallback size used for sparse float vector fields.
// If the stored value is non-positive, defaultSparseFloatEstimateLength (1200) is returned instead.
82+
func GetSparseFloatVectorEstimateLength() int {
83+
length := int(sparseEstimateLength.Load())
84+
// Guards against the zero value of the atomic (0), which is what Load yields
// until a Set call has stored a positive length.
if length <= 0 {
85+
return defaultSparseFloatEstimateLength
86+
}
87+
return length
88+
}

pkg/util/typeutil/schema.go

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,6 @@ import (
3737
"github.com/milvus-io/milvus/pkg/v2/log"
3838
)
3939

40-
const DynamicFieldMaxLength = 512
41-
4240
type getVariableFieldLengthPolicy int
4341

4442
const (
@@ -75,16 +73,17 @@ func getVarFieldLength(fieldSchema *schemapb.FieldSchema, policy getVariableFiel
7573
// TODO this is a hack and may not accurate, we should rely on estimate size per record
7674
// However we should report size and datacoord calculate based on size
7775
// https://github.com/milvus-io/milvus/issues/17687
78-
if maxLength > 256 {
79-
return 256, nil
76+
estimateLimit := GetVarCharEstimateLength()
77+
if maxLength > estimateLimit {
78+
return estimateLimit, nil
8079
}
8180
return maxLength, nil
8281
default:
8382
return 0, fmt.Errorf("unrecognized getVariableFieldLengthPolicy %v", policy)
8483
}
85-
// geometry field max length now consider the same as json field, which is 512 bytes
84+
// geometry field length is estimated the same way as json field, using the configurable dynamic field estimate length (512 bytes by default)
8685
case schemapb.DataType_Array, schemapb.DataType_JSON, schemapb.DataType_Geometry:
87-
return DynamicFieldMaxLength, nil
86+
return GetDynamicFieldEstimateLength(), nil
8887
default:
8988
return 0, fmt.Errorf("field %s is not a variable-length type", fieldSchema.DataType.String())
9089
}
@@ -160,7 +159,7 @@ func estimateSizeBy(schema *schemapb.CollectionSchema, policy getVariableFieldLe
160159
// varies depending on the number of non-zeros. Using sparse vector
161160
// generated by SPLADE as reference and returning size of a sparse
162161
// vector with 150 non-zeros.
163-
res += 1200
162+
res += GetSparseFloatVectorEstimateLength()
164163
}
165164
}
166165
return res, nil

pkg/util/typeutil/schema_test.go

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,11 +177,81 @@ func TestSchema(t *testing.T) {
177177
}
178178

179179
t.Run("EstimateSizePerRecord", func(t *testing.T) {
180+
limit := GetDynamicFieldEstimateLength()
180181
size, err := EstimateSizePerRecord(schema)
181-
assert.Equal(t, 680+DynamicFieldMaxLength*4, size)
182+
assert.Equal(t, 680+limit*4, size)
182183
assert.NoError(t, err)
183184
})
184185

186+
t.Run("VarCharEstimateLengthLimit", func(t *testing.T) {
187+
originalLimit := GetVarCharEstimateLength()
188+
t.Cleanup(func() { SetVarCharEstimateLength(originalLimit) })
189+
190+
field := &schemapb.FieldSchema{
191+
DataType: schemapb.DataType_VarChar,
192+
TypeParams: []*commonpb.KeyValuePair{
193+
{
194+
Key: common.MaxLengthKey,
195+
Value: "1024",
196+
},
197+
},
198+
}
199+
200+
SetVarCharEstimateLength(128)
201+
length, err := getVarFieldLength(field, custom)
202+
assert.NoError(t, err)
203+
assert.Equal(t, 128, length)
204+
205+
SetVarCharEstimateLength(4096)
206+
length, err = getVarFieldLength(field, custom)
207+
assert.NoError(t, err)
208+
assert.Equal(t, 1024, length)
209+
})
210+
211+
t.Run("DynamicFieldMaxLengthLimit", func(t *testing.T) {
212+
originalLimit := GetDynamicFieldEstimateLength()
213+
t.Cleanup(func() { SetDynamicFieldEstimateLength(originalLimit) })
214+
215+
field := &schemapb.FieldSchema{
216+
DataType: schemapb.DataType_JSON,
217+
}
218+
219+
SetDynamicFieldEstimateLength(2048)
220+
length, err := getVarFieldLength(field, custom)
221+
assert.NoError(t, err)
222+
assert.Equal(t, 2048, length)
223+
224+
SetDynamicFieldEstimateLength(128)
225+
length, err = getVarFieldLength(field, custom)
226+
assert.NoError(t, err)
227+
assert.Equal(t, 128, length)
228+
})
229+
230+
t.Run("SparseFloatVectorEstimateSize", func(t *testing.T) {
231+
original := GetSparseFloatVectorEstimateLength()
232+
t.Cleanup(func() { SetSparseFloatVectorEstimateLength(original) })
233+
234+
schemaWithSparse := &schemapb.CollectionSchema{
235+
Fields: []*schemapb.FieldSchema{
236+
{
237+
FieldID: 1200,
238+
Name: "sparse",
239+
DataType: schemapb.DataType_SparseFloatVector,
240+
},
241+
},
242+
}
243+
244+
SetSparseFloatVectorEstimateLength(2048)
245+
size, err := EstimateSizePerRecord(schemaWithSparse)
246+
assert.NoError(t, err)
247+
assert.Equal(t, 2048, size)
248+
249+
SetSparseFloatVectorEstimateLength(64)
250+
size, err = EstimateSizePerRecord(schemaWithSparse)
251+
assert.NoError(t, err)
252+
assert.Equal(t, 64, size)
253+
})
254+
185255
t.Run("SchemaHelper", func(t *testing.T) {
186256
_, err := CreateSchemaHelper(nil)
187257
assert.Error(t, err)

0 commit comments

Comments
 (0)