diff --git a/mysql-test/main/vector_ef_construction.result b/mysql-test/main/vector_ef_construction.result new file mode 100644 index 0000000000000..6f6d5c5e985cb --- /dev/null +++ b/mysql-test/main/vector_ef_construction.result @@ -0,0 +1,57 @@ +select @@mhnsw_default_ef_construction; +@@mhnsw_default_ef_construction +10 +set @old_ef_construction = @@mhnsw_default_ef_construction; +create table t1 (id int auto_increment primary key, v vector(5) not null, vector index (v)); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `id` int(11) NOT NULL AUTO_INCREMENT, + `v` vector(5) NOT NULL, + PRIMARY KEY (`id`), + VECTOR KEY `v` (`v`) +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_uca1400_ai_ci +drop table t1; +set mhnsw_default_ef_construction = 100; +create table t2 (id int auto_increment primary key, v vector(5) not null, vector index (v)); +show create table t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `id` int(11) NOT NULL AUTO_INCREMENT, + `v` vector(5) NOT NULL, + PRIMARY KEY (`id`), + VECTOR KEY `v` (`v`) `ef_construction`=100 +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_uca1400_ai_ci +drop table t2; +set mhnsw_default_ef_construction = @old_ef_construction; +create table t3 (id int auto_increment primary key, v vector(5) not null, +vector index (v) ef_construction=200); +show create table t3; +Table Create Table +t3 CREATE TABLE `t3` ( + `id` int(11) NOT NULL AUTO_INCREMENT, + `v` vector(5) NOT NULL, + PRIMARY KEY (`id`), + VECTOR KEY `v` (`v`) `ef_construction`=200 +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_uca1400_ai_ci +drop table t3; +create table t4 (id int auto_increment primary key, v vector(5) not null, +vector index (v) m=10 ef_construction=150); +show create table t4; +Table Create Table +t4 CREATE TABLE `t4` ( + `id` int(11) NOT NULL AUTO_INCREMENT, + `v` vector(5) NOT NULL, + PRIMARY KEY (`id`), + VECTOR KEY `v` (`v`) `M`=10 `ef_construction`=150 +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 
COLLATE=utf8mb4_uca1400_ai_ci +drop table t4; +set mhnsw_default_ef_construction = 1; +select @@mhnsw_default_ef_construction; +@@mhnsw_default_ef_construction +1 +set mhnsw_default_ef_construction = 10000; +select @@mhnsw_default_ef_construction; +@@mhnsw_default_ef_construction +10000 +set mhnsw_default_ef_construction = @old_ef_construction; diff --git a/mysql-test/main/vector_ef_construction.test b/mysql-test/main/vector_ef_construction.test new file mode 100644 index 0000000000000..168f21cfb5fb6 --- /dev/null +++ b/mysql-test/main/vector_ef_construction.test @@ -0,0 +1,43 @@ +# Test for configurable ef_construction parameter in MHNSW vector index +# MDEV-38284: Add configurable ef_construction for vector indexes + +# Test that the system variable exists and has proper default +select @@mhnsw_default_ef_construction; +set @old_ef_construction = @@mhnsw_default_ef_construction; + +# Test creating table with default ef_construction +create table t1 (id int auto_increment primary key, v vector(5) not null, vector index (v)); +replace_result InnoDB MyISAM; +show create table t1; +drop table t1; + +# Test creating table with custom ef_construction via system variable +set mhnsw_default_ef_construction = 100; +create table t2 (id int auto_increment primary key, v vector(5) not null, vector index (v)); +replace_result InnoDB MyISAM; +show create table t2; +drop table t2; + +# Test creating table with custom ef_construction via index option +set mhnsw_default_ef_construction = @old_ef_construction; +create table t3 (id int auto_increment primary key, v vector(5) not null, + vector index (v) ef_construction=200); +replace_result InnoDB MyISAM; +show create table t3; +drop table t3; + +# Test combining M and ef_construction options +create table t4 (id int auto_increment primary key, v vector(5) not null, + vector index (v) m=10 ef_construction=150); +replace_result InnoDB MyISAM; +show create table t4; +drop table t4; + +# Test boundary values +set 
mhnsw_default_ef_construction = 1; +select @@mhnsw_default_ef_construction; +set mhnsw_default_ef_construction = 10000; +select @@mhnsw_default_ef_construction; + +# Restore default +set mhnsw_default_ef_construction = @old_ef_construction; diff --git a/sql/vector_mhnsw.cc b/sql/vector_mhnsw.cc index a957aaadf3f24..0d382ca10b866 100644 --- a/sql/vector_mhnsw.cc +++ b/sql/vector_mhnsw.cc @@ -29,7 +29,7 @@ static constexpr float NEAREST = -1.0f; // Algorithm parameters static constexpr float alpha = 1.1f; -static constexpr uint ef_construction= 10; +// ef_construction is now a per-index option (see ha_index_option_struct) static constexpr uint max_ef= 10000; static constexpr size_t subdist_part= 192; static constexpr float subdist_margin= 1.05f; @@ -110,10 +110,15 @@ static TYPELIB distances= CREATE_TYPELIB_FOR(distance_names); static MYSQL_THDVAR_ENUM(default_distance, PLUGIN_VAR_RQCMDARG, "Distance function to build the vector index for", nullptr, nullptr, EUCLIDEAN, &distances); +static MYSQL_THDVAR_UINT(default_ef_construction, PLUGIN_VAR_RQCMDARG, + "Larger values mean slower INSERTs but more accurate index graph. 
" + "Controls the number of candidates considered during index construction", + nullptr, nullptr, 10, 1, 10000, 1); struct ha_index_option_struct { ulonglong M; // option struct does not support uint + ulonglong ef_construction; metric_type metric; }; @@ -508,12 +513,14 @@ class MHNSW_Share : public Sql_alloc const uint tref_len; const uint gref_len; const uint M; + const uint ef_construction; metric_type metric; bool use_subdist; MHNSW_Share(TABLE *t) : tref_len(t->file->ref_length), gref_len(t->hlindex->file->ref_length), M(static_cast<uint>(t->s->key_info[t->s->keys].option_struct->M)), + ef_construction(static_cast<uint>(t->s->key_info[t->s->keys].option_struct->ef_construction)), metric(t->s->key_info[t->s->keys].option_struct->metric) { mysql_rwlock_init(PSI_INSTRUMENT_ME, &commit_lock); @@ -1300,7 +1307,7 @@ static int search_layer(MHNSW_param *p, const FVector *target, float threshold, { skip_deleted= false; if (ef > 1) - ef= std::max(ef_construction, ef); + ef= std::max(p->ctx->ef_construction, ef); } else { @@ -1761,6 +1768,7 @@ Item_func_vec_distance::distance_kind mhnsw_uses_distance(const TABLE *table, KE ha_create_table_option mhnsw_index_options[]= { HA_IOPTION_SYSVAR("m", M, default_m), + HA_IOPTION_SYSVAR("ef_construction", ef_construction, default_ef_construction), HA_IOPTION_SYSVAR("distance", metric, default_distance), HA_IOPTION_END }; @@ -1790,6 +1798,7 @@ static struct st_mysql_sys_var *mhnsw_sys_vars[]= { MYSQL_SYSVAR(max_cache_size), MYSQL_SYSVAR(default_m), + MYSQL_SYSVAR(default_ef_construction), MYSQL_SYSVAR(default_distance), MYSQL_SYSVAR(ef_search), NULL