diff --git a/test/src/unit-enumerations.cc b/test/src/unit-enumerations.cc index 47b0c2ac407..78384037f7e 100644 --- a/test/src/unit-enumerations.cc +++ b/test/src/unit-enumerations.cc @@ -76,7 +76,8 @@ struct EnumerationFx { const std::vector& values, bool ordered = false, Datatype type = static_cast(255), - std::string enmr_name = default_enmr_name); + std::string enmr_name = default_enmr_name, + bool async = false); shared_ptr create_empty_enumeration( Datatype type, @@ -147,6 +148,16 @@ struct EnumerationFx { EncryptionKey enc_key_; }; +template +static shared_ptr create_enumeration( + Context& ctx, + shared_ptr memory_tracker, + const std::vector& values, + bool ordered, + Datatype type, + std::string name, + bool async); + template QueryCondition create_qc( const char* field_name, T condition_value, const QueryConditionOp& op); @@ -163,6 +174,7 @@ QueryCondition create_qc( TEST_CASE_METHOD( EnumerationFx, "Create Empty Enumeration", "[enumeration][empty]") { Enumeration::create( + ctx_.resources(), default_enmr_name, Datatype::INT32, 1, @@ -171,6 +183,7 @@ TEST_CASE_METHOD( 0, nullptr, 0, + false, memory_tracker_); } @@ -179,6 +192,7 @@ TEST_CASE_METHOD( "Create Empty Var Sized Enumeration", "[enumeration][empty]") { Enumeration::create( + ctx_.resources(), default_enmr_name, Datatype::STRING_ASCII, constants::var_num, @@ -187,6 +201,7 @@ TEST_CASE_METHOD( 0, nullptr, 0, + false, memory_tracker_); } @@ -260,6 +275,7 @@ TEST_CASE_METHOD( "[enumeration][error][invalid-offsets-args]") { uint64_t offsets = 0; auto enmr = Enumeration::create( + ctx_.resources(), default_enmr_name, Datatype::STRING_ASCII, constants::var_num, @@ -268,6 +284,7 @@ TEST_CASE_METHOD( 0, &offsets, sizeof(uint64_t), + false, memory_tracker_); std::vector values = {""}; @@ -286,6 +303,7 @@ TEST_CASE_METHOD( "[enumeration][index-of][var-size]") { uint64_t offsets = 0; auto enmr = Enumeration::create( + ctx_.resources(), default_enmr_name, Datatype::STRING_ASCII, constants::var_num, @@ -294,6 +312,7 @@ TEST_CASE_METHOD( 0, &offsets, sizeof(uint64_t), + false, memory_tracker_); std::string value_str = ""; @@ -309,6 +328,7 @@ TEST_CASE_METHOD( "[enumeration][basic][buffer-address]") { uint64_t offsets = 0; auto enmr = Enumeration::create( + ctx_.resources(), default_enmr_name, Datatype::STRING_ASCII, constants::var_num, @@ -317,6 +337,7 @@ TEST_CASE_METHOD( 0, &offsets, sizeof(uint64_t), + false, memory_tracker_); REQUIRE(enmr->data().data() == nullptr); @@ -346,6 +367,7 @@ TEST_CASE_METHOD( "Basic Variable Size Empty Address Check", "[enumeration][basic][buffer-address]") { auto enmr = Enumeration::create( + ctx_.resources(), default_enmr_name, Datatype::STRING_ASCII, constants::var_num, @@ -354,6 +376,7 @@ TEST_CASE_METHOD( 0, nullptr, 0, + false, memory_tracker_); REQUIRE(enmr->data().data() == nullptr); @@ -414,6 +437,7 @@ TEST_CASE_METHOD( "[enumeration][basic][fixed][multi-cell-val-num]") { std::vector values = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; auto enmr = Enumeration::create( + ctx_.resources(), default_enmr_name, Datatype::INT32, 2, @@ -422,6 +446,7 @@ TEST_CASE_METHOD( values.size() * sizeof(int), nullptr, 0, + false, memory_tracker_); check_enumeration(enmr, default_enmr_name, values, Datatype::INT32, 2, false); } @@ -434,6 +459,7 @@ TEST_CASE_METHOD( "Invalid data buffer must not be nullptr for fixed sized data."); REQUIRE_THROWS_WITH( Enumeration::create( + ctx_.resources(), default_enmr_name, Datatype::INT32, 1, @@ -442,6 +468,7 @@ TEST_CASE_METHOD( 10, nullptr, 0, + false, memory_tracker_), matcher); } @@ -455,6 +482,7 @@ TEST_CASE_METHOD( "Invalid data size; must be non-zero for fixed size data."); REQUIRE_THROWS_WITH( Enumeration::create( + ctx_.resources(), default_enmr_name, Datatype::INT32, 1, @@ -463,6 +491,7 @@ TEST_CASE_METHOD( 0, nullptr, 0, + false, memory_tracker_), matcher); } @@ -476,6 +505,7 @@ TEST_CASE_METHOD( "Var sized enumeration values require a non-null offsets pointer."); REQUIRE_THROWS_WITH( Enumeration::create( + ctx_.resources(), default_enmr_name, Datatype::STRING_ASCII, constants::var_num, @@ -484,6 +514,7 @@ TEST_CASE_METHOD( strlen(val), nullptr, 8, + false, memory_tracker_), matcher); } @@ -498,6 +529,7 @@ TEST_CASE_METHOD( "Var sized enumeration values require a non-zero offsets size."); REQUIRE_THROWS_WITH( Enumeration::create( + ctx_.resources(), default_enmr_name, Datatype::STRING_ASCII, constants::var_num, @@ -506,6 +538,7 @@ TEST_CASE_METHOD( strlen(val), &offset, 0, + false, memory_tracker_), matcher); } @@ -520,6 +553,7 @@ TEST_CASE_METHOD( "is non-zero."); REQUIRE_THROWS_WITH( Enumeration::create( + ctx_.resources(), default_enmr_name, Datatype::STRING_ASCII, constants::var_num, @@ -528,6 +562,7 @@ TEST_CASE_METHOD( 5, &offsets, sizeof(uint64_t), + false, memory_tracker_), matcher); } @@ -542,6 +577,7 @@ TEST_CASE_METHOD( "require data."); REQUIRE_THROWS_WITH( Enumeration::create( + ctx_.resources(), default_enmr_name, Datatype::STRING_ASCII, constants::var_num, @@ -550,6 +586,7 @@ TEST_CASE_METHOD( 5, &offsets, sizeof(uint64_t), + false, memory_tracker_), matcher); } @@ -565,6 +602,7 @@ TEST_CASE_METHOD( "offset."); REQUIRE_THROWS_WITH( Enumeration::create( + ctx_.resources(), default_enmr_name, Datatype::STRING_ASCII, constants::var_num, @@ -573,6 +611,7 @@ TEST_CASE_METHOD( 2, &offsets, sizeof(uint64_t), + false, memory_tracker_), matcher); } @@ -583,6 +622,7 @@ TEST_CASE_METHOD( "[enumeration][error][invalid-name]") { std::vector values = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; REQUIRE_THROWS(Enumeration::create( + ctx_.resources(), std::string(), Datatype::INT32, 2, @@ -591,6 +631,7 @@ TEST_CASE_METHOD( values.size() * sizeof(int), nullptr, 0, + false, memory_tracker_)); } @@ -600,6 +641,7 @@ TEST_CASE_METHOD( "[enumeration][error][invalid-name]") { std::vector values = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; REQUIRE_THROWS(Enumeration::create( + ctx_.resources(), "", Datatype::INT32, 2, @@ -608,6 +650,7 @@ TEST_CASE_METHOD( values.size() * sizeof(int), nullptr, 0, + false, memory_tracker_)); } @@ -617,6 +660,7 @@ TEST_CASE_METHOD( "[enumeration][error][invalid-name]") { std::vector values = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; REQUIRE_THROWS(Enumeration::create( + ctx_.resources(), default_enmr_name, "an/bad/path", Datatype::INT32, @@ -626,6 +670,7 @@ TEST_CASE_METHOD( values.size() * sizeof(int), nullptr, 0, + false, memory_tracker_)); } @@ -635,6 +680,7 @@ TEST_CASE_METHOD( "[enumeration][error][invalid-cell-val-num]") { std::vector values = {1, 2, 3}; REQUIRE_THROWS(Enumeration::create( + ctx_.resources(), default_enmr_name, Datatype::INT32, 0, @@ -643,6 +689,7 @@ TEST_CASE_METHOD( values.size() * sizeof(int), nullptr, 0, + false, memory_tracker_)); } @@ -652,6 +699,7 @@ TEST_CASE_METHOD( "[enumeration][error][data-nullptr]") { std::vector values = {1, 2, 3}; REQUIRE_THROWS(Enumeration::create( + ctx_.resources(), default_enmr_name, Datatype::INT32, 1, @@ -660,6 +708,7 @@ TEST_CASE_METHOD( values.size() * sizeof(int), nullptr, 0, + false, memory_tracker_)); } @@ -669,6 +718,7 @@ TEST_CASE_METHOD( "[enumeration][error][data-zero-size]") { std::vector values = {1, 2, 3}; REQUIRE_THROWS(Enumeration::create( + ctx_.resources(), default_enmr_name, Datatype::INT32, 1, @@ -677,6 +727,7 @@ TEST_CASE_METHOD( 0, nullptr, 0, + false, memory_tracker_)); } @@ -687,6 +738,7 @@ TEST_CASE_METHOD( auto data = "foobarbazbam"; std::vector offsets = {0, 3, 6, 9}; REQUIRE_THROWS(Enumeration::create( + ctx_.resources(), default_enmr_name, Datatype::STRING_ASCII, constants::var_num, @@ -695,6 +747,7 @@ TEST_CASE_METHOD( strlen(data), nullptr, offsets.size() * sizeof(uint64_t), + false, memory_tracker_)); } @@ -705,6 +758,7 @@ TEST_CASE_METHOD( auto data = "foobarbazbam"; std::vector offsets = {0, 3, 6, 9}; REQUIRE_THROWS(Enumeration::create( + ctx_.resources(), default_enmr_name, Datatype::STRING_ASCII, constants::var_num, @@ -713,6 +767,7 @@ TEST_CASE_METHOD( strlen(data), offsets.data(), 0, + false, memory_tracker_)); } @@ -723,6 +778,7 @@ TEST_CASE_METHOD( std::vector values = {0, 1, 2, 3, 4}; std::vector offsets = {0, 3, 6, 9}; REQUIRE_THROWS(Enumeration::create( + ctx_.resources(), default_enmr_name, Datatype::INT32, 1, @@ -731,6 +787,7 @@ TEST_CASE_METHOD( values.size() * sizeof(int), offsets.data(), 0, + false, memory_tracker_)); } @@ -740,6 +797,7 @@ TEST_CASE_METHOD( "[enumeration][error][offsets-not-required]") { std::vector values = {0, 1, 2, 3, 4}; REQUIRE_THROWS(Enumeration::create( + ctx_.resources(), default_enmr_name, Datatype::INT32, 1, @@ -748,6 +806,7 @@ TEST_CASE_METHOD( values.size() * sizeof(int), nullptr, 100, + false, memory_tracker_)); } @@ -760,6 +819,7 @@ TEST_CASE_METHOD( // Passing 3 for the offsets size is incorrect because the offsets size has // to be a multiple of `sizeof(uint64_t)` REQUIRE_THROWS(Enumeration::create( + ctx_.resources(), default_enmr_name, Datatype::STRING_ASCII, constants::var_num, @@ -768,6 +828,7 @@ TEST_CASE_METHOD( strlen(data), offsets.data(), 3, + false, memory_tracker_)); } @@ -779,6 +840,7 @@ TEST_CASE_METHOD( std::vector offsets = {0, 3, 6, 100}; // The last offset is larger than data_size REQUIRE_THROWS(Enumeration::create( + ctx_.resources(), default_enmr_name, Datatype::STRING_ASCII, constants::var_num, @@ -787,6 +849,7 @@ TEST_CASE_METHOD( strlen(data), offsets.data(), offsets.size() * sizeof(uint64_t), + false, memory_tracker_)); } @@ -798,6 +861,7 @@ TEST_CASE_METHOD( // Passing 3 for the data size is invalid as its not a multiple of // sizeof(int) REQUIRE_THROWS(Enumeration::create( + ctx_.resources(), default_enmr_name, Datatype::INT32, 1, @@ -806,6 +870,7 @@ TEST_CASE_METHOD( 3, nullptr, 0, + false, memory_tracker_)); } @@ -833,6 +898,80 @@ TEST_CASE_METHOD( REQUIRE_THROWS(create_enumeration(values)); } +TEST_CASE_METHOD( + EnumerationFx, + "Enumeration Creation Error - Async", + "[enumeration][error][async]") { + std::vector values = {"foo", "", "foo", "bar"}; + + SECTION("No await") { + REQUIRE_NOTHROW(create_enumeration( + values, false, Datatype::STRING_ASCII, default_enmr_name, true)); + } + + SECTION("No await, access a field") { + auto enmr = create_enumeration( + values, false, Datatype::STRING_ASCII, default_enmr_name, true); + REQUIRE_NOTHROW(enmr->name()); + } + + auto matcher = Catch::Matchers::ContainsSubstring( + "Invalid duplicated value in enumeration"); + + SECTION("Await value_map") { + auto enmr = create_enumeration( + values, false, Datatype::STRING_ASCII, default_enmr_name, true); + REQUIRE_THROWS(enmr->value_map(), matcher); + } + + SECTION("Await index_of") { + auto enmr = create_enumeration( + values, false, Datatype::STRING_ASCII, default_enmr_name, true); + REQUIRE_THROWS(enmr->index_of("foo", 3), matcher); + } + + SECTION("Await index_of exception is thrown every time") { + auto enmr = create_enumeration( + values, false, Datatype::STRING_ASCII, default_enmr_name, true); + REQUIRE_THROWS(enmr->index_of("foo", 3), matcher); + REQUIRE_THROWS(enmr->index_of("foo", 3), matcher); + } +} + +TEST_CASE_METHOD( + EnumerationFx, + "Enumeration Creation - Async Safety", + "[enumeration][async]") { + shared_ptr enmr; + { + const std::vector values = {"foo", "bar", "baz", "gub"}; + + Config cfg; + Context ctx(cfg); + enmr = ::create_enumeration( + ctx, + memory_tracker_, + values, + false, + Datatype::STRING_ASCII, + default_enmr_name, + true); + + // `ctx` is destructed here. + // The future borrows its resources. + // If `ctx` does not await its thread pool tasks, then the + // `generate_value_map` future may refer to the destructed resources and + // SEGV. + // (fortunately `ctx` does await its thread pool tasks, so that shouldn't + // happen) + } + + CHECK(enmr->index_of("foo", 3) == 0); + CHECK(enmr->index_of("bar", 3) == 1); + CHECK(enmr->index_of("baz", 3) == 2); + CHECK(enmr->index_of("gub", 3) == 3); +} + TEST_CASE_METHOD( EnumerationFx, "Enumeration Extension Fixed Size", @@ -869,6 +1008,7 @@ TEST_CASE_METHOD( std::vector extend_values = {5, 6, 7, 8, 9, 10}; std::vector final_values = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; auto enmr1 = Enumeration::create( + ctx_.resources(), default_enmr_name, Datatype::INT32, 2, @@ -877,6 +1017,7 @@ TEST_CASE_METHOD( init_values.size() * sizeof(int), nullptr, 0, + false, memory_tracker_); auto enmr2 = extend_enumeration(enmr1, extend_values); check_enumeration( @@ -1074,7 +1215,8 @@ TEST_CASE_METHOD( memset(data, 1, 4); Deserializer deserializer(tile->data(), tile->size()); - REQUIRE_THROWS(Enumeration::deserialize(deserializer, memory_tracker_)); + REQUIRE_THROWS(Enumeration::deserialize( + ctx_.resources(), deserializer, memory_tracker_)); } TEST_CASE_METHOD( @@ -1519,6 +1661,7 @@ TEST_CASE_METHOD( std::vector data(1024 * 1024 * 10 + 1); std::vector offsets = {0}; auto enmr = Enumeration::create( + ctx_.resources(), "enmr_name", Datatype::STRING_ASCII, constants::var_num, @@ -1527,6 +1670,7 @@ TEST_CASE_METHOD( data.size(), offsets.data(), offsets.size() * constants::cell_var_offset_size, + false, memory_tracker_); schema->add_enumeration(enmr); @@ -1561,6 +1705,7 @@ TEST_CASE_METHOD( // Create more than 50MiB of enumeration data for (size_t i = 0; i < 10; i++) { auto enmr = Enumeration::create( + ctx_.resources(), "enmr_name_" + std::to_string(i), Datatype::STRING_ASCII, constants::var_num, @@ -1569,6 +1714,7 @@ TEST_CASE_METHOD( data.size(), offsets.data(), offsets.size() * constants::cell_var_offset_size, + false, memory_tracker_); schema->add_enumeration(enmr); } @@ -1690,6 +1836,7 @@ TEST_CASE_METHOD( // We have to force this condition by hand auto enmr3 = tiledb::sm::Enumeration::create( + ctx_.resources(), enmr2->name(), // Notice we're reusing the existing path name from enmr1 enmr1->path_name(), @@ -1700,6 +1847,7 @@ TEST_CASE_METHOD( enmr2->data().size(), enmr2->offsets().data(), enmr2->offsets().size(), + false, memory_tracker_); auto matcher = Catch::Matchers::ContainsSubstring( @@ -2366,6 +2514,7 @@ TEST_CASE_METHOD( auto schema1 = create_schema(); auto enmr1 = Enumeration::create( + ctx_.resources(), "empty_fixed", Datatype::INT32, 1, @@ -2374,8 +2523,10 @@ TEST_CASE_METHOD( 0, nullptr, 0, + false, memory_tracker_); auto enmr2 = Enumeration::create( + ctx_.resources(), "empty_var", Datatype::STRING_ASCII, constants::var_num, @@ -2384,6 +2535,7 @@ TEST_CASE_METHOD( 0, nullptr, 0, + false, memory_tracker_); schema1->add_enumeration(enmr1); @@ -2670,11 +2822,14 @@ EnumerationFx::~EnumerationFx() { } template -shared_ptr EnumerationFx::create_enumeration( +static shared_ptr create_enumeration( + Context& ctx, + shared_ptr memory_tracker, const std::vector& values, bool ordered, Datatype type, - std::string name) { + std::string name, + bool async) { TypeParams tp = TypeParams::get(values); if (type != static_cast(255)) { @@ -2689,6 +2844,7 @@ shared_ptr EnumerationFx::create_enumeration( raw_values[i] = values[i] ? 1 : 0; } return Enumeration::create( + ctx.resources(), name, tp.type_, tp.cell_val_num_, @@ -2697,9 +2853,11 @@ shared_ptr EnumerationFx::create_enumeration( raw_values.size() * sizeof(uint8_t), nullptr, 0, - memory_tracker_); + async, + memory_tracker); } else if constexpr (std::is_pod_v) { return Enumeration::create( + ctx.resources(), name, tp.type_, tp.cell_val_num_, @@ -2708,7 +2866,8 @@ shared_ptr EnumerationFx::create_enumeration( values.size() * sizeof(T), nullptr, 0, - memory_tracker_); + async, + memory_tracker); } else { uint64_t total_size = 0; for (auto v : values) { @@ -2727,6 +2886,7 @@ shared_ptr EnumerationFx::create_enumeration( } return Enumeration::create( + ctx.resources(), name, tp.type_, tp.cell_val_num_, @@ -2735,13 +2895,26 @@ shared_ptr EnumerationFx::create_enumeration( total_size, offsets.data(), offsets.size() * sizeof(uint64_t), - memory_tracker_); + async, + memory_tracker); } } +template +shared_ptr EnumerationFx::create_enumeration( + const std::vector& values, + bool ordered, + Datatype type, + std::string name, + bool async) { + return ::create_enumeration( + ctx_, memory_tracker_, values, ordered, type, name, async); +} + shared_ptr EnumerationFx::create_empty_enumeration( Datatype type, uint32_t cell_val_num, bool ordered, std::string name) { return Enumeration::create( + ctx_.resources(), name, type, cell_val_num, @@ -2750,6 +2923,7 @@ shared_ptr EnumerationFx::create_empty_enumeration( 0, nullptr, 0, + false, memory_tracker_); } @@ -2824,7 +2998,8 @@ void EnumerationFx::check_storage_deserialization( auto tile = serialize_to_tile(enmr); Deserializer deserializer(tile->data(), tile->size()); - auto deserialized = Enumeration::deserialize(deserializer, memory_tracker_); + auto deserialized = + Enumeration::deserialize(ctx_.resources(), deserializer, memory_tracker_); REQUIRE(deserialized->name() == enmr->name()); REQUIRE(deserialized->path_name().empty() == false); @@ -3011,7 +3186,8 @@ shared_ptr EnumerationFx::ser_des_array_schema( MemoryType::SERIALIZATION_BUFFER)}; throw_if_not_ok(serialization::array_schema_serialize( *(schema.get()), stype, buf, client_side)); - return serialization::array_schema_deserialize(stype, buf, memory_tracker_); + return serialization::array_schema_deserialize( + ctx_.resources(), stype, buf, memory_tracker_); } shared_ptr EnumerationFx::ser_des_array_schema_evolution( @@ -3023,7 +3199,7 @@ shared_ptr EnumerationFx::ser_des_array_schema_evolution( ArraySchemaEvolution* ret; throw_if_not_ok(serialization::array_schema_evolution_deserialize( - &ret, cfg_, stype, buf, memory_tracker_)); + &ret, ctx_.resources(), cfg_, stype, buf, memory_tracker_)); return shared_ptr(ret); } diff --git a/test/src/unit-request-handlers.cc b/test/src/unit-request-handlers.cc index 12bc6468aca..916d9290789 100644 --- a/test/src/unit-request-handlers.cc +++ b/test/src/unit-request-handlers.cc @@ -465,6 +465,7 @@ HandleLoadArraySchemaRequestFx::create_string_enumeration( } return Enumeration::create( + ctx_.resources(), name, Datatype::STRING_ASCII, constants::var_num, @@ -473,6 +474,7 @@ HandleLoadArraySchemaRequestFx::create_string_enumeration( total_size, offsets.data(), offsets.size() * sizeof(uint64_t), + true, tiledb::test::create_test_memory_tracker()); } @@ -546,7 +548,7 @@ HandleLoadArraySchemaRequestFx::call_handler( REQUIRE(rval == TILEDB_OK); return serialization::deserialize_load_array_schema_response( - uri_, cfg_, stype, resp_buf->buffer(), memory_tracker_); + ctx_.resources(), uri_, cfg_, stype, resp_buf->buffer(), memory_tracker_); } shared_ptr HandleQueryPlanRequestFx::create_schema() { diff --git a/tiledb/api/c_api/enumeration/enumeration_api.cc b/tiledb/api/c_api/enumeration/enumeration_api.cc index 90b0b5d9aa8..746cd063545 100644 --- a/tiledb/api/c_api/enumeration/enumeration_api.cc +++ b/tiledb/api/c_api/enumeration/enumeration_api.cc @@ -71,6 +71,7 @@ capi_return_t tiledb_enumeration_alloc( memory_tracker->set_type(tiledb::sm::MemoryTrackerType::ENUMERATION_CREATE); *enumeration = tiledb_enumeration_handle_t::make_handle( + ctx->context().resources(), std::string(name), datatype, cell_val_num, @@ -79,7 +80,11 @@ capi_return_t tiledb_enumeration_alloc( data_size, offsets, offsets_size, + false, memory_tracker); + + // wait for the value map to finish in case it throws + (*enumeration)->enumeration()->value_map(); } catch (...) { *enumeration = nullptr; throw; diff --git a/tiledb/api/c_api/enumeration/enumeration_api_internal.h b/tiledb/api/c_api/enumeration/enumeration_api_internal.h index 108dcd2d68f..883d76186b6 100644 --- a/tiledb/api/c_api/enumeration/enumeration_api_internal.h +++ b/tiledb/api/c_api/enumeration/enumeration_api_internal.h @@ -34,6 +34,7 @@ #define TILEDB_CAPI_ENUMERATION_INTERNAL_H #include "enumeration_api_experimental.h" +#include "tiledb/api/c_api/context/context_api_internal.h" #include "tiledb/api/c_api_support/handle/handle.h" #include "tiledb/common/common.h" #include "tiledb/sm/array_schema/enumeration.h" diff --git a/tiledb/api/c_api/enumeration/test/compile_capi_enumeration_stub_main.cc b/tiledb/api/c_api/enumeration/test/compile_capi_enumeration_stub_main.cc index d06771a9343..d3afacaed61 100644 --- a/tiledb/api/c_api/enumeration/test/compile_capi_enumeration_stub_main.cc +++ b/tiledb/api/c_api/enumeration/test/compile_capi_enumeration_stub_main.cc @@ -28,10 +28,14 @@ #include "../enumeration_api_internal.h" #include "tiledb/sm/enums/datatype.h" +#include "tiledb/sm/storage_manager/context.h" int main() { + tiledb::sm::Config cfg; + tiledb::sm::Context ctx(cfg); try { tiledb_enumeration_handle_t e{ + ctx.resources(), "fooo", tiledb::sm::Datatype::INT32, 1, @@ -40,6 +44,7 @@ int main() { 0, nullptr, 0, + false, nullptr}; } catch (...) { } diff --git a/tiledb/sm/array/array.cc b/tiledb/sm/array/array.cc index d2be21b1010..bc16de0e99b 100644 --- a/tiledb/sm/array/array.cc +++ b/tiledb/sm/array/array.cc @@ -336,7 +336,7 @@ Status Array::open_without_fragments( throw instead of return Status */ if (!use_refactored_array_open()) { auto&& [st, array_schema_latest] = - rest_client->get_array_schema_from_rest(array_uri_); + rest_client->get_array_schema_from_rest(resources_, array_uri_); if (!st.ok()) { throw StatusException(st); } @@ -499,7 +499,7 @@ Status Array::open( } if (!use_refactored_array_open()) { auto&& [st, array_schema_latest] = - rest_client->get_array_schema_from_rest(array_uri_); + rest_client->get_array_schema_from_rest(resources_, array_uri_); throw_if_not_ok(st); set_array_schema_latest(array_schema_latest.value()); if (serialize_enumerations()) { @@ -850,6 +850,7 @@ Array::get_enumerations_all_schemas() { // Pass an empty list of enumeration names. REST will use timestamps to // load all enumerations on all schemas for the array within that range. ret = rest_client->post_enumerations_from_rest( + resources_, array_uri_, array_dir_timestamp_start_, array_dir_timestamp_end_, @@ -949,6 +950,7 @@ std::vector> Array::get_enumerations( } loaded = rest_client->post_enumerations_from_rest( + resources_, array_uri_, array_dir_timestamp_start_, array_dir_timestamp_end_, @@ -2137,6 +2139,7 @@ void load_enumeration_into_schema( } auto ret = rest_client->post_enumerations_from_rest( + ctx.resources(), array_schema.array_uri(), array_schema.timestamp_start(), array_schema.timestamp_end(), diff --git a/tiledb/sm/array/array.h b/tiledb/sm/array/array.h index 71e9361cb55..fc91d8b9ff5 100644 --- a/tiledb/sm/array/array.h +++ b/tiledb/sm/array/array.h @@ -746,6 +746,10 @@ class Array { return config_; } + inline const ContextResources& resources() const { + return resources_; + } + /** Directly set the array URI for serialized compatibility with pre * TileDB 2.5 clients */ void set_uri_serialized(const std::string& uri) { diff --git a/tiledb/sm/array/array_directory.cc b/tiledb/sm/array/array_directory.cc index c0b0a913765..1abbdb63d70 100644 --- a/tiledb/sm/array/array_directory.cc +++ b/tiledb/sm/array/array_directory.cc @@ -1344,7 +1344,7 @@ shared_ptr ArrayDirectory::load_enumeration( } Deserializer deserializer(tile->data(), tile->size()); - return Enumeration::deserialize(deserializer, memory_tracker); + return Enumeration::deserialize(resources_, deserializer, memory_tracker); } } // namespace tiledb::sm diff --git a/tiledb/sm/array_schema/CMakeLists.txt b/tiledb/sm/array_schema/CMakeLists.txt index 9a398c660c3..acf18c91709 100644 --- a/tiledb/sm/array_schema/CMakeLists.txt +++ b/tiledb/sm/array_schema/CMakeLists.txt @@ -62,7 +62,10 @@ conclude(object_library) # commence(object_library enumeration) this_target_sources(enumeration.cc) - this_target_object_libraries(buffer constants seedable_global_PRNG) + this_target_object_libraries(buffer constants context_resources seedable_global_PRNG) + if(TILEDB_STATS) + this_target_compile_definitions(-DTILEDB_STATS) + endif() conclude(object_library) # diff --git a/tiledb/sm/array_schema/array_schema_operations.cc b/tiledb/sm/array_schema/array_schema_operations.cc index 4d40deaeaac..8db79e0a2cf 100644 --- a/tiledb/sm/array_schema/array_schema_operations.cc +++ b/tiledb/sm/array_schema/array_schema_operations.cc @@ -245,7 +245,7 @@ shared_ptr load_array_schema( if (uri.is_tiledb()) { auto& rest_client = ctx.rest_client(); auto&& [st, array_schema_response] = - rest_client.get_array_schema_from_rest(uri); + rest_client.get_array_schema_from_rest(ctx.resources(), uri); throw_if_not_ok(st); auto array_schema = std::move(array_schema_response).value(); @@ -254,6 +254,7 @@ shared_ptr load_array_schema( // Pass an empty list of enumeration names. REST will use timestamps to // load all enumerations on all schemas for the array within that range. auto ret = rest_client.post_enumerations_from_rest( + ctx.resources(), uri, array_schema->timestamp_start(), array_schema->timestamp_end(), diff --git a/tiledb/sm/array_schema/enumeration.cc b/tiledb/sm/array_schema/enumeration.cc index 202172297f8..2e604003d15 100644 --- a/tiledb/sm/array_schema/enumeration.cc +++ b/tiledb/sm/array_schema/enumeration.cc @@ -33,6 +33,7 @@ #include #include +#include "tiledb/common/logger.h" #include "tiledb/common/memory_tracker.h" #include "tiledb/common/random/random_label.h" @@ -49,6 +50,7 @@ class EnumerationException : public StatusException { }; Enumeration::Enumeration( + const ContextResources& resources, const std::string& name, const std::string& path_name, Datatype type, @@ -58,8 +60,10 @@ Enumeration::Enumeration( uint64_t data_size, const void* offsets, uint64_t offsets_size, + bool async, shared_ptr memory_tracker) - : memory_tracker_(memory_tracker) + : resources_(resources) + , memory_tracker_(memory_tracker) , name_(name) , path_name_(path_name) , type_(type) @@ -94,6 +98,7 @@ Enumeration::Enumeration( if (data_empty && offsets_empty) { // This is an empty enumeration so we're done checking for argument // validity. + value_map_status_.emplace(Status::Ok()); return; } @@ -191,11 +196,36 @@ Enumeration::Enumeration( throw_if_not_ok(offsets_.write(offsets, 0, offsets_size)); } - generate_value_map(); + if (async) { + tiledb::common::ThreadPool::Task value_map_future = + resources.compute_tp().async( + [](std::reference_wrapper enumeration) { + enumeration.get().generate_value_map(); + return Status::Ok(); + }, + std::reference_wrapper(*this)); + value_map_future_ = std::move(value_map_future); + } else { + generate_value_map(); + value_map_status_.emplace(Status::Ok()); + } +} + +Enumeration::~Enumeration() { + if (value_map_future_.valid()) { + auto st = value_map_future_.wait(); + if (!st.ok()) { + tiledb::common::global_logger().warn( + "Enumeration::~Enumeration(): Error computing value map: " + + st.to_string()); + } + } } shared_ptr Enumeration::deserialize( - Deserializer& deserializer, shared_ptr memory_tracker) { + const ContextResources& resources, + Deserializer& deserializer, + shared_ptr memory_tracker) { auto disk_version = deserializer.read(); if (disk_version > constants::enumerations_version) { throw EnumerationException( @@ -233,6 +263,7 @@ shared_ptr Enumeration::deserialize( } return create( + resources, name, path_name, static_cast(type), @@ -242,6 +273,7 @@ shared_ptr Enumeration::deserialize( data_size, offsets, offsets_size, + true, memory_tracker); } @@ -326,6 +358,7 @@ shared_ptr Enumeration::extend( } return create( + resources_, name_, "", type_, @@ -335,6 +368,7 @@ shared_ptr Enumeration::extend( new_data.size(), new_offsets_ptr, new_offsets_size, + false, memory_tracker_); } @@ -413,15 +447,36 @@ void Enumeration::serialize(Serializer& serializer) const { uint64_t Enumeration::index_of(const void* data, uint64_t size) const { std::string_view value_view(static_cast(data), size); - auto iter = value_map_.find(value_view); - if (iter == value_map_.end()) { + const auto& values = value_map(); + auto iter = values.find(value_view); + if (iter == values.end()) { return constants::enumeration_missing_value; } return iter->second; } +/** + * Add a value to a value map. Checks for duplicates. + * + * @param sv A string view of the data to add. + * @param index The index of the data in the Enumeration. + */ +static void add_value_to_map( + Enumeration::EnumerationValueMap& value_map, + std::string_view& sv, + uint64_t index) { + const auto res = value_map.emplace(sv, index); + if (!res.second) { + throw EnumerationException( + "Invalid duplicated value in enumeration '" + std::string(sv) + "'"); + } +} + void Enumeration::generate_value_map() { + auto timer = + resources_.stats().start_timer("Enumeration::generate_value_map"); + auto char_data = data_.data_as(); if (var_size()) { auto offsets = offsets_.data_as(); @@ -438,7 +493,7 @@ void Enumeration::generate_value_map() { } auto sv = std::string_view(char_data + offsets[i], length); - add_value_to_map(sv, i); + add_value_to_map(value_map_, sv, i); } } else { uint64_t i = 0; @@ -448,20 +503,12 @@ void Enumeration::generate_value_map() { while (i * stride < data_.size()) { auto sv = std::string_view(char_data + i * stride, stride); - add_value_to_map(sv, i); + add_value_to_map(value_map_, sv, i); i += 1; } } } -void Enumeration::add_value_to_map(std::string_view& sv, uint64_t index) { - const auto res = value_map_.emplace(sv, index); - if (!res.second) { - throw EnumerationException( - "Invalid duplicated value in enumeration '" + std::string(sv) + "'"); - } -} - } // namespace tiledb::sm std::ostream& operator<<( diff --git a/tiledb/sm/array_schema/enumeration.h b/tiledb/sm/array_schema/enumeration.h index 3e63d3c3ff6..e7e456df1ae 100644 --- a/tiledb/sm/array_schema/enumeration.h +++ b/tiledb/sm/array_schema/enumeration.h @@ -37,9 +37,11 @@ #include "tiledb/common/common.h" #include "tiledb/common/pmr.h" +#include "tiledb/common/thread_pool/thread_pool.h" #include "tiledb/common/types/untyped_datum.h" #include "tiledb/sm/buffer/buffer.h" #include "tiledb/sm/enums/datatype.h" +#include "tiledb/sm/storage_manager/context.h" #include "tiledb/storage_format/serialization/serializers.h" namespace tiledb::sm { @@ -49,6 +51,9 @@ class MemoryTracker; /** Defines an array enumeration */ class Enumeration { public: + using EnumerationValueMap = + tdb::pmr::unordered_map; + /* ********************************* */ /* CONSTRUCTORS & DESTRUCTORS */ /* ********************************* */ @@ -60,7 +65,7 @@ class Enumeration { DISABLE_MOVE(Enumeration); /** Destructor. */ - ~Enumeration() = default; + ~Enumeration(); /* ********************************* */ /* OPERATORS */ @@ -75,6 +80,7 @@ class Enumeration { /** Create a new Enumeration * + * @param resources Resources for computing the enumeration value map. * @param name The name of this Enumeration as referenced by attributes. * @param type The datatype of the enumeration values. * @param cell_val_num The cell_val_num of the enumeration. @@ -87,10 +93,12 @@ class Enumeration { * offsets buffer. Must be null if cell_var_num is not var_num. * @param offsets_size The size of the buffer pointed to by offsets. Must be * zero of cell_var_num is not var_num. + * @param async whether to lazily or eagerly compute `value_map_` * @param memory_tracker The memory tracker associated with this Enumeration. * @return shared_ptr The created enumeration. */ static shared_ptr create( + const ContextResources& resources, const std::string& name, Datatype type, uint32_t cell_val_num, @@ -99,8 +107,10 @@ class Enumeration { uint64_t data_size, const void* offsets, uint64_t offsets_size, + bool async, shared_ptr memory_tracker) { return create( + resources, name, "", type, @@ -110,11 +120,13 @@ class Enumeration { data_size, offsets, offsets_size, + async, memory_tracker); } /** Create a new Enumeration * + * @param resources Resources for computing the enumeration value map. * @param name The name of this Enumeration as referenced by attributes. * @param path_name The last URI path component of the Enumeration. * @param type The datatype of the enumeration values. @@ -128,10 +140,12 @@ class Enumeration { * offsets buffer. Must be null if cell_var_num is not var_num. * @param offsets_size The size of the buffer pointed to by offsets. Must be * zero of cell_var_num is not var_num. + * @param async Whether to lazily or eagerly compute `value_map` * @param memory_tracker The memory tracker associated with this Enumeration. * @return shared_ptr The created enumeration. */ static shared_ptr create( + const ContextResources& resources, const std::string& name, const std::string& path_name, Datatype type, @@ -141,9 +155,11 @@ class Enumeration { uint64_t data_size, const void* offsets, uint64_t offsets_size, + bool async, shared_ptr memory_tracker) { struct EnableMakeShared : public Enumeration { EnableMakeShared( + const ContextResources& resources, const std::string& name, const std::string& path_name, Datatype type, @@ -153,8 +169,10 @@ class Enumeration { uint64_t data_size, const void* offsets, uint64_t offsets_size, + bool async, shared_ptr memory_tracker) : Enumeration( + resources, name, path_name, type, @@ -164,11 +182,13 @@ class Enumeration { data_size, offsets, offsets_size, + async, memory_tracker) { } }; return make_shared( HERE(), + resources, name, path_name, type, @@ -178,6 +198,7 @@ class Enumeration { data_size, offsets, offsets_size, + async, memory_tracker); } @@ -189,7 +210,9 @@ class Enumeration { * @return A new Enumeration. */ static shared_ptr deserialize( - Deserializer& deserializer, shared_ptr memory_tracker); + const ContextResources& resources, + Deserializer& deserializer, + shared_ptr memory_tracker); /** * Create a new enumeration by extending an existing enumeration's @@ -268,7 +291,18 @@ class Enumeration { * value map of the enumeration. */ const tdb::pmr::unordered_map& value_map() const { - return value_map_; + if (value_map_future_.valid()) { +#ifdef TILEDB_STATS + stats::DurationInstrument timer = + resources_.stats().start_timer("Enumeration::await_value_map"); +#endif + value_map_status_.emplace(value_map_future_.wait()); + } + if (value_map_status_.value().ok()) { + return value_map_; + } else { + throw StatusException(value_map_status_.value()); + } } /** @@ -361,6 +395,7 @@ class Enumeration { /** Constructor * + * @param resources Resources for building the enumeration value map. * @param name The name of this Enumeration as referenced by attributes. * @param path_name The last URI path component of the Enumeration. * @param type The datatype of the enumeration values. @@ -374,9 +409,11 @@ class Enumeration { * offsets buffer. Must be null if cell_var_num is not var_num. * @param offsets_size The size of the buffer pointed to by offsets. Must be * zero of cell_var_num is not var_num. + * @param async Whether to lazily or eagerly compute `value_map_` * @param memory_tracker The memory tracker. */ Enumeration( + const ContextResources& resources, const std::string& name, const std::string& path_name, Datatype type, @@ -386,12 +423,19 @@ class Enumeration { uint64_t data_size, const void* offsets, uint64_t offsets_size, + bool async, shared_ptr memory_tracker); /* ********************************* */ /* PRIVATE ATTRIBUTES */ /* ********************************* */ + /** + * Resources of the enclosing context. + * Used for timers. + */ + const ContextResources& resources_; + /** * The memory tracker of the Enumeration. */ @@ -418,8 +462,14 @@ class Enumeration { /** The offsets of each enumeration value if var sized. */ Buffer offsets_; - /** Map of values to indices */ - tdb::pmr::unordered_map value_map_; + /** Map of values to indices, computed lazily */ + EnumerationValueMap value_map_; + + /** Exception thrown while computing value map */ + mutable std::optional value_map_status_; + + /** Handle for waiting on the value map construction */ + mutable tiledb::common::ThreadPool::Task value_map_future_; /* ********************************* */ /* PRIVATE METHODS */ @@ -427,14 +477,6 @@ class Enumeration { /** Populate the value_map_ */ void generate_value_map(); - - /** - * Add a value to value_map_ - * - * @param sv A string view of the data to add. - * @param index The index of the data in the Enumeration. - */ - void add_value_to_map(std::string_view& sv, uint64_t index); }; } // namespace tiledb::sm diff --git a/tiledb/sm/array_schema/test/compile_enumeration_main.cc b/tiledb/sm/array_schema/test/compile_enumeration_main.cc index e8ce6f2ae9c..ff8e810b702 100644 --- a/tiledb/sm/array_schema/test/compile_enumeration_main.cc +++ b/tiledb/sm/array_schema/test/compile_enumeration_main.cc @@ -26,12 +26,18 @@ * THE SOFTWARE. */ +#include "tiledb/common/logger.h" #include "tiledb/sm/array_schema/enumeration.h" #include "tiledb/sm/enums/datatype.h" +#include "tiledb/sm/storage_manager/context_resources.h" int main(int, char*[]) { + tiledb::sm::Config config; + auto logger = make_shared(HERE(), "foo"); + tiledb::sm::ContextResources resources(config, logger, 1, 1, ""); try { tiledb::sm::Enumeration::create( + resources, "foo", tiledb::sm::Datatype::INT32, 1, @@ -40,6 +46,7 @@ int main(int, char*[]) { 0, nullptr, 0, + false, nullptr); } catch (...) { } diff --git a/tiledb/sm/c_api/tiledb.cc b/tiledb/sm/c_api/tiledb.cc index 27fb0a18c48..a2420a92342 100644 --- a/tiledb/sm/c_api/tiledb.cc +++ b/tiledb/sm/c_api/tiledb.cc @@ -1454,6 +1454,7 @@ int32_t tiledb_deserialize_array_schema( auto memory_tracker = ctx->resources().create_memory_tracker(); memory_tracker->set_type(sm::MemoryTrackerType::ARRAY_LOAD); auto shared_schema = tiledb::sm::serialization::array_schema_deserialize( + ctx->context().resources(), (tiledb::sm::SerializationType)serialize_type, buffer->buffer(), memory_tracker); @@ -1578,6 +1579,7 @@ int32_t tiledb_deserialize_array_schema_evolution( ctx, tiledb::sm::serialization::array_schema_evolution_deserialize( &((*array_schema_evolution)->array_schema_evolution_), + ctx->resources(), ctx->config(), (tiledb::sm::SerializationType)serialize_type, buffer->buffer(), @@ -2136,6 +2138,7 @@ int32_t tiledb_deserialize_fragment_info( if (SAVE_ERROR_CATCH( ctx, tiledb::sm::serialization::fragment_info_deserialize( + ctx->context().resources(), fragment_info->fragment_info().get(), (tiledb::sm::SerializationType)serialize_type, uri, diff --git a/tiledb/sm/fragment/fragment_info.cc b/tiledb/sm/fragment/fragment_info.cc index 287a5b90bc0..3b48b1eb572 100644 --- a/tiledb/sm/fragment/fragment_info.cc +++ b/tiledb/sm/fragment/fragment_info.cc @@ -818,7 +818,8 @@ Status FragmentInfo::load() { // as it's the case for local fragment info load requests. throw_if_not_ok(config_.set("sm.fragment_info.preload_mbrs", "true")); - return rest_client->post_fragment_info_from_rest(array_uri_, this); + return rest_client->post_fragment_info_from_rest( + *resources_, array_uri_, this); } // Create an ArrayDirectory object and load diff --git a/tiledb/sm/rest/rest_client.h b/tiledb/sm/rest/rest_client.h index 63cd3c3648f..197a320a0c5 100644 --- a/tiledb/sm/rest/rest_client.h +++ b/tiledb/sm/rest/rest_client.h @@ -331,7 +331,7 @@ class RestClient { /// Operation disabled in base class. inline virtual tuple>> - get_array_schema_from_rest(const URI&) { + get_array_schema_from_rest(const ContextResources&, const URI&) { throw RestClientDisabledException(); } @@ -340,7 +340,12 @@ class RestClient { shared_ptr, std::unordered_map>> post_array_schema_from_rest( - const Config&, const URI&, uint64_t, uint64_t, bool) { + const ContextResources&, + const Config&, + const URI&, + uint64_t, + uint64_t, + bool) { throw RestClientDisabledException(); } @@ -399,6 +404,7 @@ class RestClient { inline virtual std:: unordered_map>> post_enumerations_from_rest( + const ContextResources&, const URI&, uint64_t, uint64_t, @@ -443,7 +449,7 @@ class RestClient { /// Operation disabled in base class. inline virtual Status post_fragment_info_from_rest( - const URI&, FragmentInfo*) { + const ContextResources&, const URI&, FragmentInfo*) { throw RestClientDisabledException(); } diff --git a/tiledb/sm/rest/rest_client_remote.cc b/tiledb/sm/rest/rest_client_remote.cc index 1ce36fba250..6c0cbd8e54d 100644 --- a/tiledb/sm/rest/rest_client_remote.cc +++ b/tiledb/sm/rest/rest_client_remote.cc @@ -193,7 +193,8 @@ RestClientRemote::check_group_exists_from_rest(const URI& uri) { } tuple>> -RestClientRemote::get_array_schema_from_rest(const URI& uri) { +RestClientRemote::get_array_schema_from_rest( + const ContextResources& resources, const URI& uri) { // Init curl and form the URL Curl curlc(logger_); std::string array_ns, array_uri; @@ -218,7 +219,7 @@ RestClientRemote::get_array_schema_from_rest(const URI& uri) { nullopt}; auto array_schema = serialization::array_schema_deserialize( - serialization_type_, returned_data, memory_tracker_); + resources, serialization_type_, returned_data, memory_tracker_); array_schema->set_array_uri(uri); @@ -229,6 +230,7 @@ std::tuple< shared_ptr, std::unordered_map>> RestClientRemote::post_array_schema_from_rest( + const ContextResources& resources, const Config& config, const URI& uri, uint64_t timestamp_start, @@ -268,7 +270,12 @@ RestClientRemote::post_array_schema_from_rest( } return serialization::deserialize_load_array_schema_response( - uri, config, serialization_type_, returned_data, memory_tracker_); + resources, + uri, + config, + serialization_type_, + returned_data, + memory_tracker_); } Status RestClientRemote::post_array_schema_to_rest( @@ -558,6 +565,7 @@ Status RestClientRemote::post_array_metadata_to_rest( std::unordered_map>> RestClientRemote::post_enumerations_from_rest( + const ContextResources& resources, const URI& uri, uint64_t timestamp_start, uint64_t timestamp_end, @@ -601,7 +609,12 @@ RestClientRemote::post_enumerations_from_rest( } return serialization::deserialize_load_enumerations_response( - array_schema, config, serialization_type_, returned_data, memory_tracker); + resources, + array_schema, + config, + serialization_type_, + returned_data, + memory_tracker); } void RestClientRemote::post_query_plan_from_rest( @@ -1214,7 +1227,9 @@ Status RestClientRemote::post_array_schema_evolution_to_rest( } Status RestClientRemote::post_fragment_info_from_rest( - const URI& uri, FragmentInfo* fragment_info) { + const ContextResources& resources, + const URI& uri, + FragmentInfo* fragment_info) { if (fragment_info == nullptr) return LOG_STATUS(Status_RestError( "Error getting fragment info from REST; fragment info is null.")); @@ -1248,7 +1263,12 @@ Status RestClientRemote::post_fragment_info_from_rest( "Error getting fragment info from REST; server returned no data.")); return serialization::fragment_info_deserialize( - fragment_info, serialization_type_, uri, returned_data, memory_tracker_); + resources, + fragment_info, + serialization_type_, + uri, + returned_data, + memory_tracker_); } Status RestClientRemote::post_group_metadata_from_rest( diff --git a/tiledb/sm/rest/rest_client_remote.h b/tiledb/sm/rest/rest_client_remote.h index b2efa963bac..488f9945324 100644 --- a/tiledb/sm/rest/rest_client_remote.h +++ b/tiledb/sm/rest/rest_client_remote.h @@ -167,17 +167,19 @@ class RestClientRemote : public RestClient { /** * Get a data encoded array schema from rest server. * + * @param resources resources for building data structures * @param uri of array being loaded * @return Status and new ArraySchema shared pointer. */ tuple>> get_array_schema_from_rest( - const URI& uri) override; + const ContextResources& resources, const URI& uri) override; /** * Get an array schema from the rest server. This will eventually replace the * get_array_schema_from_rest after TileDB-Cloud-REST merges support for the * POST endpoint. * + * @param resources Resources for building data structures * @param config The TileDB config. * @param uri The Array URI to load the schema from. * @param timestamp_start The starting timestamp used to open the array. @@ -189,6 +191,7 @@ class RestClientRemote : public RestClient { shared_ptr, std::unordered_map>> post_array_schema_from_rest( + const ContextResources& resources, const Config& config, const URI& uri, uint64_t timestamp_start, @@ -312,6 +315,7 @@ class RestClientRemote : public RestClient { */ std::unordered_map>> post_enumerations_from_rest( + const ContextResources& ctx, const URI& uri, uint64_t timestamp_start, uint64_t timestamp_end, @@ -380,12 +384,15 @@ class RestClientRemote : public RestClient { /** * Get array's fragment info from rest server * + * @param resources Resources for building data structures * @param uri Array uri to query for * @param fragment_info Fragment info object to store the incoming info * @return Status Ok() on success Error() on failures */ Status post_fragment_info_from_rest( - const URI& uri, FragmentInfo* fragment_info) override; + const ContextResources& resources, + const URI& uri, + FragmentInfo* fragment_info) override; /** * Gets the group's metadata from the REST server (and updates the in-memory diff --git a/tiledb/sm/serialization/array.cc b/tiledb/sm/serialization/array.cc index 9a4ef23182e..cd0da781240 100644 --- a/tiledb/sm/serialization/array.cc +++ b/tiledb/sm/serialization/array.cc @@ -275,7 +275,10 @@ void array_from_capnp( auto entries = array_reader.getArraySchemasAll().getEntries(); for (auto array_schema_build : entries) { auto schema = array_schema_from_capnp( - array_schema_build.getValue(), array->array_uri(), memory_tracker); + resources, + array_schema_build.getValue(), + array->array_uri(), + memory_tracker); schema->set_array_uri(array->array_uri()); all_schemas[array_schema_build.getKey()] = schema; } @@ -286,7 +289,10 @@ void array_from_capnp( if (array_reader.hasArraySchemaLatest()) { auto array_schema_latest_reader = array_reader.getArraySchemaLatest(); auto array_schema_latest{array_schema_from_capnp( - array_schema_latest_reader, array->array_uri(), memory_tracker)}; + resources, + array_schema_latest_reader, + array->array_uri(), + memory_tracker)}; array_schema_latest->set_array_uri(array->array_uri()); array->set_array_schema_latest(array_schema_latest); } diff --git a/tiledb/sm/serialization/array_schema.cc b/tiledb/sm/serialization/array_schema.cc index 59edb97ff3d..6bce5d2c68e 100644 --- a/tiledb/sm/serialization/array_schema.cc +++ b/tiledb/sm/serialization/array_schema.cc @@ -800,6 +800,7 @@ void dimension_label_to_capnp( } shared_ptr dimension_label_from_capnp( + const ContextResources& resources, const capnp::DimensionLabel::Reader& dim_label_reader, shared_ptr memory_tracker) { // Get datatype @@ -808,7 +809,8 @@ shared_ptr dimension_label_from_capnp( shared_ptr schema{nullptr}; if (dim_label_reader.hasSchema()) { auto schema_reader = dim_label_reader.getSchema(); - schema = array_schema_from_capnp(schema_reader, URI(), memory_tracker); + schema = array_schema_from_capnp( + resources, schema_reader, URI(), memory_tracker); } auto is_relative = dim_label_reader.getRelative(); @@ -938,6 +940,7 @@ Status array_schema_to_capnp( // #TODO Add security validation on incoming URI shared_ptr array_schema_from_capnp( + const ContextResources& resources, const capnp::ArraySchema::Reader& schema_reader, const URI& uri, shared_ptr memory_tracker) { @@ -1093,8 +1096,8 @@ shared_ptr array_schema_from_capnp( dimension_labels.reserve(dim_labels_reader.size()); try { for (auto dim_label_reader : dim_labels_reader) { - dimension_labels.emplace_back( - dimension_label_from_capnp(dim_label_reader, memory_tracker)); + dimension_labels.emplace_back(dimension_label_from_capnp( + resources, dim_label_reader, memory_tracker)); } } catch (const std::exception& e) { std::throw_with_nested(std::runtime_error( @@ -1111,7 +1114,7 @@ shared_ptr array_schema_from_capnp( try { for (auto&& enmr_reader : enmr_readers) { enumerations.emplace_back( - enumeration_from_capnp(enmr_reader, memory_tracker)); + enumeration_from_capnp(resources, enmr_reader, memory_tracker)); } } catch (const std::exception& e) { std::throw_with_nested(std::runtime_error( @@ -1229,6 +1232,7 @@ Status array_schema_serialize( } shared_ptr array_schema_deserialize( + const ContextResources& resources, SerializationType serialize_type, span serialized_buffer, shared_ptr memory_tracker) { @@ -1243,7 +1247,7 @@ shared_ptr array_schema_deserialize( utils::decode_json_message(serialized_buffer, array_schema_builder); array_schema_reader = array_schema_builder.asReader(); return array_schema_from_capnp( - array_schema_reader, URI(), memory_tracker); + resources, array_schema_reader, URI(), memory_tracker); } case SerializationType::CAPNP: { const auto mBytes = @@ -1253,7 +1257,7 @@ shared_ptr array_schema_deserialize( serialized_buffer.size() / sizeof(::capnp::word))); array_schema_reader = reader.getRoot(); return array_schema_from_capnp( - array_schema_reader, URI(), memory_tracker); + resources, array_schema_reader, URI(), memory_tracker); } default: { throw StatusException(Status_SerializationError( @@ -1782,11 +1786,13 @@ std::tuple< shared_ptr, std::unordered_map>> load_array_schema_response_from_capnp( + const ContextResources& resources, const URI& uri, capnp::LoadArraySchemaResponse::Reader& reader, shared_ptr memory_tracker) { auto schema_reader = reader.getSchema(); - auto schema = array_schema_from_capnp(schema_reader, URI(), memory_tracker); + auto schema = + array_schema_from_capnp(resources, schema_reader, URI(), memory_tracker); schema->set_array_uri(uri); std::unordered_map> all_schemas; @@ -1797,7 +1803,10 @@ load_array_schema_response_from_capnp( auto entries = all_schemas_reader.getEntries(); for (auto array_schema_build : entries) { auto schema_entry = array_schema_from_capnp( - array_schema_build.getValue(), schema->array_uri(), memory_tracker); + resources, + array_schema_build.getValue(), + schema->array_uri(), + memory_tracker); schema_entry->set_array_uri(schema->array_uri()); all_schemas[array_schema_build.getKey()] = schema_entry; } @@ -1810,6 +1819,7 @@ std::tuple< shared_ptr, std::unordered_map>> deserialize_load_array_schema_response( + const ContextResources& resources, const URI& uri, const Config& config, SerializationType serialization_type, @@ -1824,7 +1834,7 @@ deserialize_load_array_schema_response( utils::decode_json_message(data, builder); auto reader = builder.asReader(); return load_array_schema_response_from_capnp( - uri, reader, memory_tracker); + resources, uri, reader, memory_tracker); } case SerializationType::CAPNP: { // Set traversal limit from config @@ -1842,7 +1852,7 @@ deserialize_load_array_schema_response( readerOptions); auto reader = array_reader.getRoot(); return load_array_schema_response_from_capnp( - uri, reader, memory_tracker); + resources, uri, reader, memory_tracker); } default: { throw ArraySchemaSerializationException( @@ -1870,7 +1880,10 @@ Status array_schema_serialize( } shared_ptr array_schema_deserialize( - SerializationType, span, shared_ptr) { + const ContextResources& resources, + SerializationType, + span, + shared_ptr) { throw StatusException(Status_SerializationError( "Cannot serialize; serialization not enabled.")); } @@ -1921,6 +1934,7 @@ std::tuple< shared_ptr, std::unordered_map>> deserialize_load_array_schema_response( + const ContextResources& resources, const URI&, const Config&, SerializationType, diff --git a/tiledb/sm/serialization/array_schema.h b/tiledb/sm/serialization/array_schema.h index aac214c57a9..a88dc2b38b3 100644 --- a/tiledb/sm/serialization/array_schema.h +++ b/tiledb/sm/serialization/array_schema.h @@ -49,6 +49,7 @@ namespace sm { class Array; class ArraySchema; +class ContextResources; class Dimension; class MemoryTracker; class SerializationBuffer; @@ -123,6 +124,7 @@ Status array_schema_to_capnp( * @return a new ArraySchema */ shared_ptr array_schema_from_capnp( + const ContextResources& resources, const capnp::ArraySchema::Reader& schema_reader, const URI& uri, shared_ptr memory_tracker); @@ -142,11 +144,13 @@ void dimension_label_to_capnp( /** * Deserialize a dimension label from a cap'n proto object * + * @param resources Resources for building data structures * @param reader Cap'n proto reader object. * @param memory_tracker The memory tracker to use. * @return A new DimensionLabel. */ shared_ptr dimension_label_from_capnp( + const ContextResources& resources, const capnp::DimensionLabel::Reader& reader, shared_ptr memory_tracker); @@ -168,6 +172,7 @@ Status array_schema_serialize( const bool client_side); shared_ptr array_schema_deserialize( + const ContextResources& resources, SerializationType serialize_type, span serialized_buffer, shared_ptr memory_tracker); @@ -214,6 +219,7 @@ std::tuple< shared_ptr, std::unordered_map>> deserialize_load_array_schema_response( + const ContextResources& resources, const URI& uri, const Config& config, SerializationType serialization_type, diff --git a/tiledb/sm/serialization/array_schema_evolution.cc b/tiledb/sm/serialization/array_schema_evolution.cc index 78b891a5f57..88974cbc7cc 100644 --- a/tiledb/sm/serialization/array_schema_evolution.cc +++ b/tiledb/sm/serialization/array_schema_evolution.cc @@ -165,6 +165,7 @@ Status array_schema_evolution_to_capnp( } tdb_unique_ptr array_schema_evolution_from_capnp( + const ContextResources& resources, const capnp::ArraySchemaEvolution::Reader& evolution_reader, shared_ptr memory_tracker) { // Create attributes to add @@ -187,7 +188,7 @@ tdb_unique_ptr array_schema_evolution_from_capnp( std::unordered_map> enmrs_to_add; auto enmrs_to_add_reader = evolution_reader.getEnumerationsToAdd(); for (auto enmr_reader : enmrs_to_add_reader) { - auto enmr = enumeration_from_capnp(enmr_reader, memory_tracker); + auto enmr = enumeration_from_capnp(resources, enmr_reader, memory_tracker); enmrs_to_add[enmr->name()] = enmr; } @@ -196,7 +197,7 @@ tdb_unique_ptr array_schema_evolution_from_capnp( enmrs_to_extend; auto enmrs_to_extend_reader = evolution_reader.getEnumerationsToExtend(); for (auto enmr_reader : enmrs_to_extend_reader) { - auto enmr = enumeration_from_capnp(enmr_reader, memory_tracker); + auto enmr = enumeration_from_capnp(resources, enmr_reader, memory_tracker); enmrs_to_extend[enmr->name()] = enmr; } @@ -289,6 +290,7 @@ Status array_schema_evolution_serialize( Status array_schema_evolution_deserialize( ArraySchemaEvolution** array_schema_evolution, + const ContextResources& resources, const Config& config, SerializationType serialize_type, span serialized_buffer, @@ -307,7 +309,7 @@ Status array_schema_evolution_deserialize( capnp::ArraySchemaEvolution::Reader array_schema_evolution_reader = array_schema_evolution_builder.asReader(); decoded_array_schema_evolution = array_schema_evolution_from_capnp( - array_schema_evolution_reader, memory_tracker); + resources, array_schema_evolution_reader, memory_tracker); break; } case SerializationType::CAPNP: { @@ -328,7 +330,7 @@ Status array_schema_evolution_deserialize( capnp::ArraySchemaEvolution::Reader array_schema_evolution_reader = reader.getRoot(); decoded_array_schema_evolution = array_schema_evolution_from_capnp( - array_schema_evolution_reader, memory_tracker); + resources, array_schema_evolution_reader, memory_tracker); break; } default: { @@ -371,6 +373,7 @@ Status array_schema_evolution_serialize( Status array_schema_evolution_deserialize( ArraySchemaEvolution**, + const ContextResources&, const Config&, SerializationType, span, diff --git a/tiledb/sm/serialization/array_schema_evolution.h b/tiledb/sm/serialization/array_schema_evolution.h index ae4bd86109a..045947fe100 100644 --- a/tiledb/sm/serialization/array_schema_evolution.h +++ b/tiledb/sm/serialization/array_schema_evolution.h @@ -70,6 +70,7 @@ Status array_schema_evolution_serialize( /** * Deserialize an array schema evolution via Cap'n Proto * @param array_schema_evolution pointer to store evolution object in + * @param resources resources for constructing * @param config associated config object * @param serialize_type format to serialize into Cap'n Proto or JSON * @param serialized_buffer buffer where serialized bytes are stored @@ -78,6 +79,7 @@ Status array_schema_evolution_serialize( */ Status array_schema_evolution_deserialize( ArraySchemaEvolution** array_schema_evolution, + const ContextResources& resources, const Config& config, SerializationType serialize_type, span serialized_buffer, diff --git a/tiledb/sm/serialization/enumeration.cc b/tiledb/sm/serialization/enumeration.cc index 69c3bd38939..b054164f949 100644 --- a/tiledb/sm/serialization/enumeration.cc +++ b/tiledb/sm/serialization/enumeration.cc @@ -88,6 +88,7 @@ void enumeration_to_capnp( } shared_ptr enumeration_from_capnp( + const ContextResources& resources, const capnp::Enumeration::Reader& reader, shared_ptr memory_tracker) { auto name = reader.getName(); @@ -113,6 +114,7 @@ shared_ptr enumeration_from_capnp( } return Enumeration::create( + resources, name, path_name, datatype, @@ -122,6 +124,7 @@ shared_ptr enumeration_from_capnp( data_size, offsets, offsets_size, + true, memory_tracker); } @@ -190,6 +193,7 @@ void load_enumerations_response_to_capnp( std::unordered_map>> load_enumerations_response_from_capnp( + const ContextResources& resources, const capnp::LoadEnumerationsResponse::Reader& reader, const ArraySchema& array_schema, shared_ptr memory_tracker) { @@ -200,7 +204,7 @@ load_enumerations_response_from_capnp( auto enmr_readers = reader.getEnumerations(); for (auto enmr_reader : enmr_readers) { loaded_enmrs.push_back( - enumeration_from_capnp(enmr_reader, memory_tracker)); + enumeration_from_capnp(resources, enmr_reader, memory_tracker)); } // The name of the latest array schema will not be serialized in the // response if we are only loading enumerations from the latest schema. @@ -211,7 +215,7 @@ load_enumerations_response_from_capnp( std::vector> loaded_enmrs; for (auto enmr_reader : enmr_entry_reader.getValue()) { loaded_enmrs.push_back( - enumeration_from_capnp(enmr_reader, memory_tracker)); + enumeration_from_capnp(resources, enmr_reader, memory_tracker)); } ret[enmr_entry_reader.getKey()] = loaded_enmrs; @@ -344,6 +348,7 @@ void serialize_load_enumerations_response( std::unordered_map>> deserialize_load_enumerations_response( + const ContextResources& resources, const ArraySchema& array_schema, const Config& config, SerializationType serialize_type, @@ -358,7 +363,7 @@ deserialize_load_enumerations_response( utils::decode_json_message(response, builder); capnp::LoadEnumerationsResponse::Reader reader = builder.asReader(); return load_enumerations_response_from_capnp( - reader, array_schema, memory_tracker); + resources, reader, array_schema, memory_tracker); } case SerializationType::CAPNP: { // Set traversal limit from config @@ -377,7 +382,7 @@ deserialize_load_enumerations_response( capnp::LoadEnumerationsResponse::Reader reader = array_reader.getRoot(); return load_enumerations_response_from_capnp( - reader, array_schema, memory_tracker); + resources, reader, array_schema, memory_tracker); } default: { throw EnumerationSerializationException( @@ -421,6 +426,7 @@ void serialize_load_enumerations_response( std::unordered_map>> deserialize_load_enumerations_response( + const Context&, const Array&, const Config&, SerializationType, diff --git a/tiledb/sm/serialization/enumeration.h b/tiledb/sm/serialization/enumeration.h index a015b97b41c..aa30fdf74a6 100644 --- a/tiledb/sm/serialization/enumeration.h +++ b/tiledb/sm/serialization/enumeration.h @@ -63,12 +63,14 @@ void enumeration_to_capnp( /** * Deserialize an enumeration from a cap'n proto object * + * @param resources Resources for building the enumeration value map * @param reader Cap'n proto reader object * @param memory_tracker The memory tracker associated with the Enumeration * object. * @return A new Enumeration */ shared_ptr enumeration_from_capnp( + const ContextResources& resources, const capnp::Enumeration::Reader& reader, shared_ptr memory_tracker); @@ -92,6 +94,7 @@ void serialize_load_enumerations_response( std::unordered_map>> deserialize_load_enumerations_response( + const ContextResources& ctx, const ArraySchema& array_schema, const Config& config, SerializationType serialization_type, diff --git a/tiledb/sm/serialization/fragment_info.cc b/tiledb/sm/serialization/fragment_info.cc index a38a6517ff3..76499ab3430 100644 --- a/tiledb/sm/serialization/fragment_info.cc +++ b/tiledb/sm/serialization/fragment_info.cc @@ -266,6 +266,7 @@ Status single_fragment_info_to_capnp( } Status fragment_info_from_capnp( + const ContextResources& resources, const capnp::FragmentInfo::Reader& fragment_info_reader, const URI& array_uri, FragmentInfo* fragment_info, @@ -275,7 +276,7 @@ Status fragment_info_from_capnp( auto array_schema_latest_reader = fragment_info_reader.getArraySchemaLatest(); auto array_schema_latest{array_schema_from_capnp( - array_schema_latest_reader, array_uri, memory_tracker)}; + resources, array_schema_latest_reader, array_uri, memory_tracker)}; array_schema_latest->set_array_uri(array_uri); fragment_info->array_schema_latest() = array_schema_latest; } @@ -288,7 +289,10 @@ Status fragment_info_from_capnp( auto entries = fragment_info_reader.getArraySchemasAll().getEntries(); for (auto array_schema_build : entries) { auto schema{array_schema_from_capnp( - array_schema_build.getValue(), array_uri, memory_tracker)}; + resources, + array_schema_build.getValue(), + array_uri, + memory_tracker)}; schema->set_array_uri(array_uri); auto key = std::string_view{ array_schema_build.getKey().cStr(), @@ -430,6 +434,7 @@ Status fragment_info_serialize( } Status fragment_info_deserialize( + const ContextResources& resources, FragmentInfo* fragment_info, SerializationType serialize_type, const URI& uri, @@ -450,7 +455,7 @@ Status fragment_info_deserialize( // Deserialize RETURN_NOT_OK(fragment_info_from_capnp( - reader, uri, fragment_info, memory_tracker)); + resources, reader, uri, fragment_info, memory_tracker)); break; } case SerializationType::CAPNP: { @@ -473,7 +478,7 @@ Status fragment_info_deserialize( // Deserialize RETURN_NOT_OK(fragment_info_from_capnp( - reader, uri, fragment_info, memory_tracker)); + resources, reader, uri, fragment_info, memory_tracker)); break; } default: { diff --git a/tiledb/sm/serialization/fragment_info.h b/tiledb/sm/serialization/fragment_info.h index 5e4c27c8bec..097f1a184ea 100644 --- a/tiledb/sm/serialization/fragment_info.h +++ b/tiledb/sm/serialization/fragment_info.h @@ -56,6 +56,7 @@ namespace serialization { /** * Convert Cap'n Proto message to Fragment Info. * + * @param resources Resources for building structures * @param fragment_info_reader cap'n proto class. * @param uri array uri that the fragment belongs to * @param fragment_info fragment info object to deserialize into. @@ -63,6 +64,7 @@ namespace serialization { * @return Status */ Status fragment_info_from_capnp( + const ContextResources& resources, const capnp::FragmentInfo::Reader& fragment_info_reader, const URI& uri, FragmentInfo* fragment_info, @@ -122,6 +124,7 @@ Status fragment_info_serialize( /** * Deserialize a Cap'n Proto message into a fragment info object. * + * @param resources Resources for building structures * @param fragment_info fragment info object to deserialize into. * @param serialize_type format the data is serialized in: Cap'n Proto of JSON. * @param uri array uri that the fragment belongs to @@ -130,6 +133,7 @@ Status fragment_info_serialize( * @return Status */ Status fragment_info_deserialize( + const ContextResources& resources, FragmentInfo* fragment_info, SerializationType serialize_type, const URI& uri,