chore: incorporate review feedback

This commit is contained in:
Robert Schulze 2022-08-22 18:02:07 +00:00
parent 3aec76d281
commit 35a37c91f8
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
4 changed files with 39 additions and 11 deletions

View File

@ -341,7 +341,8 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
}
else
{
LOG_WARNING(log, "Unknown library method: '{}'", method);
processError(response, "Unknown library method '" + method + "'");
LOG_ERROR(log, "Unknown library method: '{}'", method);
}
}
catch (...)
@ -517,7 +518,7 @@ void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & requ
const String & data = params.get("data");
ReadBufferFromString string_read_buf(data);
NativeReader deserializer(string_read_buf, 0);
NativeReader deserializer(string_read_buf, /*server_revision*/ 0);
Block block_read = deserializer.read();
Columns col_ptrs = block_read.getColumns();
@ -543,14 +544,15 @@ void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & requ
WriteBufferFromOwnString string_write_buf;
Block block_write(res_cols_with_type_and_name);
NativeWriter native_writer{string_write_buf, 0, block_write};
NativeWriter native_writer{string_write_buf, /*client_revision*/ 0, block_write};
native_writer.write(block_write);
writeStringBinary(string_write_buf.str(), out);
}
else
{
LOG_WARNING(log, "Unknown library method: '{}'", method);
processError(response, "Unknown library method '" + method + "'");
LOG_ERROR(log, "Unknown library method: '{}'", method);
}
}
catch (...)

View File

@ -20,8 +20,8 @@ namespace ErrorCodes
CatBoostLibraryBridgeHelper::CatBoostLibraryBridgeHelper(
ContextPtr context_,
const String & library_path_,
const String & model_path_)
std::string_view library_path_,
std::string_view model_path_)
: LibraryBridgeHelper(context_->getGlobalContext())
, library_path(library_path_)
, model_path(model_path_)
@ -96,7 +96,7 @@ ColumnPtr CatBoostLibraryBridgeHelper::evaluate(const ColumnsWithTypeAndName & c
WriteBufferFromOwnString string_write_buf;
Block block(columns);
NativeWriter native_writer(string_write_buf, 0, block);
NativeWriter native_writer(string_write_buf, /*client_revision*/ 0, block);
native_writer.write(block);
ReadWriteBufferFromHTTP buf(
@ -105,14 +105,14 @@ ColumnPtr CatBoostLibraryBridgeHelper::evaluate(const ColumnsWithTypeAndName & c
[this, serialized = string_write_buf.str()](std::ostream & os)
{
os << "model_path=" << escapeForFileName(model_path) << "&";
os << "data=" << serialized;
os << "data=" << escapeForFileName(serialized);
},
http_timeouts, credentials);
String res;
readStringBinary(res, buf);
ReadBufferFromString string_read_buf(res);
NativeReader native_reader(string_read_buf, 0);
NativeReader native_reader(string_read_buf, /*server_revision*/ 0);
Block block_read = native_reader.read();
return block_read.getColumns()[0];

View File

@ -16,7 +16,7 @@ public:
static constexpr inline auto PING_HANDLER = "/catboost_ping";
static constexpr inline auto MAIN_HANDLER = "/catboost_request";
CatBoostLibraryBridgeHelper(ContextPtr context_, const String & library_path_, const String & model_path_);
CatBoostLibraryBridgeHelper(ContextPtr context_, std::string_view library_path_, std::string_view model_path_);
size_t getTreeCount();

View File

@ -231,7 +231,7 @@ def testRecoveryAfterCrash(ch_cluster):
# amazon_model.bin has 0 float features and 9 categorical features
def testAmazonModel(ch_cluster):
def testAmazonModelSingleRow(ch_cluster):
if instance.is_built_with_memory_sanitizer():
pytest.skip("Memory Sanitizer cannot work with third-party shared libraries")
@ -242,6 +242,32 @@ def testAmazonModel(ch_cluster):
assert result == expected
def testAmazonModelManyRows(ch_cluster):
    """Run catboostEvaluate() on amazon_model.bin over a 7500-row table.

    The table is recreated from scratch, filled from numbers(7500), and the
    predictions are collapsed into a single value that is compared against a
    known constant.
    """
    if instance.is_built_with_memory_sanitizer():
        pytest.skip("Memory Sanitizer cannot work with third-party shared libraries")

    drop_table = "drop table if exists amazon"
    create_table = "create table amazon ( DATE Date materialized today(), ACTION UInt8, RESOURCE UInt32, MGR_ID UInt32, ROLE_ROLLUP_1 UInt32, ROLE_ROLLUP_2 UInt32, ROLE_DEPTNAME UInt32, ROLE_TITLE UInt32, ROLE_FAMILY_DESC UInt32, ROLE_FAMILY UInt32, ROLE_CODE UInt32) engine = MergeTree order by DATE"
    fill_table = "insert into amazon select number % 256, number, number, number, number, number, number, number, number, number from numbers(7500)"

    # Start from a clean table, then populate it; the statements must run in this order.
    for statement in (drop_table, create_table, fill_table):
        instance.query(statement)

    # Summing the predictions and flooring the sum is only a very crude
    # fingerprint of the result. The point of this test is that a large result
    # set can be exchanged between the server and the bridge at all.
    fingerprint_query = "SELECT floor(sum(catboostEvaluate('/etc/clickhouse-server/model/amazon_model.bin', RESOURCE, MGR_ID, ROLE_ROLLUP_1, ROLE_ROLLUP_2, ROLE_DEPTNAME, ROLE_TITLE, ROLE_FAMILY_DESC, ROLE_FAMILY, ROLE_CODE))) FROM amazon"
    fingerprint = instance.query(fingerprint_query)

    assert fingerprint == "5834\n"

    # Clean up; this is only reached when the assertion above passed.
    instance.query(drop_table)
def testModelUpdate(ch_cluster):
if instance.is_built_with_memory_sanitizer():
pytest.skip("Memory Sanitizer cannot work with third-party shared libraries")