mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
Merge pull request #47970 from ClickHouse/rs/fix-catboost
Fix "Field value too long" in catboostEvaluate()
This commit is contained in:
commit
b18c051943
@ -4,21 +4,22 @@
|
||||
#include <Poco/Net/NetException.h>
|
||||
#include <Poco/Util/HelpFormatter.h>
|
||||
|
||||
#include <base/range.h>
|
||||
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/SensitiveDataMasker.h>
|
||||
#include "config.h"
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <base/errnoToString.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <Formats/registerFormats.h>
|
||||
#include <Server/HTTP/HTTPServer.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Server/HTTP/HTTPServer.h>
|
||||
#include <base/errnoToString.h>
|
||||
#include <base/range.h>
|
||||
|
||||
#include <sys/time.h>
|
||||
#include <sys/resource.h>
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#if USE_ODBC
|
||||
# include <Poco/Data/ODBC/Connector.h>
|
||||
#endif
|
||||
@ -89,7 +90,7 @@ void IBridge::defineOptions(Poco::Util::OptionSet & options)
|
||||
Poco::Util::Option("listen-host", "", "hostname or address to listen, default 127.0.0.1").argument("listen-host").binding("listen-host"));
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("http-timeout", "", "http timeout for socket, default 1800").argument("http-timeout").binding("http-timeout"));
|
||||
Poco::Util::Option("http-timeout", "", "http timeout for socket, default 180").argument("http-timeout").binding("http-timeout"));
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("max-server-connections", "", "max connections to server, default 1024").argument("max-server-connections").binding("max-server-connections"));
|
||||
@ -97,6 +98,9 @@ void IBridge::defineOptions(Poco::Util::OptionSet & options)
|
||||
options.addOption(
|
||||
Poco::Util::Option("keep-alive-timeout", "", "keepalive timeout, default 10").argument("keep-alive-timeout").binding("keep-alive-timeout"));
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("http-max-field-value-size", "", "max http field value size, default 1048576").argument("http-max-field-value-size").binding("http-max-field-value-size"));
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("log-level", "", "sets log level, default info") .argument("log-level").binding("logger.level"));
|
||||
|
||||
@ -165,6 +169,7 @@ void IBridge::initialize(Application & self)
|
||||
http_timeout = config().getUInt64("http-timeout", DEFAULT_HTTP_READ_BUFFER_TIMEOUT);
|
||||
max_server_connections = config().getUInt("max-server-connections", 1024);
|
||||
keep_alive_timeout = config().getUInt64("keep-alive-timeout", 10);
|
||||
http_max_field_value_size = config().getUInt64("http-max-field-value-size", 1048576);
|
||||
|
||||
struct rlimit limit;
|
||||
const UInt64 gb = 1024 * 1024 * 1024;
|
||||
@ -226,6 +231,10 @@ int IBridge::main(const std::vector<std::string> & /*args*/)
|
||||
auto context = Context::createGlobal(shared_context.get());
|
||||
context->makeGlobalContext();
|
||||
|
||||
auto settings = context->getSettings();
|
||||
settings.set("http_max_field_value_size", http_max_field_value_size);
|
||||
context->setSettings(settings);
|
||||
|
||||
if (config().has("query_masking_rules"))
|
||||
SensitiveDataMasker::setInstance(std::make_unique<SensitiveDataMasker>(config(), "query_masking_rules"));
|
||||
|
||||
|
@ -45,6 +45,7 @@ private:
|
||||
std::string log_level;
|
||||
unsigned max_server_connections;
|
||||
size_t http_timeout;
|
||||
size_t http_max_field_value_size;
|
||||
|
||||
Poco::Logger * log;
|
||||
};
|
||||
|
@ -67,6 +67,8 @@ std::unique_ptr<ShellCommand> IBridgeHelper::startBridgeCommand()
|
||||
cmd_args.push_back(config.getString(configPrefix() + ".listen_host", DEFAULT_HOST));
|
||||
cmd_args.push_back("--http-timeout");
|
||||
cmd_args.push_back(std::to_string(getHTTPTimeout().totalMicroseconds()));
|
||||
cmd_args.push_back("--http-max-field-value-size");
|
||||
cmd_args.push_back("99999999999999999"); // something "big" to accept large datasets (issue 47616)
|
||||
if (config.has("logger." + configPrefix() + "_log"))
|
||||
{
|
||||
cmd_args.push_back("--log-path");
|
||||
|
@ -279,7 +279,7 @@ def testAmazonModelManyRows(ch_cluster):
|
||||
)
|
||||
|
||||
result = instance.query(
|
||||
"insert into amazon select number % 256, number, number, number, number, number, number, number, number, number from numbers(7500)"
|
||||
"insert into amazon select number % 256, number, number, number, number, number, number, number, number, number from numbers(750000)"
|
||||
)
|
||||
|
||||
# Compute the predictions first, then use sum and floor as a very crude fingerprint of the result for comparison
|
||||
@ -288,7 +288,7 @@ def testAmazonModelManyRows(ch_cluster):
|
||||
"SELECT floor(sum(catboostEvaluate('/etc/clickhouse-server/model/amazon_model.bin', RESOURCE, MGR_ID, ROLE_ROLLUP_1, ROLE_ROLLUP_2, ROLE_DEPTNAME, ROLE_TITLE, ROLE_FAMILY_DESC, ROLE_FAMILY, ROLE_CODE))) FROM amazon"
|
||||
)
|
||||
|
||||
expected = "5834\n"
|
||||
expected = "583092\n"
|
||||
assert result == expected
|
||||
|
||||
result = instance.query("drop table if exists amazon")
|
||||
|
Loading…
Reference in New Issue
Block a user