mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 00:22:29 +00:00
Merge pull request #31042 from Avogar/hdfs-bad-url
Add additional hdfs url check
This commit is contained in:
commit
17085183bf
@ -2,12 +2,12 @@
|
||||
|
||||
#include <IO/SeekAvoidingReadBuffer.h>
|
||||
#include <Storages/HDFS/WriteBufferFromHDFS.h>
|
||||
#include <Storages/HDFS/HDFSCommon.h>
|
||||
|
||||
#include <Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h>
|
||||
#include <Disks/IO/ReadIndirectBufferFromRemoteFS.h>
|
||||
#include <Disks/IO/WriteIndirectBufferFromRemoteFS.h>
|
||||
#include <Disks/IO/ReadBufferFromRemoteFSGather.h>
|
||||
#include <Disks/IO/ThreadPoolRemoteFSReader.h>
|
||||
|
||||
#include <base/logger_useful.h>
|
||||
#include <base/FnTraits.h>
|
||||
@ -169,6 +169,7 @@ void registerDiskHDFS(DiskFactory & factory)
|
||||
fs::create_directories(disk);
|
||||
|
||||
String uri{config.getString(config_prefix + ".endpoint")};
|
||||
checkHDFSURL(uri);
|
||||
|
||||
if (uri.back() != '/')
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS path must ends with '/', but '{}' doesn't.", uri);
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <Storages/HDFS/HDFSCommon.h>
|
||||
#include <Poco/URI.h>
|
||||
#include <boost/algorithm/string/replace.hpp>
|
||||
#include <re2/re2.h>
|
||||
|
||||
#if USE_HDFS
|
||||
#include <Common/ShellCommand.h>
|
||||
@ -21,6 +22,7 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
const String HDFSBuilderWrapper::CONFIG_PREFIX = "hdfs";
|
||||
const String HDFS_URL_REGEXP = "^hdfs://[^:/]*:[0-9]*/.*";
|
||||
|
||||
void HDFSBuilderWrapper::loadFromConfig(const Poco::Util::AbstractConfiguration & config,
|
||||
const String & config_path, bool isUser)
|
||||
@ -197,6 +199,12 @@ HDFSFSPtr createHDFSFS(hdfsBuilder * builder)
|
||||
return fs;
|
||||
}
|
||||
|
||||
void checkHDFSURL(const String & url)
|
||||
{
|
||||
if (!re2::RE2::FullMatch(url, HDFS_URL_REGEXP))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad hdfs url: {}. It should have structure 'hdfs://<host_name>:<port>/<path>'", url);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -98,5 +98,9 @@ using HDFSFSPtr = std::unique_ptr<std::remove_pointer_t<hdfsFS>, detail::HDFSFsD
|
||||
HDFSBuilderWrapper createHDFSBuilder(const String & uri_str, const Poco::Util::AbstractConfiguration &);
|
||||
HDFSFSPtr createHDFSFS(hdfsBuilder * builder);
|
||||
|
||||
/// Check that url satisfy structure 'hdfs://<host_name>:<port>/<path>'
|
||||
/// and throw exception if it doesn't;
|
||||
void checkHDFSURL(const String & url);
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -67,6 +67,7 @@ StorageHDFS::StorageHDFS(
|
||||
, partition_by(partition_by_)
|
||||
{
|
||||
context_->getRemoteHostFilter().checkURL(Poco::URI(uri));
|
||||
checkHDFSURL(uri);
|
||||
|
||||
StorageInMemoryMetadata storage_metadata;
|
||||
storage_metadata.setColumns(columns_);
|
||||
|
@ -13,7 +13,7 @@
|
||||
</disk_memory>
|
||||
<disk_hdfs>
|
||||
<type>hdfs</type>
|
||||
<endpoint>http://hdfs1:9000/data/</endpoint>
|
||||
<endpoint>hdfs://hdfs1:9000/data/</endpoint>
|
||||
</disk_hdfs>
|
||||
<disk_encrypted>
|
||||
<type>encrypted</type>
|
||||
|
@ -100,7 +100,7 @@ def test_bad_hdfs_uri(started_cluster):
|
||||
"create table BadStorage1 (id UInt32, name String, weight Float64) ENGINE = HDFS('hads:hgsdfs100500:9000/other_storage', 'TSV')")
|
||||
except Exception as ex:
|
||||
print(ex)
|
||||
assert "Illegal HDFS URI" in str(ex)
|
||||
assert "Bad hdfs url" in str(ex)
|
||||
try:
|
||||
node1.query(
|
||||
"create table BadStorage2 (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://hdfs100500:9000/other_storage', 'TSV')")
|
||||
|
17
tests/queries/0_stateless/02114_hdfs_bad_url.reference
Normal file
17
tests/queries/0_stateless/02114_hdfs_bad_url.reference
Normal file
@ -0,0 +1,17 @@
|
||||
OK
|
||||
OK
|
||||
OK
|
||||
OK
|
||||
OK
|
||||
OK
|
||||
OK
|
||||
OK
|
||||
OK
|
||||
OK
|
||||
OK
|
||||
OK
|
||||
OK
|
||||
OK
|
||||
OK
|
||||
OK
|
||||
OK
|
26
tests/queries/0_stateless/02114_hdfs_bad_url.sh
Executable file
26
tests/queries/0_stateless/02114_hdfs_bad_url.sh
Executable file
@ -0,0 +1,26 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: no-fasttest
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('abcd', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL';
|
||||
$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('abcd/', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL';
|
||||
$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('//abcd', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL';
|
||||
$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('//abcd/', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL';
|
||||
$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('//abcd/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL';
|
||||
$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('://abcd', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL';
|
||||
$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('://abcd/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL';
|
||||
$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('abcd:9000', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL';
|
||||
$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('abcd:9000/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL';
|
||||
$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('//abcd:9000/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL';
|
||||
$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('://abcd:9000/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL';
|
||||
$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('abcd/', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL';
|
||||
$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs://abcd', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL';
|
||||
$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs1:9000/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL';
|
||||
$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs://hdfs1/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL';
|
||||
$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('http://hdfs1:9000/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL';
|
||||
$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs://hdfs1/abcd:9000/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL';
|
||||
|
Loading…
Reference in New Issue
Block a user