From 3839d521a0effd4b839ca28ce1b57b259c951b5c Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 3 Nov 2021 14:01:50 +0300 Subject: [PATCH] Add additional hdfs url check --- src/Disks/HDFS/DiskHDFS.cpp | 3 ++- src/Storages/HDFS/HDFSCommon.cpp | 8 ++++++ src/Storages/HDFS/HDFSCommon.h | 4 +++ src/Storages/HDFS/StorageHDFS.cpp | 2 +- .../0_stateless/02114_hdfs_bad_url.reference | 17 ++++++++++++ .../queries/0_stateless/02114_hdfs_bad_url.sh | 26 +++++++++++++++++++ 6 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02114_hdfs_bad_url.reference create mode 100755 tests/queries/0_stateless/02114_hdfs_bad_url.sh diff --git a/src/Disks/HDFS/DiskHDFS.cpp b/src/Disks/HDFS/DiskHDFS.cpp index 9071ce1d139..bddb5ebefc6 100644 --- a/src/Disks/HDFS/DiskHDFS.cpp +++ b/src/Disks/HDFS/DiskHDFS.cpp @@ -2,12 +2,12 @@ #include #include +#include #include #include #include #include -#include #include #include @@ -169,6 +169,7 @@ void registerDiskHDFS(DiskFactory & factory) fs::create_directories(disk); String uri{config.getString(config_prefix + ".endpoint")}; + checkHDFSURL(uri); if (uri.back() != '/') throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS path must ends with '/', but '{}' doesn't.", uri); diff --git a/src/Storages/HDFS/HDFSCommon.cpp b/src/Storages/HDFS/HDFSCommon.cpp index 25c454e7500..68d5f60a2aa 100644 --- a/src/Storages/HDFS/HDFSCommon.cpp +++ b/src/Storages/HDFS/HDFSCommon.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #if USE_HDFS #include @@ -21,6 +22,7 @@ namespace ErrorCodes } const String HDFSBuilderWrapper::CONFIG_PREFIX = "hdfs"; +const String HDFS_URL_REGEXP = "^hdfs://[^:/]*:[0-9]*/.*"; void HDFSBuilderWrapper::loadFromConfig(const Poco::Util::AbstractConfiguration & config, const String & config_path, bool isUser) @@ -197,6 +199,12 @@ HDFSFSPtr createHDFSFS(hdfsBuilder * builder) return fs; } +void checkHDFSURL(const String & url) +{ + if (!re2::RE2::FullMatch(url, HDFS_URL_REGEXP)) + throw 
Exception(ErrorCodes::BAD_ARGUMENTS, "Bad hdfs url: {}. It should have structure 'hdfs://<host_name>:<port>/<path>'", url); +} + } #endif diff --git a/src/Storages/HDFS/HDFSCommon.h b/src/Storages/HDFS/HDFSCommon.h index 877bf7dd74f..82127beb520 100644 --- a/src/Storages/HDFS/HDFSCommon.h +++ b/src/Storages/HDFS/HDFSCommon.h @@ -98,5 +98,9 @@ using HDFSFSPtr = std::unique_ptr, detail::HDFSFsD HDFSBuilderWrapper createHDFSBuilder(const String & uri_str, const Poco::Util::AbstractConfiguration &); HDFSFSPtr createHDFSFS(hdfsBuilder * builder); +/// Check that url satisfies structure 'hdfs://<host_name>:<port>/<path>' +/// and throw exception if it doesn't. +void checkHDFSURL(const String & url); + } #endif diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 6e81f5577ab..6c2f155fbee 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -51,6 +50,7 @@ StorageHDFS::StorageHDFS( : IStorage(table_id_), WithContext(context_), uri(uri_), format_name(format_name_), compression_method(compression_method_) { context_->getRemoteHostFilter().checkURL(Poco::URI(uri)); + checkHDFSURL(uri); StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); diff --git a/tests/queries/0_stateless/02114_hdfs_bad_url.reference b/tests/queries/0_stateless/02114_hdfs_bad_url.reference new file mode 100644 index 00000000000..a588883cf70 --- /dev/null +++ b/tests/queries/0_stateless/02114_hdfs_bad_url.reference @@ -0,0 +1,17 @@ +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK diff --git a/tests/queries/0_stateless/02114_hdfs_bad_url.sh b/tests/queries/0_stateless/02114_hdfs_bad_url.sh new file mode 100755 index 00000000000..5117568b67f --- /dev/null +++ b/tests/queries/0_stateless/02114_hdfs_bad_url.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck
source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('abcd', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('abcd/', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('//abcd', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('//abcd/', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('//abcd/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('://abcd', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('://abcd/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('abcd:9000', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('abcd:9000/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('//abcd:9000/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('://abcd:9000/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('abcd/', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs://abcd', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs1:9000/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 
'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs://hdfs1/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('http://hdfs1:9000/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs://hdfs1/abcd:9000/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +