From 822289658bf5d93a4d99c552ef142930930555c6 Mon Sep 17 00:00:00 2001 From: michael1589 Date: Mon, 22 Nov 2021 17:59:30 +0800 Subject: [PATCH 01/11] support HTTP/HTTPS basic access authentication --- src/Storages/StorageURL.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 66033f7a7d6..25a6101594d 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -21,11 +21,13 @@ #include #include +#include #include #include #include #include #include +#include namespace DB @@ -128,6 +130,16 @@ namespace try { + std::ostringstream ostr; + std::string userInfo = request_uri.getUserInfo(); + if ("" != userInfo) + { + Poco::Base64Encoder encoder(ostr); + encoder.rdbuf()->setLineLength(0); + encoder << userInfo; + encoder.close(); + } + read_buf = wrapReadBufferWithCompressionMethod( std::make_unique( request_uri, @@ -135,7 +147,7 @@ namespace callback, timeouts, context->getSettingsRef().max_http_get_redirects, - Poco::Net::HTTPBasicCredentials{}, + userInfo.length() == 0 ? Poco::Net::HTTPBasicCredentials{} : Poco::Net::HTTPBasicCredentials(ostr.str()), DBMS_DEFAULT_BUFFER_SIZE, context->getReadSettings(), headers, From 1ca6f7bd0352504739f699ad29b7a59917c86f96 Mon Sep 17 00:00:00 2001 From: michael1589 Date: Tue, 23 Nov 2021 09:27:28 +0800 Subject: [PATCH 02/11] HTTP/HTTPS basic access authentication test --- tests/queries/0_stateless/02114_basic_acess_auth.reference | 1 + tests/queries/0_stateless/02114_basic_acess_auth.sql | 1 + 2 files changed, 2 insertions(+) create mode 100644 tests/queries/0_stateless/02114_basic_acess_auth.reference create mode 100644 tests/queries/0_stateless/02114_basic_acess_auth.sql diff --git a/tests/queries/0_stateless/02114_basic_acess_auth.reference b/tests/queries/0_stateless/02114_basic_acess_auth.reference new file mode 100644 index 00000000000..6f8cac30949 --- /dev/null +++ b/tests/queries/0_stateless/02114_basic_acess_auth.reference @@ -0,0 +1 @@ +\n\n \n Basic Authentication test page\n\n \n\n\n

\n JigsawJigsaw Powered !\n \n

\n


\n

Your browser made it!\n \n\n diff --git a/tests/queries/0_stateless/02114_basic_acess_auth.sql b/tests/queries/0_stateless/02114_basic_acess_auth.sql new file mode 100644 index 00000000000..77cd9e6cbbe --- /dev/null +++ b/tests/queries/0_stateless/02114_basic_acess_auth.sql @@ -0,0 +1 @@ +select * FROM url('https://guest:guest@jigsaw.w3.org/HTTP/Basic/','RawBLOB', 'a String'); \ No newline at end of file From fe501d4dd4b021ba91573859eeab1e4990e35079 Mon Sep 17 00:00:00 2001 From: michael1589 Date: Thu, 25 Nov 2021 23:03:01 +0800 Subject: [PATCH 03/11] fix merge error --- src/Storages/StorageURL.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index df4893cb3e1..1b378237fb3 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -21,13 +21,11 @@ #include #include -#include #include #include #include #include #include -#include namespace DB @@ -132,12 +130,15 @@ namespace { std::ostringstream ostr; std::string userInfo = request_uri.getUserInfo(); - if ("" != userInfo) + if (!userInfo.empty()) { - Poco::Base64Encoder encoder(ostr); - encoder.rdbuf()->setLineLength(0); - encoder << userInfo; - encoder.close(); + std::string::size_type n; + n = userInfo.find(":"); + if(n != std::string::npos) + { + credentials.setUsername(userInfo.substr(0, n)); + credentials.setPassword(userInfo.substr(n+1)); + } } read_buf = wrapReadBufferWithCompressionMethod( @@ -148,7 +149,6 @@ namespace timeouts, credentials, context->getSettingsRef().max_http_get_redirects, - userInfo.length() == 0 ? Poco::Net::HTTPBasicCredentials{} : Poco::Net::HTTPBasicCredentials(ostr.str()), DBMS_DEFAULT_BUFFER_SIZE, context->getReadSettings(), headers, From 26a117c4c1c92dc5f6c2bf50dd1f3e86cbb518c1 Mon Sep 17 00:00:00 2001 From: michael1589 Date: Sat, 27 Nov 2021 23:54:33 +0800 Subject: [PATCH 04/11] add test file for url basic auth --- .../queries/0_stateless/02126_url_auth.python | 226 ++++++++++++++++++ .../0_stateless/02126_url_auth.reference | 1 + tests/queries/0_stateless/02126_url_auth.sh | 11 + 3 files changed, 238 insertions(+) create mode 100644 tests/queries/0_stateless/02126_url_auth.python create mode 100644 tests/queries/0_stateless/02126_url_auth.reference create mode 100755 tests/queries/0_stateless/02126_url_auth.sh diff --git a/tests/queries/0_stateless/02126_url_auth.python b/tests/queries/0_stateless/02126_url_auth.python new file mode 100644 index 00000000000..db1e8a38565 --- /dev/null +++ b/tests/queries/0_stateless/02126_url_auth.python @@ -0,0 +1,226 @@ +#!/usr/bin/env python3 + +import socket +import csv +import sys +import tempfile +import threading +import os +import traceback +import urllib.request +import subprocess +from io import StringIO +from http.server import BaseHTTPRequestHandler, HTTPServer + +def is_ipv6(host): + try: + socket.inet_aton(host) + return False + except: + return True + +def get_local_port(host, ipv6): + if ipv6: + family = socket.AF_INET6 + else: + family = socket.AF_INET + + with socket.socket(family) as fd: + fd.bind((host, 0)) + return fd.getsockname()[1] + +CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1') +CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') + +##################################################################################### +# This test starts an HTTP server and serves data to clickhouse url-engine based table. +# In order for it to work ip+port of http server (given below) should be +# accessible from clickhouse server. +##################################################################################### + +# IP-address of this host accessible from the outside world. Get the first one +HTTP_SERVER_HOST = subprocess.check_output(['hostname', '-i']).decode('utf-8').strip().split()[0] +IS_IPV6 = is_ipv6(HTTP_SERVER_HOST) +HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST, IS_IPV6) + +# IP address and port of the HTTP server started from this script. +HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT) +if IS_IPV6: + HTTP_SERVER_URL_STR = 'http://' + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/" +else: + HTTP_SERVER_URL_STR = 'http://' + f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/" + +CSV_DATA = os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())) + +def get_ch_answer(query): + host = CLICKHOUSE_HOST + if IS_IPV6: + host = f'[{host}]' + + url = os.environ.get('CLICKHOUSE_URL', 'http://{host}:{port}'.format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP)) + return urllib.request.urlopen(url, data=query.encode()).read().decode() + +def check_answers(query, answer): + ch_answer = get_ch_answer(query) + if ch_answer.strip() != answer.strip(): + print("FAIL on query:", query, file=sys.stderr) + print("Expected answer:", answer, file=sys.stderr) + print("Fetched answer :", ch_answer, file=sys.stderr) + raise Exception("Fail on query") + +class CSVHTTPServer(BaseHTTPRequestHandler): + def _set_headers(self): + self.send_response(200) + self.send_header('Content-type', 'text/csv') + self.end_headers() + + def do_GET(self): + self._set_headers() + with open(CSV_DATA, 'r') as fl: + reader = csv.reader(fl, delimiter=',') + for row in reader: + self.wfile.write((', '.join(row) + '\n').encode()) + return + + def read_chunk(self): + msg = '' + while True: + sym = self.rfile.read(1) + if sym == '': + break + msg += sym.decode('utf-8') + if msg.endswith('\r\n'): + break + length = int(msg[:-2], 16) + if length == 0: + return '' + content = self.rfile.read(length) + self.rfile.read(2) # read sep \r\n + return content.decode('utf-8') + + def do_POST(self): + data = '' + while True: + chunk = self.read_chunk() + if not chunk: + break + data += chunk + with StringIO(data) as fl: + reader = csv.reader(fl, delimiter=',') + with open(CSV_DATA, 'a') as d: + for row in reader: + d.write(','.join(row) + '\n') + self._set_headers() + self.wfile.write(b"ok") + + def log_message(self, format, *args): + return + + +class HTTPServerV6(HTTPServer): + address_family = socket.AF_INET6 + +def start_server(requests_amount): + if IS_IPV6: + httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, CSVHTTPServer) + else: + httpd = HTTPServer(HTTP_SERVER_ADDRESS, CSVHTTPServer) + + def real_func(): + for i in range(requests_amount): + httpd.handle_request() + + t = threading.Thread(target=real_func) + return t + +# test section + +def test_select(table_name="", schema="str String,numuint UInt32,numint Int32,double Float64", requests=[], answers=[], test_data=""): + with open(CSV_DATA, 'w') as f: # clear file + f.write('') + + if test_data: + with open(CSV_DATA, 'w') as f: + f.write(test_data + "\n") + + if table_name: + get_ch_answer("drop table if exists {}".format(table_name)) + get_ch_answer("create table {} ({}) engine=URL('{}', 'CSV')".format(table_name, schema, HTTP_SERVER_URL_STR)) + + for i in range(len(requests)): + tbl = table_name + if not tbl: + tbl = "url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema) + check_answers(requests[i].format(tbl=tbl), answers[i]) + + if table_name: + get_ch_answer("drop table if exists {}".format(table_name)) + +def test_insert(table_name="", schema="str String,numuint UInt32,numint Int32,double Float64", requests_insert=[], requests_select=[], answers=[]): + with open(CSV_DATA, 'w') as f: # flush test file + f.write('') + + if table_name: + get_ch_answer("drop table if exists {}".format(table_name)) + get_ch_answer("create table {} ({}) engine=URL('{}', 'CSV')".format(table_name, schema, HTTP_SERVER_URL_STR)) + + for req in requests_insert: + tbl = table_name + if not tbl: + tbl = "table function url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema) + get_ch_answer(req.format(tbl=tbl)) + + + for i in range(len(requests_select)): + tbl = table_name + if not tbl: + tbl = "url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema) + check_answers(requests_select[i].format(tbl=tbl), answers[i]) + + if table_name: + get_ch_answer("drop table if exists {}".format(table_name)) + +def test_select_url_engine(requests=[], answers=[], test_data=""): + for i in range(len(requests)): + check_answers(requests[i], answers[i]) + +def main(): + test_data = "Hello,2,-2,7.7\nWorld,2,-5,8.8" + select_only_requests = { + "select str,numuint,numint,double from {tbl}" : test_data.replace(',', '\t'), + "select numuint, count(*) from {tbl} group by numuint" : "2\t2", + "select str,numuint,numint,double from {tbl} limit 1": test_data.split("\n")[0].replace(',', '\t'), + } + + insert_requests = [ + "insert into {tbl} values('Hello',10,-2,7.7)('World',10,-5,7.7)", + "insert into {tbl} select 'Buy', number, 9-number, 9.9 from system.numbers limit 10", + ] + + select_requests = { + "select distinct numuint from {tbl} order by numuint": '\n'.join([str(i) for i in range(11)]), + "select count(*) from {tbl}": '12', + 'select double, count(*) from {tbl} group by double': "7.7\t2\n9.9\t10" + } + + select_requests_url_auth = { + "select * from url('http://guest:guest@127.0.0.1:4443/', 'RawBLOB', 'a String')": test_data.replace(',', '\t'), + } + + t = start_server(len(select_only_requests) * 2 + (len(insert_requests) + len(select_requests)) * 2) + t.start() + test_select(requests=list(select_requests_url_auth.keys()), answers=list(select_requests_url_auth.values()), test_data=test_data) + t.join() + print("PASSED") + + +if __name__ == "__main__": + try: + main() + except Exception as ex: + exc_type, exc_value, exc_traceback = sys.exc_info() + traceback.print_tb(exc_traceback, file=sys.stderr) + print(ex, file=sys.stderr) + sys.stderr.flush() + + os._exit(1) diff --git a/tests/queries/0_stateless/02126_url_auth.reference b/tests/queries/0_stateless/02126_url_auth.reference new file mode 100644 index 00000000000..53cdf1e9393 --- /dev/null +++ b/tests/queries/0_stateless/02126_url_auth.reference @@ -0,0 +1 @@ +PASSED diff --git a/tests/queries/0_stateless/02126_url_auth.sh b/tests/queries/0_stateless/02126_url_auth.sh new file mode 100755 index 00000000000..0013c3c1c0c --- /dev/null +++ b/tests/queries/0_stateless/02126_url_auth.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Tag no-fasttest: Not sure why fail even in sequential mode. Disabled for now to make some progress. + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# We should have correct env vars from shell_config.sh to run this test + +python3 "$CURDIR"/02126_url_auth.python From 70264b37a1e387bcb0212e6d3965c5ed2a4446ab Mon Sep 17 00:00:00 2001 From: michael1589 Date: Sun, 28 Nov 2021 16:47:26 +0800 Subject: [PATCH 05/11] style check fail --- src/Storages/StorageURL.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 1b378237fb3..d90f9a39542 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -128,7 +128,6 @@ namespace try { - std::ostringstream ostr; std::string userInfo = request_uri.getUserInfo(); if (!userInfo.empty()) { From 082105faa9197defbda756ab75b9f00162ec24c1 Mon Sep 17 00:00:00 2001 From: michael1589 Date: Sun, 28 Nov 2021 21:22:23 +0800 Subject: [PATCH 06/11] for test url auth --- .../queries/0_stateless/02126_url_auth.python | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02126_url_auth.python b/tests/queries/0_stateless/02126_url_auth.python index db1e8a38565..60009624c76 100644 --- a/tests/queries/0_stateless/02126_url_auth.python +++ b/tests/queries/0_stateless/02126_url_auth.python @@ -76,10 +76,11 @@ class CSVHTTPServer(BaseHTTPRequestHandler): def do_GET(self): self._set_headers() - with open(CSV_DATA, 'r') as fl: - reader = csv.reader(fl, delimiter=',') - for row in reader: - self.wfile.write((', '.join(row) + '\n').encode()) + self.wfile.write(('hello, world').encode()) + # with open(CSV_DATA, 'r') as fl: + # reader = csv.reader(fl, delimiter=',') + # for row in reader: + # self.wfile.write((', '.join(row) + '\n').encode()) return def read_chunk(self): @@ -186,6 +187,7 @@ def test_select_url_engine(requests=[], answers=[], test_data=""): def main(): test_data = "Hello,2,-2,7.7\nWorld,2,-5,8.8" + """ select_only_requests = { "select str,numuint,numint,double from {tbl}" : test_data.replace(',', '\t'), "select numuint, count(*) from {tbl} group by numuint" : "2\t2", @@ -202,12 +204,20 @@ def main(): "select count(*) from {tbl}": '12', 'select double, count(*) from {tbl} group by double': "7.7\t2\n9.9\t10" } + """ + + if IS_IPV6: + query = "select * from url('http://guest:guest@" + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/', 'RawBLOB', 'a String')" + else: + query = "select * from url('http://guest:guest@" + f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/', 'RawBLOB', 'a String')" + + select_requests_url_auth = { - "select * from url('http://guest:guest@127.0.0.1:4443/', 'RawBLOB', 'a String')": test_data.replace(',', '\t'), + query : 'hello, world', } - t = start_server(len(select_only_requests) * 2 + (len(insert_requests) + len(select_requests)) * 2) + t = start_server(len(list(select_requests_url_auth.keys()))) t.start() test_select(requests=list(select_requests_url_auth.keys()), answers=list(select_requests_url_auth.values()), test_data=test_data) t.join() From 1bf49ab6373410d0659006e51c17df0ce7d3a3f9 Mon Sep 17 00:00:00 2001 From: michael1589 Date: Sun, 28 Nov 2021 21:51:39 +0800 Subject: [PATCH 07/11] style check fail --- src/Storages/StorageURL.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index d90f9a39542..7ef7628bfca 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -131,8 +131,7 @@ namespace std::string userInfo = request_uri.getUserInfo(); if (!userInfo.empty()) { - std::string::size_type n; - n = userInfo.find(":"); + std::size_t n = userInfo.find(":"); if(n != std::string::npos) { credentials.setUsername(userInfo.substr(0, n)); From b54caf9d32383f9ec3deeaac57d7808d300bbc7d Mon Sep 17 00:00:00 2001 From: michael1589 Date: Mon, 29 Nov 2021 14:19:29 +0800 Subject: [PATCH 08/11] delete old test files --- tests/queries/0_stateless/02114_basic_acess_auth.reference | 1 - tests/queries/0_stateless/02114_basic_acess_auth.sql | 1 - 2 files changed, 2 deletions(-) delete mode 100644 tests/queries/0_stateless/02114_basic_acess_auth.reference delete mode 100644 tests/queries/0_stateless/02114_basic_acess_auth.sql diff --git a/tests/queries/0_stateless/02114_basic_acess_auth.reference b/tests/queries/0_stateless/02114_basic_acess_auth.reference deleted file mode 100644 index 6f8cac30949..00000000000 --- a/tests/queries/0_stateless/02114_basic_acess_auth.reference +++ /dev/null @@ -1 +0,0 @@ -\n\n \n Basic Authentication test page\n\n \n\n\n

\n JigsawJigsaw Powered !\n \n

\n


\n

Your browser made it!\n \n\n diff --git a/tests/queries/0_stateless/02114_basic_acess_auth.sql b/tests/queries/0_stateless/02114_basic_acess_auth.sql deleted file mode 100644 index 77cd9e6cbbe..00000000000 --- a/tests/queries/0_stateless/02114_basic_acess_auth.sql +++ /dev/null @@ -1 +0,0 @@ -select * FROM url('https://guest:guest@jigsaw.w3.org/HTTP/Basic/','RawBLOB', 'a String'); \ No newline at end of file From 3f56dde858288ab8759662b5fb506d93dda5785c Mon Sep 17 00:00:00 2001 From: michael1589 Date: Tue, 30 Nov 2021 15:07:18 +0800 Subject: [PATCH 09/11] style check fail --- src/Storages/StorageURL.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 7ef7628bfca..8189a0a31eb 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -132,7 +132,7 @@ namespace if (!userInfo.empty()) { std::size_t n = userInfo.find(":"); - if(n != std::string::npos) + if (n != std::string::npos) { credentials.setUsername(userInfo.substr(0, n)); credentials.setPassword(userInfo.substr(n+1)); From 80dd01e66bcd956b40d7294ce93c5b4e01b82b88 Mon Sep 17 00:00:00 2001 From: michael1589 Date: Tue, 30 Nov 2021 15:18:26 +0800 Subject: [PATCH 10/11] style check fail --- src/Storages/StorageURL.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 8189a0a31eb..adbff983c66 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -131,7 +131,7 @@ namespace std::string userInfo = request_uri.getUserInfo(); if (!userInfo.empty()) { - std::size_t n = userInfo.find(":"); + std::size_t n = userInfo.find(':'); if (n != std::string::npos) { credentials.setUsername(userInfo.substr(0, n)); From f2f8826621f14fcec8d99d2d47f2858a5bd897a7 Mon Sep 17 00:00:00 2001 From: michael1589 Date: Tue, 30 Nov 2021 20:46:54 +0800 Subject: [PATCH 11/11] build check fail --- src/Storages/StorageURL.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index adbff983c66..382466c32d4 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -128,14 +128,14 @@ namespace try { - std::string userInfo = request_uri.getUserInfo(); - if (!userInfo.empty()) + std::string user_info = request_uri.getUserInfo(); + if (!user_info.empty()) { - std::size_t n = userInfo.find(':'); + std::size_t n = user_info.find(':'); if (n != std::string::npos) { - credentials.setUsername(userInfo.substr(0, n)); - credentials.setPassword(userInfo.substr(n+1)); + credentials.setUsername(user_info.substr(0, n)); + credentials.setPassword(user_info.substr(n+1)); } }