mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-18 13:42:02 +00:00
299 lines
8.8 KiB
Python
299 lines
8.8 KiB
Python
#!/usr/bin/env python3
|
|
|
|
from http.server import BaseHTTPRequestHandler, HTTPServer
|
|
import socket
|
|
import sys
|
|
import re
|
|
import threading
|
|
import os
|
|
import traceback
|
|
import urllib.request
|
|
import subprocess
|
|
|
|
|
|
def is_ipv6(host):
    """Tell whether *host* should be treated as an IPv6 address.

    The check is deliberately simple: anything that ``socket.inet_aton``
    cannot parse as an IPv4 address is reported as IPv6.

    Args:
        host: textual address, e.g. ``"127.0.0.1"`` or ``"::1"``.

    Returns:
        ``False`` when *host* parses as an IPv4 address, ``True`` otherwise.
    """
    try:
        socket.inet_aton(host)
    except OSError:
        # inet_aton signals an unparsable IPv4 string with OSError; catching
        # only that keeps KeyboardInterrupt/SystemExit from being swallowed.
        return True
    return False
|
|
|
|
|
|
def get_local_port(host, ipv6):
    """Ask the OS for a currently free port on *host*.

    A temporary socket is bound to port 0 so that the OS picks a port, the
    chosen port number is read back, and the socket is closed again.

    Args:
        host: address to bind the probe socket to.
        ipv6: when true an AF_INET6 socket is used, otherwise AF_INET.

    Returns:
        The port number the probe socket was bound to.
    """
    address_family = socket.AF_INET6 if ipv6 else socket.AF_INET
    with socket.socket(address_family) as probe:
        probe.bind((host, 0))
        bound_address = probe.getsockname()
        return bound_address[1]
|
|
|
|
|
|
# Where the ClickHouse server under test listens; overridable via environment.
CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "localhost")
CLICKHOUSE_PORT_HTTP = os.environ.get("CLICKHOUSE_PORT_HTTP", "8123")

# Server returns this JSON response.
# The newlines and tabs are spelled out as escapes so that the payload does
# not depend on how this source file is indented; it must stay in sync with
# EXPECTED_ANSWER below, which is the same text with "\n" and "\t" escaped.
SERVER_JSON_RESPONSE = (
    "{\n"
    '\t"login": "ClickHouse",\n'
    '\t"id": 54801242,\n'
    '\t"name": "ClickHouse",\n'
    '\t"company": null\n'
    "}"
)

# Size of the payload in bytes, i.e. of what the HTTP server actually sends.
PAYLOAD_LEN = len(SERVER_JSON_RESPONSE.encode())

EXPECTED_ANSWER = """{\\n\\t"login": "ClickHouse",\\n\\t"id": 54801242,\\n\\t"name": "ClickHouse",\\n\\t"company": null\\n}"""
|
|
|
|
#####################################################################################
|
|
# This test starts an HTTP server and serves data to clickhouse url-engine based table.
|
|
# The objective of this test is to check that the ClickHouse server uses HTTP
# Range requests (and retries them) when the remote HTTP server supports them.
|
|
# In order for it to work ip+port of http server (given below) should be
|
|
# accessible from clickhouse server.
|
|
#####################################################################################
|
|
|
|
# IP-address of this host accessible from the outside world. Get the first one
# that `hostname -i` prints.
HTTP_SERVER_HOST = subprocess.check_output(["hostname", "-i"]).decode("utf-8").split()[0]
IS_IPV6 = is_ipv6(HTTP_SERVER_HOST)
HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST, IS_IPV6)

# IP address and port of the HTTP server started from this script.
HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT)

# URL under which ClickHouse reaches that server; an IPv6 host is wrapped in
# square brackets.
if IS_IPV6:
    HTTP_SERVER_URL_STR = f"http://[{HTTP_SERVER_ADDRESS[0]}]:{HTTP_SERVER_ADDRESS[1]}/"
else:
    HTTP_SERVER_URL_STR = f"http://{HTTP_SERVER_ADDRESS[0]}:{HTTP_SERVER_ADDRESS[1]}/"
|
|
|
|
|
|
def get_ch_answer(query):
    """Send *query* to ClickHouse over HTTP and return the response text.

    The target is taken from the ``CLICKHOUSE_URL`` environment variable when
    it is set; otherwise it is built from CLICKHOUSE_HOST and
    CLICKHOUSE_PORT_HTTP.

    Args:
        query: SQL text; it is sent as the body of the HTTP request.

    Returns:
        The decoded response body.

    Raises:
        urllib.error.URLError: propagated from ``urlopen`` (this includes
            HTTP error statuses).
    """
    host = CLICKHOUSE_HOST
    # An IPv6 literal has to be bracketed inside a URL. Decide that from the
    # ClickHouse host itself (a colon can only appear in an IPv6 literal), not
    # from the address family of the local test HTTP server.
    if ":" in host and not host.startswith("["):
        host = f"[{host}]"

    url = os.environ.get(
        "CLICKHOUSE_URL",
        "http://{host}:{port}".format(host=host, port=CLICKHOUSE_PORT_HTTP),
    )
    # Close the HTTP response deterministically instead of relying on GC.
    with urllib.request.urlopen(url, data=query.encode()) as response:
        return response.read().decode()
|
|
|
|
|
|
def check_answers(query, answer):
    """Run *query* against ClickHouse and compare the result with *answer*.

    Both texts are compared after stripping surrounding whitespace.

    Args:
        query: SQL text passed to get_ch_answer().
        answer: the expected response text.

    Raises:
        Exception: when the fetched answer differs; the query and both
            answers are printed to stderr first.
    """
    fetched = get_ch_answer(query)
    if fetched.strip() == answer.strip():
        return

    report = (
        ("FAIL on query:", query),
        ("Expected answer:", answer),
        ("Fetched answer :", fetched),
    )
    for label, value in report:
        print(label, value, file=sys.stderr)
    raise Exception("Fail on query")
|
|
|
|
|
|
# Matches a Range header value of the form "bytes=<first>-" or
# "bytes=<first>-<last>".
BYTE_RANGE_RE = re.compile(r"bytes=(\d+)-(\d+)?$")


def parse_byte_range(byte_range):
    """Returns the two numbers in 'bytes=123-456' or throws ValueError.

    The last number or both numbers may be None: an open-ended range such as
    'bytes=123-' yields ``(123, None)`` and a blank string yields
    ``(None, None)``.

    Args:
        byte_range: value of an HTTP ``Range`` header.

    Returns:
        A ``(first, last)`` tuple of ints (or None as described above).

    Raises:
        ValueError: if the value does not match BYTE_RANGE_RE or if the last
            position is smaller than the first one.
    """
    if byte_range.strip() == "":
        return None, None

    m = BYTE_RANGE_RE.match(byte_range)
    if not m:
        raise ValueError(f"Invalid byte range {byte_range}")

    first_text, last_text = m.groups()
    first = int(first_text)
    last = None if last_text is None else int(last_text)
    # Compare against None explicitly: a last position of 0 is falsy but is
    # still a real bound that must be validated (e.g. "bytes=5-0").
    if last is not None and last < first:
        raise ValueError(f"Invalid byte range {byte_range}")
    return first, last
|
|
|
|
|
|
# Request handler that serves SERVER_JSON_RESPONSE and, when enabled, honours
# HTTP Range requests. Its class attributes are configured and inspected by
# run_test().
class HttpProcessor(BaseHTTPRequestHandler):
    # Whether Range headers are honoured and "Accept-Ranges" is advertised.
    allow_range = False
    # Set to True once a GET with a parsed Range has been served.
    range_used = False
    # Number of GET requests for which response headers were sent.
    get_call_num = 0
    # Status codes used, instead of the regular one, for GETs whose range
    # starts past byte 0; consumed from the end of the list.
    responses_to_get = []

    def send_head(self, from_get=False):
        """Send the status line and headers for the current request.

        Stores the parsed range (or None) in ``self.range``.

        Args:
            from_get: True when called from do_GET; only then may a queued
                status code from ``responses_to_get`` be used.

        Returns:
            The complete encoded payload, or None if an error response
            (400 or 416) was sent instead.
        """
        if self.headers["Range"] and HttpProcessor.allow_range:
            try:
                self.range = parse_byte_range(self.headers["Range"])
            except ValueError:
                self.send_error(400, "Invalid byte range")
                return None
        else:
            self.range = None

        first, last = self.range if self.range else (None, None)
        if first is None:
            first = 0

        payload = SERVER_JSON_RESPONSE.encode()
        payload_len = len(payload)
        if first and first >= payload_len:
            self.send_error(416, "Requested Range Not Satisfiable")
            return None

        retry_range_request = (
            first != 0 and from_get is True and len(HttpProcessor.responses_to_get) > 0
        )
        if retry_range_request:
            # Always emit a status line for the queued code. The previous
            # membership test against `responses` skipped it for known integer
            # codes, which would have produced a response without status line.
            code = HttpProcessor.responses_to_get.pop()
            self.send_response(int(code))
        else:
            self.send_response(206 if HttpProcessor.allow_range else 200)

        self.send_header("Content-type", "application/json")

        if HttpProcessor.allow_range:
            self.send_header("Accept-Ranges", "bytes")

        # Clamp an open-ended or overlong range to the end of the payload.
        if last is None or last >= payload_len:
            last = payload_len - 1

        response_length = last - first + 1

        # NOTE(review): `last` has been defaulted by this point, so this
        # condition can also hold for requests without a Range header --
        # confirm that sending Content-Range in that case is intended.
        if first or last:
            self.send_header("Content-Range", f"bytes {first}-{last}/{payload_len}")
        self.send_header(
            "Content-Length",
            str(response_length) if HttpProcessor.allow_range else str(payload_len),
        )
        self.end_headers()
        return payload

    def do_HEAD(self):
        """Answer a HEAD request with the headers only."""
        self.send_head()

    def do_GET(self):
        """Answer a GET request with the whole payload or the requested slice."""
        payload = self.send_head(True)
        if payload is None:
            # send_head() has already sent an error response.
            return

        HttpProcessor.get_call_num += 1

        if not self.range:
            self.wfile.write(payload)
            return

        HttpProcessor.range_used = True
        start, stop = self.range
        if stop is None:
            stop = len(payload) - 1
        if start is None:
            start = 0
        # Range bounds are inclusive, hence the +1 on the slice end.
        self.wfile.write(payload[start : stop + 1])

    def log_message(self, format, *args):
        """Suppress the per-request logging done by the base class."""
        return
|
|
|
|
|
|
class HTTPServerV6(HTTPServer):
    """HTTPServer variant whose listening socket uses the AF_INET6 family."""

    address_family = socket.AF_INET6
|
|
|
|
|
|
def start_server():
    """Create the test HTTP server and a thread that will run it.

    The server is bound to HTTP_SERVER_ADDRESS with HttpProcessor as request
    handler; HTTPServerV6 is used when IS_IPV6 is set. The thread is returned
    unstarted.

    Returns:
        A ``(thread, server)`` tuple.
    """
    server_class = HTTPServerV6 if IS_IPV6 else HTTPServer
    server = server_class(HTTP_SERVER_ADDRESS, HttpProcessor)
    worker = threading.Thread(target=server.serve_forever)
    return worker, server
|
|
|
|
|
|
#####################################################################
|
|
# Testing area.
|
|
#####################################################################
|
|
|
|
|
|
def test_select(settings):
    """Select from the test HTTP server via url() and verify the answer.

    Args:
        settings: mapping of setting name to value; each entry is rendered as
            ``name=repr(value)`` in the query's SETTINGS clause.

    Raises:
        Exception: from check_answers() when the result differs from
            EXPECTED_ANSWER.
    """
    settings_clause = ",".join(
        name + "=" + repr(value) for name, value in settings.items()
    )
    query = f"SELECT * FROM url('{HTTP_SERVER_URL_STR}','JSONAsString') SETTINGS {settings_clause};"
    check_answers(query, EXPECTED_ANSWER)
|
|
|
|
|
|
def run_test(allow_range, settings, check_retries=False):
    """Run one scenario against a freshly started test HTTP server.

    Resets the HttpProcessor counters, starts the server, issues the SELECT
    and then verifies how the server was used. Prints "PASSED" on success.

    Args:
        allow_range: whether the HTTP server honours Range requests.
        settings: ClickHouse settings for the query; must contain
            "max_download_buffer_size".
        check_retries: when true, queue status codes in
            HttpProcessor.responses_to_get and require all of them to be
            consumed.

    Raises:
        Exception: when Range usage, the consumption of the queued responses
            or the number of GET calls does not match the expectation.
    """
    HttpProcessor.range_used = False
    HttpProcessor.get_call_num = 0
    HttpProcessor.allow_range = allow_range
    if check_retries:
        HttpProcessor.responses_to_get = ["500", "200", "206"]
    # Taken before the server runs, because the handler pops from the list.
    retries_num = len(HttpProcessor.responses_to_get)

    t, httpd = start_server()
    t.start()
    try:
        test_select(settings)

        download_buffer_size = settings["max_download_buffer_size"]
        # Number of download_buffer_size chunks needed to cover the payload.
        expected_get_call_num = (PAYLOAD_LEN - 1) // download_buffer_size + 1
        if allow_range:
            if not HttpProcessor.range_used:
                raise Exception("HTTP Range was not used when supported")

            if check_retries and len(HttpProcessor.responses_to_get) > 0:
                raise Exception(
                    "Expected to get http response 500, which had to be retried, but 200 ok returned and then retried"
                )

            if retries_num > 0:
                expected_get_call_num += retries_num - 1

            if expected_get_call_num != HttpProcessor.get_call_num:
                raise Exception(
                    f"Invalid amount of GET calls with Range. Expected {expected_get_call_num}, actual {HttpProcessor.get_call_num}"
                )
        else:
            if HttpProcessor.range_used:
                raise Exception("HTTP Range used while not supported")
    finally:
        # Stop the server even when a check failed, and release the listening
        # socket explicitly: the next scenario binds the same address again.
        httpd.shutdown()
        t.join()
        httpd.server_close()
    print("PASSED")
|
|
|
|
|
|
def main():
    """Run all scenarios in order; run_test() raises on the first failure."""
    scenarios = (
        # Test Accept-Ranges=False
        (False, {"max_download_buffer_size": 20}, False),
        # Test Accept-Ranges=True, parallel download is used
        (True, {"max_download_buffer_size": 20}, False),
        # Test Accept-Ranges=True, parallel download is used
        (True, {"max_download_buffer_size": 10}, False),
        # Test Accept-Ranges=True with max_download_threads set; ranged GETs
        # are answered with the status codes that run_test() queues when
        # check_retries is enabled.
        (True, {"max_download_buffer_size": 10, "max_download_threads": 2}, True),
    )
    for allow_range, settings, check_retries in scenarios:
        run_test(
            allow_range=allow_range, settings=settings, check_retries=check_retries
        )
|
|
|
|
|
|
if __name__ == "__main__":
    try:
        main()
    except Exception as ex:
        # Report where the failure happened and what it was on stderr.
        failure_traceback = sys.exc_info()[2]
        traceback.print_tb(failure_traceback, file=sys.stderr)
        print(ex, file=sys.stderr)
        sys.stderr.flush()

        # Exit with a failure status right away via os._exit.
        os._exit(1)
|