From bfc703692ad5d90bb1f43836752e4f4668ba1c4b Mon Sep 17 00:00:00 2001 From: Vitaliy Zakaznikov Date: Mon, 8 Feb 2021 08:48:43 -0500 Subject: [PATCH 01/22] Starting to add LDAP docs. --- .../external-authenticators/index.md | 9 ++ .../external-authenticators/ldap.md | 145 ++++++++++++++++++ .../sql-reference/statements/create/user.md | 3 +- 3 files changed, 156 insertions(+), 1 deletion(-) create mode 100644 docs/en/operations/external-authenticators/index.md create mode 100644 docs/en/operations/external-authenticators/ldap.md diff --git a/docs/en/operations/external-authenticators/index.md b/docs/en/operations/external-authenticators/index.md new file mode 100644 index 00000000000..10c2ea91eb9 --- /dev/null +++ b/docs/en/operations/external-authenticators/index.md @@ -0,0 +1,9 @@ +--- +toc_folder_title: External User Authenticators and Directories +toc_priority: 48 +toc_title: Introduction +--- + +# External User Authenticators and Directories {#external-authenticators} + +ClickHouse supports authenticating and managing users using external services such as [LDAP](#external-authenticators-ldap). diff --git a/docs/en/operations/external-authenticators/ldap.md b/docs/en/operations/external-authenticators/ldap.md new file mode 100644 index 00000000000..fd5f2e578ce --- /dev/null +++ b/docs/en/operations/external-authenticators/ldap.md @@ -0,0 +1,145 @@ +# LDAP {#external-authenticators-ldap} + +LDAP server can be used to authenticate ClickHouse users. There are two different approaches for doing this: + +- use LDAP as an external authenticator for existing users, which are defined in `users.xml` or in local access control paths +- use LDAP as an external user directory and allow locally undefined users to be authenticated if they exist on the LDAP server + +For both of these approaches, an internally named LDAP server must be defined in the ClickHouse config +so that other parts of config are able to refer to it. + +## Server Definition {#ldap-server-definition} + +To define LDAP server you must add `ldap_servers` section to the `config.xml`. For example, + +```xml + + + + + localhost + 636 + uid={user_name},ou=users,dc=example,dc=com + 300 + yes + tls1.2 + demand + /path/to/tls_cert_file + /path/to/tls_key_file + /path/to/tls_ca_cert_file + /path/to/tls_ca_cert_dir + ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:AES256-GCM-SHA384 + + + +``` + +Note, that you can define multiple LDAP servers inside `ldap_servers` section using distinct names. + +Parameters: + +- `host` - LDAP server hostname or IP, this parameter is mandatory and cannot be empty. +- `port` - LDAP server port, default is `636` if `enable_tls` is set to `true`, `389` otherwise. +- `bind_dn` - template used to construct the DN to bind to. + - The resulting DN will be constructed by replacing all `{user_name}` substrings of the template with the actual user name during each authentication attempt. +- `verification_cooldown` - a period of time, in seconds, after a successful bind attempt, during which the user will be assumed to be successfully authenticated for all consecutive requests without contacting the LDAP server. + - Specify `0` (the default) to disable caching and force contacting the LDAP server for each authentication request. +- `enable_tls` - flag to trigger use of secure connection to the LDAP server. + - Specify `no` for plain text `ldap://` protocol (not recommended). + - Specify `yes` for LDAP over SSL/TLS `ldaps://` protocol (recommended, the default). + - Specify `starttls` for legacy StartTLS protocol (plain text `ldap://` protocol, upgraded to TLS). +- `tls_minimum_protocol_version` - the minimum protocol version of SSL/TLS. + - Accepted values are: `ssl2`, `ssl3`, `tls1.0`, `tls1.1`, `tls1.2` (the default). +- `tls_require_cert` - SSL/TLS peer certificate verification behavior. + - Accepted values are: `never`, `allow`, `try`, `demand` (the default). +- `tls_cert_file` - path to certificate file. +- `tls_key_file` - path to certificate key file. +- `tls_ca_cert_file` - path to CA certificate file. +- `tls_ca_cert_dir` - path to the directory containing CA certificates. +- `tls_cipher_suite` - allowed cipher suite (in OpenSSL notation). + +## External Authenticator {#ldap-external-authenticator} + +A remote LDAP server can be used as a method for verifying the passwords for locally defined users (users defined in `users.xml` or in local access control paths). In order to achieve this, specify previously defined LDAP server name instead of `password` or similar sections in the user definition. + +At each login attempt, ClickHouse will try to "bind" to the specified DN (see `bind_dn` sections in LDAP server config in `config.xml`) at the LDAP server using the provided credentials, and, if successful, the user will be considered authenticated. This is often called "simple bind" method. + +Example (goes into `users.xml`): + +```xml + + + + + + + + my_ldap_server + + + + +``` + +Note, that now, once user `my_user` refers to `my_ldap_server`, this LDAP server must be configured in the main `config.xml` file as described previously. + +When SQL-driven Access Control and Account Management is enabled in ClickHouse, users that are identified by LDAP servers can also be created using queries. + +Example (execute in ClickHouse client): + +```sql +CREATE USER my_user IDENTIFIED WITH ldap_server BY 'my_ldap_server' +``` + +## Exernal User Directory {#ldap-external-user-directory} + +A remote LDAP server can be used as a source of user definitions, in addition to the locally defined users. In order to achieve this, specify previously defined LDAP server name in `ldap` section inside `users_directories` section in main `config.xml` file. + +At each login attempt, ClickHouse will try to find the user definition locally and authenticate it as usual, but if the user is not defined, ClickHouse will assume it exists in the external LDAP directory, and will try to "bind" to the specified DN (see `bind_dn` sections in LDAP server config in `config.xml`) at the LDAP server using the provided credentials, and, if successful, the user will be considered existing and authenticated. The user will be assigned roles from the list specified in `roles`. Additionally, LDAP "search" can be performed and results can be transformed and treated as role names and then assigned to the user if `role_mapping` section is also configured. All this implies that the SQL-driven Access Control and Account Management is enabled in ClickHouse and roles are created using `CREATE ROLE ...` queries. + +Example (goes into `config.xml`): + +```xml + + + + + + my_ldap_server + + + + + + ou=groups,dc=example,dc=com + subtree + (&(objectClass=groupOfNames)(member={bind_dn})) + cn + clickhouse_ + + + + +``` + +Note, that now, once `my_ldap_server` is referred from `ldap` inside `user_directories` section, this LDAP server must be configured in the main `config.xml` file as described previously. + +Parameters: + +- `server` - one of LDAP server names defined in `ldap_servers` config section above. This parameter is mandatory and cannot be empty. +- `roles` - section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server. + - If no roles are specified here or assigned during role mapping (below), user will not be able to perform any actions after authentication. +- `role_mapping` - section with LDAP search parameters and mapping rules. + - When a user authenticates, while still bound to LDAP, an LDAP search is performed using `search_filter` and the name of the logged in user. For each entry found during that search, the value of the specified attribute is extracted. For each attribute value that has the specified prefix, the prefix is removed, and the rest of the value becomes the name of a local role defined in ClickHouse, which is expected to be created beforehand by `CREATE ROLE ...` command. + + - There can be multiple `role_mapping` sections defined inside the same `ldap` section. All of them will be applied. + - `base_dn` - template used to construct the base DN for the LDAP search. + - The resulting DN will be constructed by replacing all `{user_name}` and `{bind_dn}` substrings of the template with the actual user name and bind DN during each LDAP search. + - `scope` - scope of the LDAP search. + - Accepted values are: `base`, `one_level`, `children`, `subtree` (the default). + - `search_filter` - template used to construct the search filter for the LDAP search. + - The resulting filter will be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{base_dn}` substrings of the template with the actual user name, bind DN, and base DN during each LDAP search. + - Note, that the special characters must be escaped properly in XML. + - `attribute` - attribute name whose values will be returned by the LDAP search. + - `prefix` - prefix, that will be expected to be in front of each string in the original list of strings returned by the LDAP search. Prefix will be removed from the original strings and resulting strings will be treated as local role names. Empty, by default. + diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index d5343cce7be..c1a52e3b864 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -12,7 +12,7 @@ Syntax: ``` sql CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...] - [IDENTIFIED [WITH {NO_PASSWORD|PLAINTEXT_PASSWORD|SHA256_PASSWORD|SHA256_HASH|DOUBLE_SHA1_PASSWORD|DOUBLE_SHA1_HASH}] BY {'password'|'hash'}] + [IDENTIFIED [WITH {NO_PASSWORD|PLAINTEXT_PASSWORD|SHA256_PASSWORD|SHA256_HASH|DOUBLE_SHA1_PASSWORD|DOUBLE_SHA1_HASH|LDAP_SERVER}] BY {'password'|'hash'}] [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] [DEFAULT ROLE role [,...]] [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] @@ -30,6 +30,7 @@ There are multiple ways of user identification: - `IDENTIFIED WITH sha256_hash BY 'hash'` - `IDENTIFIED WITH double_sha1_password BY 'qwerty'` - `IDENTIFIED WITH double_sha1_hash BY 'hash'` +- `IDENTIFIED WITH ldap_server BY 'server'` ## User Host {#user-host} From e312ef72281dc5b034343d0ff33035fbf1a7a7ef Mon Sep 17 00:00:00 2001 From: Vitaliy Zakaznikov Date: Mon, 8 Feb 2021 12:29:45 -0500 Subject: [PATCH 02/22] Updating LDAP docs. --- .../external-authenticators/index.md | 12 ++++- .../external-authenticators/ldap.md | 53 ++++++++++++------- 2 files changed, 44 insertions(+), 21 deletions(-) diff --git a/docs/en/operations/external-authenticators/index.md b/docs/en/operations/external-authenticators/index.md index 10c2ea91eb9..f06c1de8ec7 100644 --- a/docs/en/operations/external-authenticators/index.md +++ b/docs/en/operations/external-authenticators/index.md @@ -6,4 +6,14 @@ toc_title: Introduction # External User Authenticators and Directories {#external-authenticators} -ClickHouse supports authenticating and managing users using external services such as [LDAP](#external-authenticators-ldap). +ClickHouse supports authenticating and managing users using external services. + +The following external authenticators and directories are supported. + +## External Authenticators + +- [LDAP](#ldap-external-authenticator) + +## External User Directories + +- [LDAP](#ldap-external-user-directory) diff --git a/docs/en/operations/external-authenticators/ldap.md b/docs/en/operations/external-authenticators/ldap.md index fd5f2e578ce..7ad1fd68b74 100644 --- a/docs/en/operations/external-authenticators/ldap.md +++ b/docs/en/operations/external-authenticators/ldap.md @@ -41,8 +41,11 @@ Parameters: - `host` - LDAP server hostname or IP, this parameter is mandatory and cannot be empty. - `port` - LDAP server port, default is `636` if `enable_tls` is set to `true`, `389` otherwise. - `bind_dn` - template used to construct the DN to bind to. - - The resulting DN will be constructed by replacing all `{user_name}` substrings of the template with the actual user name during each authentication attempt. -- `verification_cooldown` - a period of time, in seconds, after a successful bind attempt, during which the user will be assumed to be successfully authenticated for all consecutive requests without contacting the LDAP server. + - The resulting DN will be constructed by replacing all `{user_name}` substrings of the + template with the actual user name during each authentication attempt. +- `verification_cooldown` - a period of time, in seconds, after a successful bind attempt, + during which the user will be assumed to be successfully authenticated for all consecutive + requests without contacting the LDAP server. - Specify `0` (the default) to disable caching and force contacting the LDAP server for each authentication request. - `enable_tls` - flag to trigger use of secure connection to the LDAP server. - Specify `no` for plain text `ldap://` protocol (not recommended). @@ -58,13 +61,14 @@ Parameters: - `tls_ca_cert_dir` - path to the directory containing CA certificates. - `tls_cipher_suite` - allowed cipher suite (in OpenSSL notation). -## External Authenticator {#ldap-external-authenticator} +## Using LDAP As External Authenticator {#ldap-external-authenticator} -A remote LDAP server can be used as a method for verifying the passwords for locally defined users (users defined in `users.xml` or in local access control paths). In order to achieve this, specify previously defined LDAP server name instead of `password` or similar sections in the user definition. +A remote LDAP server can be used as a method for verifying passwords for locally defined users (users defined in `users.xml` or in local access control paths). In order to achieve this, specify previously defined LDAP server name instead of `password` or similar sections in the user definition. -At each login attempt, ClickHouse will try to "bind" to the specified DN (see `bind_dn` sections in LDAP server config in `config.xml`) at the LDAP server using the provided credentials, and, if successful, the user will be considered authenticated. This is often called "simple bind" method. +At each login attempt, ClickHouse will try to "bind" to the specified DN defined by the `bind_dn` parameter +in the [LDAP server definition](#ldap-server-definition) using the provided credentials, and if successful, the user will be considered authenticated. This is often called a "simple bind" method. -Example (goes into `users.xml`): +For example, ```xml @@ -81,21 +85,20 @@ Example (goes into `users.xml`): ``` -Note, that now, once user `my_user` refers to `my_ldap_server`, this LDAP server must be configured in the main `config.xml` file as described previously. +Note, that user `my_user` refers to `my_ldap_server`. This LDAP server must be configured in the main `config.xml` file as described previously. -When SQL-driven Access Control and Account Management is enabled in ClickHouse, users that are identified by LDAP servers can also be created using queries. +When SQL-driven [Access Control and Account Management](#access-control) is enabled in ClickHouse, users that are identified by LDAP servers can also be created using the [CRATE USER](#create-user-statement) statement. -Example (execute in ClickHouse client): ```sql CREATE USER my_user IDENTIFIED WITH ldap_server BY 'my_ldap_server' ``` -## Exernal User Directory {#ldap-external-user-directory} +## Using LDAP As Exernal User Directory {#ldap-external-user-directory} -A remote LDAP server can be used as a source of user definitions, in addition to the locally defined users. In order to achieve this, specify previously defined LDAP server name in `ldap` section inside `users_directories` section in main `config.xml` file. +In addition to the locally defined users, a remote LDAP server can be used as a source of user definitions. In order to achieve this, specify previously defined LDAP server name (see [LDAP Server Definition](#ldap-server-definition)) in the `ldap` section inside the `users_directories` section in of the `config.xml` file. -At each login attempt, ClickHouse will try to find the user definition locally and authenticate it as usual, but if the user is not defined, ClickHouse will assume it exists in the external LDAP directory, and will try to "bind" to the specified DN (see `bind_dn` sections in LDAP server config in `config.xml`) at the LDAP server using the provided credentials, and, if successful, the user will be considered existing and authenticated. The user will be assigned roles from the list specified in `roles`. Additionally, LDAP "search" can be performed and results can be transformed and treated as role names and then assigned to the user if `role_mapping` section is also configured. All this implies that the SQL-driven Access Control and Account Management is enabled in ClickHouse and roles are created using `CREATE ROLE ...` queries. +At each login attempt, ClickHouse will try to find the user definition locally and authenticate it as usual, but if the user is not defined, ClickHouse will assume it exists in the external LDAP directory, and will try to "bind" to the specified DN at the LDAP server using the provided credentials, and if successful, the user will be considered existing and authenticated. The user will be assigned roles from the list specified in the `roles` section. Additionally, LDAP "search" can be performed and results can be transformed and treated as role names and then assigned to the user if the `role_mapping` section is also configured. All this implies that the SQL-driven [Access Control and Account Management](#access-control) is enabled in ClickHouse and roles are created using the [CREATE ROLE](#create-role-statement) statement. Example (goes into `config.xml`): @@ -122,24 +125,34 @@ Example (goes into `config.xml`): ``` -Note, that now, once `my_ldap_server` is referred from `ldap` inside `user_directories` section, this LDAP server must be configured in the main `config.xml` file as described previously. +Note that `my_ldap_server` referred in the `ldap` section inside the `user_directories` section must be a previously +defined LDAP server that is configured in the `config.xml` (see [LDAP Server Definition](#ldap-server-definition)). Parameters: -- `server` - one of LDAP server names defined in `ldap_servers` config section above. This parameter is mandatory and cannot be empty. +- `server` - one of LDAP server names defined in `ldap_servers` config section above. + This parameter is mandatory and cannot be empty. - `roles` - section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server. - - If no roles are specified here or assigned during role mapping (below), user will not be able to perform any actions after authentication. + - If no roles are specified here or assigned during role mapping (below), user will not be able + to perform any actions after authentication. - `role_mapping` - section with LDAP search parameters and mapping rules. - - When a user authenticates, while still bound to LDAP, an LDAP search is performed using `search_filter` and the name of the logged in user. For each entry found during that search, the value of the specified attribute is extracted. For each attribute value that has the specified prefix, the prefix is removed, and the rest of the value becomes the name of a local role defined in ClickHouse, which is expected to be created beforehand by `CREATE ROLE ...` command. - + - When a user authenticates, while still bound to LDAP, an LDAP search is performed using `search_filter` + and the name of the logged in user. For each entry found during that search, the value of the specified + attribute is extracted. For each attribute value that has the specified prefix, the prefix is removed, + and the rest of the value becomes the name of a local role defined in ClickHouse, + which is expected to be created beforehand by the [CREATE ROLE](#create-role-statement) statement. - There can be multiple `role_mapping` sections defined inside the same `ldap` section. All of them will be applied. - `base_dn` - template used to construct the base DN for the LDAP search. - - The resulting DN will be constructed by replacing all `{user_name}` and `{bind_dn}` substrings of the template with the actual user name and bind DN during each LDAP search. + - The resulting DN will be constructed by replacing all `{user_name}` and `{bind_dn}` + substrings of the template with the actual user name and bind DN during each LDAP search. - `scope` - scope of the LDAP search. - Accepted values are: `base`, `one_level`, `children`, `subtree` (the default). - `search_filter` - template used to construct the search filter for the LDAP search. - - The resulting filter will be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{base_dn}` substrings of the template with the actual user name, bind DN, and base DN during each LDAP search. + - The resulting filter will be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{base_dn}` + substrings of the template with the actual user name, bind DN, and base DN during each LDAP search. - Note, that the special characters must be escaped properly in XML. - `attribute` - attribute name whose values will be returned by the LDAP search. - - `prefix` - prefix, that will be expected to be in front of each string in the original list of strings returned by the LDAP search. Prefix will be removed from the original strings and resulting strings will be treated as local role names. Empty, by default. + - `prefix` - prefix, that will be expected to be in front of each string in the original + list of strings returned by the LDAP search. Prefix will be removed from the original + strings and resulting strings will be treated as local role names. Empty, by default. From 9d9055681c8c5536d3dec4974cf42c90490f1efb Mon Sep 17 00:00:00 2001 From: Vitaliy Zakaznikov Date: Mon, 8 Feb 2021 12:35:18 -0500 Subject: [PATCH 03/22] Small changes to LDAP docs. --- docs/en/operations/external-authenticators/index.md | 4 ++-- docs/en/operations/external-authenticators/ldap.md | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/en/operations/external-authenticators/index.md b/docs/en/operations/external-authenticators/index.md index f06c1de8ec7..3387bbbdc05 100644 --- a/docs/en/operations/external-authenticators/index.md +++ b/docs/en/operations/external-authenticators/index.md @@ -10,10 +10,10 @@ ClickHouse supports authenticating and managing users using external services. The following external authenticators and directories are supported. -## External Authenticators +External Authenticators: - [LDAP](#ldap-external-authenticator) -## External User Directories +External User Directories: - [LDAP](#ldap-external-user-directory) diff --git a/docs/en/operations/external-authenticators/ldap.md b/docs/en/operations/external-authenticators/ldap.md index 7ad1fd68b74..03be357a12a 100644 --- a/docs/en/operations/external-authenticators/ldap.md +++ b/docs/en/operations/external-authenticators/ldap.md @@ -8,7 +8,7 @@ LDAP server can be used to authenticate ClickHouse users. There are two differen For both of these approaches, an internally named LDAP server must be defined in the ClickHouse config so that other parts of config are able to refer to it. -## Server Definition {#ldap-server-definition} +## LDAP Server Definition {#ldap-server-definition} To define LDAP server you must add `ldap_servers` section to the `config.xml`. For example, @@ -61,7 +61,7 @@ Parameters: - `tls_ca_cert_dir` - path to the directory containing CA certificates. - `tls_cipher_suite` - allowed cipher suite (in OpenSSL notation). -## Using LDAP As External Authenticator {#ldap-external-authenticator} +## LDAP External Authenticator {#ldap-external-authenticator} A remote LDAP server can be used as a method for verifying passwords for locally defined users (users defined in `users.xml` or in local access control paths). In order to achieve this, specify previously defined LDAP server name instead of `password` or similar sections in the user definition. @@ -94,7 +94,7 @@ When SQL-driven [Access Control and Account Management](#access-control) is enab CREATE USER my_user IDENTIFIED WITH ldap_server BY 'my_ldap_server' ``` -## Using LDAP As Exernal User Directory {#ldap-external-user-directory} +## LDAP Exernal User Directory {#ldap-external-user-directory} In addition to the locally defined users, a remote LDAP server can be used as a source of user definitions. In order to achieve this, specify previously defined LDAP server name (see [LDAP Server Definition](#ldap-server-definition)) in the `ldap` section inside the `users_directories` section in of the `config.xml` file. From 3c94e4d6f4b5e7c8ee048d6325d6275775d35426 Mon Sep 17 00:00:00 2001 From: Vitaliy Zakaznikov Date: Mon, 8 Feb 2021 14:01:33 -0500 Subject: [PATCH 04/22] Changing index.md --- docs/en/operations/external-authenticators/index.md | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/docs/en/operations/external-authenticators/index.md b/docs/en/operations/external-authenticators/index.md index 3387bbbdc05..fb8483fa341 100644 --- a/docs/en/operations/external-authenticators/index.md +++ b/docs/en/operations/external-authenticators/index.md @@ -8,12 +8,6 @@ toc_title: Introduction ClickHouse supports authenticating and managing users using external services. -The following external authenticators and directories are supported. +The following external authenticators and directories are supported: -External Authenticators: - -- [LDAP](#ldap-external-authenticator) - -External User Directories: - -- [LDAP](#ldap-external-user-directory) +- [LDAP](#external-authenticators-ldap) [Authenticator](#ldap-external-authenticator) and [Directory](#ldap-external-user-directory) From 3874effea16b4140227efa6e11fe6dc34024924f Mon Sep 17 00:00:00 2001 From: Vitaliy Zakaznikov Date: Tue, 9 Feb 2021 10:09:38 -0500 Subject: [PATCH 05/22] Fixing rendering issues and links. --- .../external-authenticators/index.md | 2 +- .../external-authenticators/ldap.md | 74 +++++++++---------- 2 files changed, 37 insertions(+), 39 deletions(-) diff --git a/docs/en/operations/external-authenticators/index.md b/docs/en/operations/external-authenticators/index.md index fb8483fa341..95f80f192f5 100644 --- a/docs/en/operations/external-authenticators/index.md +++ b/docs/en/operations/external-authenticators/index.md @@ -10,4 +10,4 @@ ClickHouse supports authenticating and managing users using external services. The following external authenticators and directories are supported: -- [LDAP](#external-authenticators-ldap) [Authenticator](#ldap-external-authenticator) and [Directory](#ldap-external-user-directory) +- [LDAP](./ldap.md#external-authenticators-ldap) [Authenticator](./ldap.md#ldap-external-authenticator) and [Directory](./ldap.md#ldap-external-user-directory) diff --git a/docs/en/operations/external-authenticators/ldap.md b/docs/en/operations/external-authenticators/ldap.md index 03be357a12a..36a13227852 100644 --- a/docs/en/operations/external-authenticators/ldap.md +++ b/docs/en/operations/external-authenticators/ldap.md @@ -5,8 +5,7 @@ LDAP server can be used to authenticate ClickHouse users. There are two differen - use LDAP as an external authenticator for existing users, which are defined in `users.xml` or in local access control paths - use LDAP as an external user directory and allow locally undefined users to be authenticated if they exist on the LDAP server -For both of these approaches, an internally named LDAP server must be defined in the ClickHouse config -so that other parts of config are able to refer to it. +For both of these approaches, an internally named LDAP server must be defined in the ClickHouse config so that other parts of config are able to refer to it. ## LDAP Server Definition {#ldap-server-definition} @@ -34,27 +33,27 @@ To define LDAP server you must add `ldap_servers` section to the `config.xml`. F ``` -Note, that you can define multiple LDAP servers inside `ldap_servers` section using distinct names. +Note, that you can define multiple LDAP servers inside the `ldap_servers` section using distinct names. Parameters: - `host` - LDAP server hostname or IP, this parameter is mandatory and cannot be empty. - `port` - LDAP server port, default is `636` if `enable_tls` is set to `true`, `389` otherwise. - `bind_dn` - template used to construct the DN to bind to. - - The resulting DN will be constructed by replacing all `{user_name}` substrings of the - template with the actual user name during each authentication attempt. + - The resulting DN will be constructed by replacing all `{user_name}` substrings of the + template with the actual user name during each authentication attempt. - `verification_cooldown` - a period of time, in seconds, after a successful bind attempt, during which the user will be assumed to be successfully authenticated for all consecutive requests without contacting the LDAP server. - - Specify `0` (the default) to disable caching and force contacting the LDAP server for each authentication request. + - Specify `0` (the default) to disable caching and force contacting the LDAP server for each authentication request. - `enable_tls` - flag to trigger use of secure connection to the LDAP server. - - Specify `no` for plain text `ldap://` protocol (not recommended). - - Specify `yes` for LDAP over SSL/TLS `ldaps://` protocol (recommended, the default). - - Specify `starttls` for legacy StartTLS protocol (plain text `ldap://` protocol, upgraded to TLS). + - Specify `no` for plain text `ldap://` protocol (not recommended). + - Specify `yes` for LDAP over SSL/TLS `ldaps://` protocol (recommended, the default). + - Specify `starttls` for legacy StartTLS protocol (plain text `ldap://` protocol, upgraded to TLS). - `tls_minimum_protocol_version` - the minimum protocol version of SSL/TLS. - - Accepted values are: `ssl2`, `ssl3`, `tls1.0`, `tls1.1`, `tls1.2` (the default). + - Accepted values are: `ssl2`, `ssl3`, `tls1.0`, `tls1.1`, `tls1.2` (the default). - `tls_require_cert` - SSL/TLS peer certificate verification behavior. - - Accepted values are: `never`, `allow`, `try`, `demand` (the default). + - Accepted values are: `never`, `allow`, `try`, `demand` (the default). - `tls_cert_file` - path to certificate file. - `tls_key_file` - path to certificate key file. - `tls_ca_cert_file` - path to CA certificate file. @@ -65,8 +64,7 @@ Parameters: A remote LDAP server can be used as a method for verifying passwords for locally defined users (users defined in `users.xml` or in local access control paths). In order to achieve this, specify previously defined LDAP server name instead of `password` or similar sections in the user definition. -At each login attempt, ClickHouse will try to "bind" to the specified DN defined by the `bind_dn` parameter -in the [LDAP server definition](#ldap-server-definition) using the provided credentials, and if successful, the user will be considered authenticated. This is often called a "simple bind" method. +At each login attempt, ClickHouse will try to "bind" to the specified DN defined by the `bind_dn` parameter in the [LDAP server definition](#ldap-server-definition) using the provided credentials, and if successful, the user will be considered authenticated. This is often called a "simple bind" method. For example, @@ -87,7 +85,7 @@ For example, Note, that user `my_user` refers to `my_ldap_server`. This LDAP server must be configured in the main `config.xml` file as described previously. -When SQL-driven [Access Control and Account Management](#access-control) is enabled in ClickHouse, users that are identified by LDAP servers can also be created using the [CRATE USER](#create-user-statement) statement. +When SQL-driven [Access Control and Account Management](../access-rights.md#access-control) is enabled in ClickHouse, users that are authenticated by LDAP servers can also be created using the [CRATE USER](../../sql-reference/statements/create/user.md#create-user-statement) statement. ```sql @@ -96,9 +94,9 @@ CREATE USER my_user IDENTIFIED WITH ldap_server BY 'my_ldap_server' ## LDAP Exernal User Directory {#ldap-external-user-directory} -In addition to the locally defined users, a remote LDAP server can be used as a source of user definitions. In order to achieve this, specify previously defined LDAP server name (see [LDAP Server Definition](#ldap-server-definition)) in the `ldap` section inside the `users_directories` section in of the `config.xml` file. +In addition to the locally defined users, a remote LDAP server can be used as a source of user definitions. In order to achieve this, specify previously defined LDAP server name (see [LDAP Server Definition](#ldap-server-definition)) in the `ldap` section inside the `users_directories` section of the `config.xml` file. -At each login attempt, ClickHouse will try to find the user definition locally and authenticate it as usual, but if the user is not defined, ClickHouse will assume it exists in the external LDAP directory, and will try to "bind" to the specified DN at the LDAP server using the provided credentials, and if successful, the user will be considered existing and authenticated. The user will be assigned roles from the list specified in the `roles` section. Additionally, LDAP "search" can be performed and results can be transformed and treated as role names and then assigned to the user if the `role_mapping` section is also configured. All this implies that the SQL-driven [Access Control and Account Management](#access-control) is enabled in ClickHouse and roles are created using the [CREATE ROLE](#create-role-statement) statement. +At each login attempt, ClickHouse will try to find the user definition locally and authenticate it as usual, but if the user is not defined, ClickHouse will assume it exists in the external LDAP directory, and will try to "bind" to the specified DN at the LDAP server using the provided credentials. If successful, the user will be considered existing and authenticated. The user will be assigned roles from the list specified in the `roles` section. Additionally, LDAP "search" can be performed and results can be transformed and treated as role names and then be assigned to the user if the `role_mapping` section is also configured. All this implies that the SQL-driven [Access Control and Account Management](../access-rights.md#access-control) is enabled and roles are created using the [CREATE ROLE](../../sql-reference/statements/create/role.md#create-role-statement) statement. Example (goes into `config.xml`): @@ -130,29 +128,29 @@ defined LDAP server that is configured in the `config.xml` (see [LDAP Server Def Parameters: -- `server` - one of LDAP server names defined in `ldap_servers` config section above. +- `server` - one of LDAP server names defined in the `ldap_servers` config section above. This parameter is mandatory and cannot be empty. - `roles` - section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server. - - If no roles are specified here or assigned during role mapping (below), user will not be able - to perform any actions after authentication. + - If no roles are specified here or assigned during role mapping (below), user will not be able + to perform any actions after authentication. - `role_mapping` - section with LDAP search parameters and mapping rules. - - When a user authenticates, while still bound to LDAP, an LDAP search is performed using `search_filter` - and the name of the logged in user. For each entry found during that search, the value of the specified - attribute is extracted. For each attribute value that has the specified prefix, the prefix is removed, - and the rest of the value becomes the name of a local role defined in ClickHouse, - which is expected to be created beforehand by the [CREATE ROLE](#create-role-statement) statement. - - There can be multiple `role_mapping` sections defined inside the same `ldap` section. All of them will be applied. - - `base_dn` - template used to construct the base DN for the LDAP search. - - The resulting DN will be constructed by replacing all `{user_name}` and `{bind_dn}` - substrings of the template with the actual user name and bind DN during each LDAP search. - - `scope` - scope of the LDAP search. - - Accepted values are: `base`, `one_level`, `children`, `subtree` (the default). - - `search_filter` - template used to construct the search filter for the LDAP search. - - The resulting filter will be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{base_dn}` - substrings of the template with the actual user name, bind DN, and base DN during each LDAP search. - - Note, that the special characters must be escaped properly in XML. - - `attribute` - attribute name whose values will be returned by the LDAP search. - - `prefix` - prefix, that will be expected to be in front of each string in the original - list of strings returned by the LDAP search. Prefix will be removed from the original - strings and resulting strings will be treated as local role names. Empty, by default. + - When a user authenticates, while still bound to LDAP, an LDAP search is performed using `search_filter` + and the name of the logged in user. For each entry found during that search, the value of the specified + attribute is extracted. For each attribute value that has the specified prefix, the prefix is removed, + and the rest of the value becomes the name of a local role defined in ClickHouse, + which is expected to be created beforehand by the [CREATE ROLE](../../sql-reference/statements/create/role.md#create-role-statement) statement. + - There can be multiple `role_mapping` sections defined inside the same `ldap` section. All of them will be applied. + - `base_dn` - template used to construct the base DN for the LDAP search. + - The resulting DN will be constructed by replacing all `{user_name}` and `{bind_dn}` + substrings of the template with the actual user name and bind DN during each LDAP search. + - `scope` - scope of the LDAP search. + - Accepted values are: `base`, `one_level`, `children`, `subtree` (the default). + - `search_filter` - template used to construct the search filter for the LDAP search. + - The resulting filter will be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{base_dn}` + substrings of the template with the actual user name, bind DN, and base DN during each LDAP search. + - Note, that the special characters must be escaped properly in XML. + - `attribute` - attribute name whose values will be returned by the LDAP search. + - `prefix` - prefix, that will be expected to be in front of each string in the original + list of strings returned by the LDAP search. Prefix will be removed from the original + strings and resulting strings will be treated as local role names. Empty, by default. From 2a887b9772180e6d0a731f966dc57572c73f25bd Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 15 Feb 2021 21:56:51 +0000 Subject: [PATCH 06/22] Add missing format factory settings --- .../table-engines/integrations/rabbitmq.md | 9 +++- .../table-engines/integrations/rabbitmq.md | 9 +++- src/Storages/RabbitMQ/RabbitMQSettings.h | 7 ++- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 9 ++++ .../integration/test_storage_rabbitmq/test.py | 53 +++++++++++++++++++ 5 files changed, 82 insertions(+), 5 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md index b0901ee6f6e..dbae6b62257 100644 --- a/docs/en/engines/table-engines/integrations/rabbitmq.md +++ b/docs/en/engines/table-engines/integrations/rabbitmq.md @@ -59,6 +59,8 @@ Optional parameters: - `rabbitmq_max_block_size` - `rabbitmq_flush_interval_ms` +Also FormatFactory settings can be added along with rabbitmq-related settings. + Required configuration: The RabbitMQ server configuration should be added using the ClickHouse config file. @@ -75,11 +77,13 @@ Example: ``` sql CREATE TABLE queue ( key UInt64, - value UInt64 + value UInt64, + date DateTime ) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672', rabbitmq_exchange_name = 'exchange1', rabbitmq_format = 'JSONEachRow', - rabbitmq_num_consumers = 5; + rabbitmq_num_consumers = 5, + date_time_input_format = 'best_effort'; ``` ## Description {#description} @@ -105,6 +109,7 @@ Exchange type options: - `consistent_hash` - Data is evenly distributed between all bound tables (where the exchange name is the same). Note that this exchange type must be enabled with RabbitMQ plugin: `rabbitmq-plugins enable rabbitmq_consistent_hash_exchange`. Setting `rabbitmq_queue_base` may be used for the following cases: + - to let different tables share queues, so that multiple consumers could be registered for the same queues, which makes a better performance. If using `rabbitmq_num_consumers` and/or `rabbitmq_num_queues` settings, the exact match of queues is achieved in case these parameters are the same. - to be able to restore reading from certain durable queues when not all messages were successfully consumed. To resume consumption from one specific queue - set its name in `rabbitmq_queue_base` setting and do not specify `rabbitmq_num_consumers` and `rabbitmq_num_queues` (defaults to 1). To resume consumption from all queues, which were declared for a specific table - just specify the same settings: `rabbitmq_queue_base`, `rabbitmq_num_consumers`, `rabbitmq_num_queues`. By default, queue names will be unique to tables. - to reuse queues as they are declared durable and not auto-deleted. (Can be deleted via any of RabbitMQ CLI tools.) diff --git a/docs/ru/engines/table-engines/integrations/rabbitmq.md b/docs/ru/engines/table-engines/integrations/rabbitmq.md index dedb5842d68..bc2eda746cf 100644 --- a/docs/ru/engines/table-engines/integrations/rabbitmq.md +++ b/docs/ru/engines/table-engines/integrations/rabbitmq.md @@ -52,6 +52,8 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] - `rabbitmq_max_block_size` - `rabbitmq_flush_interval_ms` +Настройки FormatFactory также могут быть добавлены в списке RabbitMQ настроек. + Требуемая конфигурация: Конфигурация сервера RabbitMQ добавляется с помощью конфигурационного файла ClickHouse. @@ -68,11 +70,13 @@ Example: ``` sql CREATE TABLE queue ( key UInt64, - value UInt64 + value UInt64, + date DateTime ) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672', rabbitmq_exchange_name = 'exchange1', rabbitmq_format = 'JSONEachRow', - rabbitmq_num_consumers = 5; + rabbitmq_num_consumers = 5, + date_time_input_format = 'best_effort'; ``` ## Описание {#description} @@ -98,6 +102,7 @@ Example: - `consistent_hash` - данные равномерно распределяются между всеми связанными таблицами, где имя точки обмена совпадает. Обратите внимание, что этот тип обмена должен быть включен с помощью плагина RabbitMQ: `rabbitmq-plugins enable rabbitmq_consistent_hash_exchange`. Настройка `rabbitmq_queue_base` может быть использована в следующих случаях: + 1. чтобы восстановить чтение из ранее созданных очередей, если оно прекратилось по какой-либо причине, но очереди остались непустыми. Для восстановления чтения из одной конкретной очереди, нужно написать ее имя в `rabbitmq_queue_base` настройку и не указывать настройки `rabbitmq_num_consumers` и `rabbitmq_num_queues`. Чтобы восстановить чтение из всех очередей, которые были созданы для конкретной таблицы, необходимо совпадение следующих настроек: `rabbitmq_queue_base`, `rabbitmq_num_consumers`, `rabbitmq_num_queues`. По умолчанию, если настройка `rabbitmq_queue_base` не указана, будут использованы уникальные для каждой таблицы имена очередей. 2. чтобы объявить одни и те же очереди для разных таблиц, что позволяет создавать несколько параллельных подписчиков на каждую из очередей. То есть обеспечивается лучшая производительность. В данном случае, для таких таблиц также необходимо совпадение настроек: `rabbitmq_num_consumers`, `rabbitmq_num_queues`. 3. чтобы повторно использовать созданные c `durable` настройкой очереди, так как они не удаляются автоматически (но могут быть удалены с помощью любого RabbitMQ CLI). diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.h b/src/Storages/RabbitMQ/RabbitMQSettings.h index 2f8d6adfa16..66348d61424 100644 --- a/src/Storages/RabbitMQ/RabbitMQSettings.h +++ b/src/Storages/RabbitMQ/RabbitMQSettings.h @@ -1,13 +1,14 @@ #pragma once #include +#include namespace DB { class ASTStorage; -#define LIST_OF_RABBITMQ_SETTINGS(M) \ +#define RABBITMQ_RELATED_SETTINGS(M) \ M(String, rabbitmq_host_port, "", "A host-port to connect to RabbitMQ server.", 0) \ M(String, rabbitmq_exchange_name, "clickhouse-exchange", "The exchange name, to which messages are sent.", 0) \ M(String, rabbitmq_format, "", "The message format.", 0) \ @@ -24,6 +25,10 @@ namespace DB M(UInt64, rabbitmq_max_block_size, 0, "Number of row collected before flushing data from RabbitMQ.", 0) \ M(Milliseconds, rabbitmq_flush_interval_ms, 0, "Timeout for flushing data from RabbitMQ.", 0) \ +#define LIST_OF_RABBITMQ_SETTINGS(M) \ + RABBITMQ_RELATED_SETTINGS(M) \ + FORMAT_FACTORY_SETTINGS(M) + DECLARE_SETTINGS_TRAITS(RabbitMQSettingsTraits, LIST_OF_RABBITMQ_SETTINGS) struct RabbitMQSettings : public BaseSettings diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 3ee9dda2bf3..edce1a4b658 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -199,6 +199,15 @@ std::shared_ptr StorageRabbitMQ::addSettings(const Context & context) c if (!schema_name.empty()) modified_context->setSetting("format_schema", schema_name); + for (const auto & setting : *rabbitmq_settings) + { + const auto & setting_name = setting.getName(); + + /// check for non-rabbitmq-related settings + if (!setting_name.starts_with("rabbitmq_")) + modified_context->setSetting(setting_name, setting.getValue()); + } + return modified_context; } diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 911f6d144f9..ca89ebdea0a 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -1912,6 +1912,59 @@ def test_rabbitmq_no_connection_at_startup(rabbitmq_cluster): assert int(result) == messages_num, 'ClickHouse lost some messages: {}'.format(result) +@pytest.mark.timeout(120) +def test_rabbitmq_format_factory_settings(rabbitmq_cluster): + instance.query(''' + CREATE TABLE test.format_settings ( + id String, date DateTime + ) ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_exchange_name = 'format_settings', + rabbitmq_format = 'JSONEachRow', + date_time_input_format = 'best_effort'; + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + + message = json.dumps({"id":"format_settings_test","date":"2021-01-19T14:42:33.1829214Z"}) + expected = instance.query('''SELECT parseDateTimeBestEffort(CAST('2021-01-19T14:42:33.1829214Z', 'String'))''') + + channel.basic_publish(exchange='format_settings', routing_key='', body=message) + result = '' + while True: + result = instance.query('SELECT date FROM test.format_settings') + if result == expected: + break; + + instance.query(''' + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + CREATE TABLE test.view ( + id String, date DateTime + ) ENGINE = MergeTree ORDER BY id; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.format_settings; + ''') + + channel.basic_publish(exchange='format_settings', routing_key='', body=message) + result = '' + while True: + result = instance.query('SELECT date FROM test.view') + if result == expected: + break; + + connection.close() + instance.query(''' + DROP TABLE test.consumer; + DROP TABLE test.format_settings; + ''') + + assert(result == expected) + + if __name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") From d9f66d8d30b35058b9d2fc0fa070ad4c3c1a5cd5 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 15 Feb 2021 23:25:19 +0000 Subject: [PATCH 07/22] Better doc --- docs/en/engines/table-engines/integrations/rabbitmq.md | 2 +- docs/ru/engines/table-engines/integrations/rabbitmq.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md index dbae6b62257..946f70f903d 100644 --- a/docs/en/engines/table-engines/integrations/rabbitmq.md +++ b/docs/en/engines/table-engines/integrations/rabbitmq.md @@ -59,7 +59,7 @@ Optional parameters: - `rabbitmq_max_block_size` - `rabbitmq_flush_interval_ms` -Also FormatFactory settings can be added along with rabbitmq-related settings. +Also format settings can be added along with rabbitmq-related settings. Required configuration: diff --git a/docs/ru/engines/table-engines/integrations/rabbitmq.md b/docs/ru/engines/table-engines/integrations/rabbitmq.md index bc2eda746cf..173beecb6e7 100644 --- a/docs/ru/engines/table-engines/integrations/rabbitmq.md +++ b/docs/ru/engines/table-engines/integrations/rabbitmq.md @@ -52,7 +52,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] - `rabbitmq_max_block_size` - `rabbitmq_flush_interval_ms` -Настройки FormatFactory также могут быть добавлены в списке RabbitMQ настроек. +Настройки форматов данных также могут быть добавлены в списке RabbitMQ настроек. Требуемая конфигурация: From 5f88f5817f4a348051e7aeaa93b8bdb589b8805a Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 17 Feb 2021 11:23:24 +0300 Subject: [PATCH 08/22] Rename untyped function reinterpretAs into reinterpret --- src/Functions/reinterpretAs.cpp | 50 +++++++++---------- .../01676_reinterpret_as.reference | 6 +-- .../0_stateless/01676_reinterpret_as.sql | 42 ++++++++-------- 3 files changed, 49 insertions(+), 49 deletions(-) diff --git a/src/Functions/reinterpretAs.cpp b/src/Functions/reinterpretAs.cpp index 363455cb38f..1d105f4ce38 100644 --- a/src/Functions/reinterpretAs.cpp +++ b/src/Functions/reinterpretAs.cpp @@ -39,12 +39,12 @@ namespace * 3. Types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID) into FixedString, * String, and types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID). */ -class FunctionReinterpretAs : public IFunction +class FunctionReinterpret : public IFunction { public: - static constexpr auto name = "reinterpretAs"; + static constexpr auto name = "reinterpret"; - static FunctionPtr create(const Context &) { return std::make_shared(); } + static FunctionPtr create(const Context &) { return std::make_shared(); } String getName() const override { return name; } @@ -308,11 +308,11 @@ private: }; template -class FunctionReinterpretAsTyped : public IFunction +class FunctionReinterpretAs : public IFunction { public: static constexpr auto name = Name::name; - static FunctionPtr create(const Context &) { return std::make_shared(); } + static FunctionPtr create(const Context &) { return std::make_shared(); } String getName() const override { return name; } @@ -365,7 +365,7 @@ public: return impl.executeImpl(arguments_with_type, return_type, input_rows_count); } - FunctionReinterpretAs impl; + FunctionReinterpret impl; }; struct NameReinterpretAsUInt8 { static constexpr auto name = "reinterpretAsUInt8"; }; @@ -387,26 +387,26 @@ struct NameReinterpretAsUUID { static constexpr auto name = "reinterpretA struct NameReinterpretAsString { static constexpr auto name = "reinterpretAsString"; }; struct NameReinterpretAsFixedString { static constexpr auto name = "reinterpretAsFixedString"; }; -using FunctionReinterpretAsUInt8 = FunctionReinterpretAsTyped; -using FunctionReinterpretAsUInt16 = FunctionReinterpretAsTyped; -using FunctionReinterpretAsUInt32 = FunctionReinterpretAsTyped; -using FunctionReinterpretAsUInt64 = FunctionReinterpretAsTyped; -using FunctionReinterpretAsUInt256 = FunctionReinterpretAsTyped; -using FunctionReinterpretAsInt8 = FunctionReinterpretAsTyped; -using FunctionReinterpretAsInt16 = FunctionReinterpretAsTyped; -using FunctionReinterpretAsInt32 = FunctionReinterpretAsTyped; -using FunctionReinterpretAsInt64 = FunctionReinterpretAsTyped; -using FunctionReinterpretAsInt128 = FunctionReinterpretAsTyped; -using FunctionReinterpretAsInt256 = FunctionReinterpretAsTyped; -using FunctionReinterpretAsFloat32 = FunctionReinterpretAsTyped; -using FunctionReinterpretAsFloat64 = FunctionReinterpretAsTyped; -using FunctionReinterpretAsDate = FunctionReinterpretAsTyped; -using FunctionReinterpretAsDateTime = FunctionReinterpretAsTyped; -using FunctionReinterpretAsUUID = FunctionReinterpretAsTyped; +using FunctionReinterpretAsUInt8 = FunctionReinterpretAs; +using FunctionReinterpretAsUInt16 = FunctionReinterpretAs; +using FunctionReinterpretAsUInt32 = FunctionReinterpretAs; +using FunctionReinterpretAsUInt64 = FunctionReinterpretAs; +using FunctionReinterpretAsUInt256 = FunctionReinterpretAs; +using FunctionReinterpretAsInt8 = FunctionReinterpretAs; +using FunctionReinterpretAsInt16 = FunctionReinterpretAs; +using FunctionReinterpretAsInt32 = FunctionReinterpretAs; +using FunctionReinterpretAsInt64 = FunctionReinterpretAs; +using FunctionReinterpretAsInt128 = FunctionReinterpretAs; +using FunctionReinterpretAsInt256 = FunctionReinterpretAs; +using FunctionReinterpretAsFloat32 = FunctionReinterpretAs; +using FunctionReinterpretAsFloat64 = FunctionReinterpretAs; +using FunctionReinterpretAsDate = FunctionReinterpretAs; +using FunctionReinterpretAsDateTime = FunctionReinterpretAs; +using FunctionReinterpretAsUUID = FunctionReinterpretAs; -using FunctionReinterpretAsString = FunctionReinterpretAsTyped; +using FunctionReinterpretAsString = FunctionReinterpretAs; -using FunctionReinterpretAsFixedString = FunctionReinterpretAsTyped; +using FunctionReinterpretAsFixedString = FunctionReinterpretAs; } @@ -433,7 +433,7 @@ void registerFunctionsReinterpretAs(FunctionFactory & factory) factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction(); } } diff --git a/tests/queries/0_stateless/01676_reinterpret_as.reference b/tests/queries/0_stateless/01676_reinterpret_as.reference index bbde2d5ed57..f7ca2bbedfa 100644 --- a/tests/queries/0_stateless/01676_reinterpret_as.reference +++ b/tests/queries/0_stateless/01676_reinterpret_as.reference @@ -25,6 +25,6 @@ Integer and Float types 0.2 1045220557 0.2 4596373779694328218 Integer and String types -1 49 -1 49 -11 12593 +1 1 49 +1 1 49 +11 11 12593 diff --git a/tests/queries/0_stateless/01676_reinterpret_as.sql b/tests/queries/0_stateless/01676_reinterpret_as.sql index 88dc6437043..cc5dba1e110 100644 --- a/tests/queries/0_stateless/01676_reinterpret_as.sql +++ b/tests/queries/0_stateless/01676_reinterpret_as.sql @@ -1,30 +1,30 @@ SELECT 'Into String'; -SELECT reinterpretAs(49, 'String'); +SELECT reinterpret(49, 'String'); SELECT 'Into FixedString'; -SELECT reinterpretAs(49, 'FixedString(1)'); -SELECT reinterpretAs(49, 'FixedString(2)'); -SELECT reinterpretAs(49, 'FixedString(3)'); -SELECT reinterpretAs(49, 'FixedString(4)'); +SELECT reinterpret(49, 'FixedString(1)'); +SELECT reinterpret(49, 'FixedString(2)'); +SELECT reinterpret(49, 'FixedString(3)'); +SELECT reinterpret(49, 'FixedString(4)'); SELECT reinterpretAsFixedString(49); SELECT 'Into Numeric Representable'; SELECT 'Integer and Integer types'; -SELECT reinterpretAs(257, 'UInt8'), reinterpretAsUInt8(257); -SELECT reinterpretAs(257, 'Int8'), reinterpretAsInt8(257); -SELECT reinterpretAs(257, 'UInt16'), reinterpretAsUInt16(257); -SELECT reinterpretAs(257, 'Int16'), reinterpretAsInt16(257); -SELECT reinterpretAs(257, 'UInt32'), reinterpretAsUInt32(257); -SELECT reinterpretAs(257, 'Int32'), reinterpretAsInt32(257); -SELECT reinterpretAs(257, 'UInt64'), reinterpretAsUInt64(257); -SELECT reinterpretAs(257, 'Int64'), reinterpretAsInt64(257); -SELECT reinterpretAs(257, 'Int128'), reinterpretAsInt128(257); -SELECT reinterpretAs(257, 'UInt256'), reinterpretAsUInt256(257); -SELECT reinterpretAs(257, 'Int256'), reinterpretAsInt256(257); +SELECT reinterpret(257, 'UInt8'), reinterpretAsUInt8(257); +SELECT reinterpret(257, 'Int8'), reinterpretAsInt8(257); +SELECT reinterpret(257, 'UInt16'), reinterpretAsUInt16(257); +SELECT reinterpret(257, 'Int16'), reinterpretAsInt16(257); +SELECT reinterpret(257, 'UInt32'), reinterpretAsUInt32(257); +SELECT reinterpret(257, 'Int32'), reinterpretAsInt32(257); +SELECT reinterpret(257, 'UInt64'), reinterpretAsUInt64(257); +SELECT reinterpret(257, 'Int64'), reinterpretAsInt64(257); +SELECT reinterpret(257, 'Int128'), reinterpretAsInt128(257); +SELECT reinterpret(257, 'UInt256'), reinterpretAsUInt256(257); +SELECT reinterpret(257, 'Int256'), reinterpretAsInt256(257); SELECT 'Integer and Float types'; -SELECT reinterpretAs(toFloat32(0.2), 'UInt32'), reinterpretAsUInt32(toFloat32(0.2)); -SELECT reinterpretAs(toFloat64(0.2), 'UInt64'), reinterpretAsUInt64(toFloat64(0.2)); +SELECT reinterpret(toFloat32(0.2), 'UInt32'), reinterpretAsUInt32(toFloat32(0.2)); +SELECT reinterpret(toFloat64(0.2), 'UInt64'), reinterpretAsUInt64(toFloat64(0.2)); SELECT reinterpretAsFloat32(a), reinterpretAsUInt32(toFloat32(0.2)) as a; SELECT reinterpretAsFloat64(a), reinterpretAsUInt64(toFloat64(0.2)) as a; SELECT 'Integer and String types'; -SELECT reinterpretAsString(a), reinterpretAsUInt8('1') as a; -SELECT reinterpretAsString(a), reinterpretAsUInt8('11') as a; -SELECT reinterpretAsString(a), reinterpretAsUInt16('11') as a; +SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt8('1') as a; +SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt8('11') as a; +SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt16('11') as a; From e52cc1ac1fe7b3c937cc16d75dbcf623fca86c2c Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 17 Feb 2021 11:31:20 +0300 Subject: [PATCH 09/22] Updated documentation --- .../functions/type-conversion-functions.md | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 3ca36f41c78..6bc274eba73 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -303,7 +303,7 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut └────────────┴───────┘ ``` -## reinterpretAs(x, T) {#type_conversion_function-cast} +## reinterpret(x, T) {#type_conversion_function-reinterpret} Performs byte reinterpretation of ‘x’ as ‘t’ data type. @@ -313,9 +313,9 @@ Following reinterpretations are allowed: 3. FixedString, String, types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID) into types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID) into FixedString, ``` sql -SELECT reinterpretAs(toInt8(-1), 'UInt8') as int_to_uint, - reinterpretAs(toInt8(1), 'Float32') as int_to_float, - reinterpretAs('1', 'UInt32') as string_to_int; +SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint, + reinterpret(toInt8(1), 'Float32') as int_to_float, + reinterpret('1', 'UInt32') as string_to_int; ``` ``` text @@ -324,23 +324,23 @@ SELECT reinterpretAs(toInt8(-1), 'UInt8') as int_to_uint, └─────────────┴──────────────┴───────────────┘ ``` -## reinterpretAsUInt(8\|16\|32\|64\|256) {#reinterpretasuint8163264256} +## reinterpretAsUInt(8\|16\|32\|64\|256) {#type_conversion_function-reinterpretAsUInt8163264256} -## reinterpretAsInt(8\|16\|32\|64\|128\|256) {#reinterpretasint8163264128256} +## reinterpretAsInt(8\|16\|32\|64\|128\|256) {#type_conversion_function-reinterpretAsInt8163264128256} -## reinterpretAsFloat(32\|64) {#reinterpretasfloat3264} +## reinterpretAsFloat(32\|64) {##type_conversion_function-reinterpretAsFloat} -## reinterpretAsDate {#reinterpretasdate} +## reinterpretAsDate {#type_conversion_function-reinterpretAsDate} -## reinterpretAsDateTime {#reinterpretasdatetime} +## reinterpretAsDateTime {#type_conversion_function-reinterpretAsDateTime} -## reinterpretAsString {#type_conversion_functions-reinterpretAsString} +## reinterpretAsString {#type_conversion_function-reinterpretAsString} -## reinterpretAsFixedString {#reinterpretasfixedstring} +## reinterpretAsFixedString {#type_conversion_function-reinterpretAsFixedString} -## reinterpretAsUUID {#reinterpretasuuid} +## reinterpretAsUUID {#type_conversion_function-reinterpretAsUUID} -These functions are aliases for `reinterpretAs`function. +These functions are aliases for `reinterpret` function. ## CAST(x, T) {#type_conversion_function-cast} @@ -401,7 +401,7 @@ bounds of type T. Example ``` sql -SELECT cast(-1, 'UInt8') as uint8; +SELECT cast(-1, 'UInt8') as uint8; ``` @@ -422,7 +422,7 @@ Code: 70. DB::Exception: Received from localhost:9000. DB::Exception: Value in c ## accurateCastOrNull(x, T) {#type_conversion_function-accurate-cast_or_null} -Converts ‘x’ to the ‘t’ data type. Always returns nullable type and returns NULL +Converts ‘x’ to the ‘t’ data type. Always returns nullable type and returns NULL if the casted value is not representable in the target type. Example: @@ -817,9 +817,9 @@ SELECT fromUnixTimestamp64Milli(i64, 'UTC') ## formatRow {#formatrow} -Converts arbitrary expressions into a string via given format. +Converts arbitrary expressions into a string via given format. -**Syntax** +**Syntax** ``` sql formatRow(format, x, y, ...) @@ -860,7 +860,7 @@ Result: Converts arbitrary expressions into a string via given format. The function trims the last `\n` if any. -**Syntax** +**Syntax** ``` sql formatRowNoNewline(format, x, y, ...) From c608fa1e6a3539f74e8956e441e4f68b99367982 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 17 Feb 2021 12:53:12 +0300 Subject: [PATCH 10/22] Added error reinterpretation tests --- src/Functions/reinterpretAs.cpp | 4 ++++ tests/queries/0_stateless/01676_reinterpret_as.reference | 1 + tests/queries/0_stateless/01676_reinterpret_as.sql | 4 ++++ 3 files changed, 9 insertions(+) diff --git a/src/Functions/reinterpretAs.cpp b/src/Functions/reinterpretAs.cpp index 1d105f4ce38..c15ba969fdb 100644 --- a/src/Functions/reinterpretAs.cpp +++ b/src/Functions/reinterpretAs.cpp @@ -93,6 +93,10 @@ public: + " because only Numeric, String or FixedString can be reinterpreted in Numeric", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } + else + throw Exception("Cannot reinterpret " + from_type->getName() + " as " + to_type->getName() + + " because only reinterpretation in String, FixedString and Numeric types is supported", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return to_type; } diff --git a/tests/queries/0_stateless/01676_reinterpret_as.reference b/tests/queries/0_stateless/01676_reinterpret_as.reference index f7ca2bbedfa..b39deb55a7f 100644 --- a/tests/queries/0_stateless/01676_reinterpret_as.reference +++ b/tests/queries/0_stateless/01676_reinterpret_as.reference @@ -28,3 +28,4 @@ Integer and String types 1 1 49 1 1 49 11 11 12593 +ReinterpretErrors diff --git a/tests/queries/0_stateless/01676_reinterpret_as.sql b/tests/queries/0_stateless/01676_reinterpret_as.sql index cc5dba1e110..ff727f284bb 100644 --- a/tests/queries/0_stateless/01676_reinterpret_as.sql +++ b/tests/queries/0_stateless/01676_reinterpret_as.sql @@ -28,3 +28,7 @@ SELECT 'Integer and String types'; SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt8('1') as a; SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt8('11') as a; SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt16('11') as a; +SELECT 'ReinterpretErrors'; +SELECT reinterpret(toDecimal64(1, 2), 'UInt8'); -- {serverError 43} +SELECT reinterpret('123', 'FixedString(1)'); -- {serverError 43} +SELECT reinterpret(toDateTime('9922337203.6854775808', 1), 'Decimal64(1)'); -- {serverError 43} From f483cd091a5dbc71c7e507ab87d0d6fad307eb39 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 14 Feb 2021 23:31:58 +0300 Subject: [PATCH 11/22] test/stress: use clickhouse builtin start/stop to run server from the same user This will allow to attach with gdb for better diagnosis. --- docker/test/stress/run.sh | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 88a633ac488..44612a83504 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -10,14 +10,7 @@ dpkg -i package_folder/clickhouse-test_*.deb function stop() { - timeout 120 service clickhouse-server stop - - # Wait for process to disappear from processlist and also try to kill zombies. - while kill -9 "$(pidof clickhouse-server)" - do - echo "Killed clickhouse-server" - sleep 0.5 - done + clickhouse stop } function start() @@ -33,7 +26,8 @@ function start() tail -n1000 /var/log/clickhouse-server/clickhouse-server.log break fi - timeout 120 service clickhouse-server start + # use root to match with current uid + clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>/var/log/clickhouse-server/stderr.log sleep 0.5 counter=$((counter + 1)) done From 63eff6e8c812a8770fc54fa987c68e7fb681abe0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 13 Feb 2021 11:41:00 +0300 Subject: [PATCH 12/22] test/stress: improve backtrace catching on server failures Otherwise sometimes stracktraces may be lost [1]: [1]: https://clickhouse-test-reports.s3.yandex.net/19580/6aecb62416ece880cbb8ee3a803e14d841388dde/stress_test_(thread).html#fail1 --- docker/test/stress/run.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 44612a83504..60e9ffd265c 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -31,6 +31,18 @@ function start() sleep 0.5 counter=$((counter + 1)) done + + echo " +handle all noprint +handle SIGSEGV stop print +handle SIGBUS stop print +handle SIGABRT stop print +continue +thread apply all backtrace +continue +" > script.gdb + + gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" & } # install test configs From 770c3406df6d55541dcb59b9146206b2558cbe86 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 15 Feb 2021 21:02:21 +0300 Subject: [PATCH 13/22] test/stress: fix permissions for clickhouse directories --- docker/test/stress/run.sh | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 60e9ffd265c..dc1e4db4477 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -8,6 +8,20 @@ dpkg -i package_folder/clickhouse-server_*.deb dpkg -i package_folder/clickhouse-client_*.deb dpkg -i package_folder/clickhouse-test_*.deb +function configure() +{ + # install test configs + /usr/share/clickhouse-test/config/install.sh + + # for clickhouse-server (via service) + echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment + # for clickhouse-client + export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000' + + # since we run clickhouse from root + sudo chown root: /var/lib/clickhouse +} + function stop() { clickhouse stop @@ -45,13 +59,7 @@ continue gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" & } -# install test configs -/usr/share/clickhouse-test/config/install.sh - -# for clickhouse-server (via service) -echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment -# for clickhouse-client -export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000' +configure start From dcba99f4b1d3c1ed8b4838d00458271cfb2be8d4 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 18 Feb 2021 02:19:58 +0300 Subject: [PATCH 14/22] fix usage of 'distinct' combinator with 'state' combinator --- src/AggregateFunctions/AggregateFunctionDistinct.h | 5 +++++ .../01259_combinator_distinct_distributed.reference | 4 ++++ .../01259_combinator_distinct_distributed.sql | 9 +++++++++ 3 files changed, 18 insertions(+) diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h index b481e2a28e7..b587bbebf6e 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.h +++ b/src/AggregateFunctions/AggregateFunctionDistinct.h @@ -236,6 +236,11 @@ public: return true; } + bool isState() const override + { + return nested_func->isState(); + } + AggregateFunctionPtr getNestedFunction() const override { return nested_func; } }; diff --git a/tests/queries/0_stateless/01259_combinator_distinct_distributed.reference b/tests/queries/0_stateless/01259_combinator_distinct_distributed.reference index 096d5703292..72a41ac1d84 100644 --- a/tests/queries/0_stateless/01259_combinator_distinct_distributed.reference +++ b/tests/queries/0_stateless/01259_combinator_distinct_distributed.reference @@ -2,3 +2,7 @@ [0,1,2,3,4,5,6,7,8,9,10,11,12] 20 0.49237 +78 +[0,1,2,3,4,5,6,7,8,9,10,11,12] +20 +0.49237 diff --git a/tests/queries/0_stateless/01259_combinator_distinct_distributed.sql b/tests/queries/0_stateless/01259_combinator_distinct_distributed.sql index f851e64dbcb..f95d2d87b8e 100644 --- a/tests/queries/0_stateless/01259_combinator_distinct_distributed.sql +++ b/tests/queries/0_stateless/01259_combinator_distinct_distributed.sql @@ -1,3 +1,12 @@ +SET distributed_aggregation_memory_efficient = 1; + +SELECT sum(DISTINCT number % 13) FROM remote('127.0.0.{1,2}', numbers_mt(100000)); +SELECT arraySort(groupArray(DISTINCT number % 13)) FROM remote('127.0.0.{1,2}', numbers_mt(100000)); +SELECT finalizeAggregation(countState(DISTINCT toString(number % 20))) FROM remote('127.0.0.{1,2}', numbers_mt(100000)); +SELECT round(corrStable(DISTINCT x, y), 5) FROM (SELECT number % 10 AS x, number % 5 AS y FROM remote('127.0.0.{1,2}', numbers(1000))); + +SET distributed_aggregation_memory_efficient = 0; + SELECT sum(DISTINCT number % 13) FROM remote('127.0.0.{1,2}', numbers_mt(100000)); SELECT arraySort(groupArray(DISTINCT number % 13)) FROM remote('127.0.0.{1,2}', numbers_mt(100000)); SELECT finalizeAggregation(countState(DISTINCT toString(number % 20))) FROM remote('127.0.0.{1,2}', numbers_mt(100000)); From 4278098f9a243c740961248ad2232e425bd567d9 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 18 Feb 2021 13:09:01 +0300 Subject: [PATCH 15/22] Reinterpret function added Decimal, DateTim64 support --- .../functions/type-conversion-functions.md | 10 ++- src/Functions/reinterpretAs.cpp | 65 ++++++++++++++----- .../01676_reinterpret_as.reference | 10 +++ .../0_stateless/01676_reinterpret_as.sql | 12 +++- 4 files changed, 76 insertions(+), 21 deletions(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 6bc274eba73..0cfeb282bb3 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -324,16 +324,20 @@ SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint, └─────────────┴──────────────┴───────────────┘ ``` -## reinterpretAsUInt(8\|16\|32\|64\|256) {#type_conversion_function-reinterpretAsUInt8163264256} +## reinterpretAsUInt(8\|16\|32\|64\|256) {#reinterpretAsUInt8163264256} -## reinterpretAsInt(8\|16\|32\|64\|128\|256) {#type_conversion_function-reinterpretAsInt8163264128256} +## reinterpretAsInt(8\|16\|32\|64\|128\|256) {#reinterpretAsInt8163264128256} -## reinterpretAsFloat(32\|64) {##type_conversion_function-reinterpretAsFloat} +## reinterpretAsDecimal(32\|64\|128\|256) {#reinterpretAsDecimal3264128256} + +## reinterpretAsFloat(32\|64) {#type_conversion_function-reinterpretAsFloat} ## reinterpretAsDate {#type_conversion_function-reinterpretAsDate} ## reinterpretAsDateTime {#type_conversion_function-reinterpretAsDateTime} +## reinterpretAsDateTime64 {#type_conversion_function-reinterpretAsDateTime64} + ## reinterpretAsString {#type_conversion_function-reinterpretAsString} ## reinterpretAsFixedString {#type_conversion_function-reinterpretAsFixedString} diff --git a/src/Functions/reinterpretAs.cpp b/src/Functions/reinterpretAs.cpp index c15ba969fdb..3f4ba3d23e1 100644 --- a/src/Functions/reinterpretAs.cpp +++ b/src/Functions/reinterpretAs.cpp @@ -11,10 +11,13 @@ #include #include #include +#include +#include #include #include #include #include +#include #include #include @@ -158,7 +161,7 @@ public: { const auto * col_from = assert_cast(arguments[0].column.get()); - auto col_res = ToColumnType::create(); + auto col_res = numericColumnCreateHelper(static_cast(*result_type.get())); const ColumnString::Chars & data_from = col_from->getChars(); const ColumnString::Offsets & offsets_from = col_from->getOffsets(); @@ -185,7 +188,7 @@ public: { const auto * col_from_fixed = assert_cast(arguments[0].column.get()); - auto col_res = ToColumnType::create(); + auto col_res = numericColumnCreateHelper(static_cast(*result_type.get())); const ColumnString::Chars & data_from = col_from_fixed->getChars(); size_t step = col_from_fixed->getN(); @@ -209,12 +212,27 @@ public: } else if constexpr (CanBeReinterpretedAsNumeric) { - using FromTypeFieldType = typename FromType::FieldType; - const auto * col = assert_cast*>(arguments[0].column.get()); + using From = typename FromType::FieldType; + using To = typename ToType::FieldType; - auto col_res = ToColumnType::create(); - reinterpretImpl(col->getData(), col_res->getData()); - result = std::move(col_res); + using FromColumnType = std::conditional_t, ColumnDecimal, ColumnVector>; + + const auto * column_from = assert_cast(arguments[0].column.get()); + + auto column_to = numericColumnCreateHelper(static_cast(*result_type.get())); + + auto & from = column_from->getData(); + auto & to = column_to->getData(); + + size_t size = from.size(); + to.resize_fill(size); + + static constexpr size_t copy_size = std::min(sizeof(From), sizeof(To)); + + for (size_t i = 0; i < size; ++i) + memcpy(static_cast(&to[i]), static_cast(&from[i]), copy_size); + + result = std::move(column_to); return true; } @@ -232,7 +250,7 @@ public: private: template static constexpr auto CanBeReinterpretedAsNumeric = - IsDataTypeNumber || + IsDataTypeDecimalOrNumber || std::is_same_v || std::is_same_v || std::is_same_v; @@ -243,7 +261,8 @@ private: type.isInt() || type.isDateOrDateTime() || type.isFloat() || - type.isUUID(); + type.isUUID() || + type.isDecimal(); } static void NO_INLINE executeToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n) @@ -296,18 +315,32 @@ private: } } - template - static void reinterpretImpl(const PaddedPODArray & from, PaddedPODArray & to) + template + static typename Type::ColumnType::MutablePtr numericColumnCreateHelper(const Type & type) { + size_t column_size = 0; + + using ColumnType = typename Type::ColumnType; + + if constexpr (IsDataTypeDecimal) + return ColumnType::create(column_size, type.getScale()); + else + return ColumnType::create(column_size); + } + + template + static void reinterpretImpl(const FromContainer & from, ToContainer & to) + { + using From = typename FromContainer::value_type; + using To = typename ToContainer::value_type; + size_t size = from.size(); to.resize_fill(size); + static constexpr size_t copy_size = std::min(sizeof(From), sizeof(To)); + for (size_t i = 0; i < size; ++i) - { - memcpy(static_cast(&to[i]), - static_cast(&from[i]), - std::min(sizeof(From), sizeof(To))); - } + memcpy(static_cast(&to[i]), static_cast(&from[i]), copy_size); } }; diff --git a/tests/queries/0_stateless/01676_reinterpret_as.reference b/tests/queries/0_stateless/01676_reinterpret_as.reference index b39deb55a7f..459ca166dc1 100644 --- a/tests/queries/0_stateless/01676_reinterpret_as.reference +++ b/tests/queries/0_stateless/01676_reinterpret_as.reference @@ -28,4 +28,14 @@ Integer and String types 1 1 49 1 1 49 11 11 12593 +Dates +1970-01-01 1970-01-01 +1970-01-01 03:00:00 1970-01-01 03:00:00 +1970-01-01 03:00:00.000 1970-01-01 03:00:00.000 +Decimals +5.00 0.49 +5.00 0.49 +5.00 0.49 +5.00 0.49 +0.00 ReinterpretErrors diff --git a/tests/queries/0_stateless/01676_reinterpret_as.sql b/tests/queries/0_stateless/01676_reinterpret_as.sql index ff727f284bb..5eb94ed0a13 100644 --- a/tests/queries/0_stateless/01676_reinterpret_as.sql +++ b/tests/queries/0_stateless/01676_reinterpret_as.sql @@ -28,7 +28,15 @@ SELECT 'Integer and String types'; SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt8('1') as a; SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt8('11') as a; SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt16('11') as a; +SELECT 'Dates'; +SELECT reinterpret(0, 'Date'), reinterpret('', 'Date'); +SELECT reinterpret(0, 'DateTime'), reinterpret('', 'DateTime'); +SELECT reinterpret(0, 'DateTime64'), reinterpret('', 'DateTime64'); +SELECT 'Decimals'; +SELECT reinterpret(toDecimal32(5, 2), 'Decimal32(2)'), reinterpret('1', 'Decimal32(2)'); +SELECT reinterpret(toDecimal64(5, 2), 'Decimal64(2)'), reinterpret('1', 'Decimal64(2)');; +SELECT reinterpret(toDecimal128(5, 2), 'Decimal128(2)'), reinterpret('1', 'Decimal128(2)'); +SELECT reinterpret(toDecimal256(5, 2), 'Decimal256(2)'), reinterpret('1', 'Decimal256(2)'); +SELECT reinterpret(toDateTime64(0, 0), 'Decimal64(2)'); SELECT 'ReinterpretErrors'; -SELECT reinterpret(toDecimal64(1, 2), 'UInt8'); -- {serverError 43} SELECT reinterpret('123', 'FixedString(1)'); -- {serverError 43} -SELECT reinterpret(toDateTime('9922337203.6854775808', 1), 'Decimal64(1)'); -- {serverError 43} From 97f4c457ec979fc489892472dfb50a93062b4ce5 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 18 Feb 2021 16:27:51 +0300 Subject: [PATCH 16/22] fix MySQL COMM_FIELD_LIST response --- docker/test/fasttest/run.sh | 1 + docker/test/stateless/Dockerfile | 3 ++- src/Core/MySQL/PacketsProtocolText.cpp | 22 +++++++++++++--- src/Core/MySQL/PacketsProtocolText.h | 5 +++- src/Server/MySQLHandler.cpp | 2 +- .../01176_mysql_client_interactive.expect | 26 +++++++++++++++++++ .../01176_mysql_client_interactive.reference | 0 tests/queries/shell_config.sh | 13 ++++++++++ 8 files changed, 65 insertions(+), 7 deletions(-) create mode 100755 tests/queries/0_stateless/01176_mysql_client_interactive.expect create mode 100644 tests/queries/0_stateless/01176_mysql_client_interactive.reference diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index e6294b5d74d..7e7c8116901 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -259,6 +259,7 @@ function run_tests 00929_multi_match_edit_distance 01681_hyperscan_debug_assertion + 01176_mysql_client_interactive # requires mysql client 01031_mutations_interpreter_and_context 01053_ssd_dictionary # this test mistakenly requires acces to /var/lib/clickhouse -- can't run this locally, disabled 01083_expressions_in_engine_arguments diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index b063f8d81f6..f2e3016692f 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -23,7 +23,8 @@ RUN apt-get update -y \ telnet \ tree \ unixodbc \ - wget + wget \ + mysql-client-5.7 RUN pip3 install numpy scipy pandas diff --git a/src/Core/MySQL/PacketsProtocolText.cpp b/src/Core/MySQL/PacketsProtocolText.cpp index ad34cd8c28d..62efe549b33 100644 --- a/src/Core/MySQL/PacketsProtocolText.cpp +++ b/src/Core/MySQL/PacketsProtocolText.cpp @@ -62,10 +62,10 @@ ColumnDefinition::ColumnDefinition() ColumnDefinition::ColumnDefinition( String schema_, String table_, String org_table_, String name_, String org_name_, uint16_t character_set_, uint32_t column_length_, - ColumnType column_type_, uint16_t flags_, uint8_t decimals_) + ColumnType column_type_, uint16_t flags_, uint8_t decimals_, bool with_defaults_) : schema(std::move(schema_)), table(std::move(table_)), org_table(std::move(org_table_)), name(std::move(name_)), org_name(std::move(org_name_)), character_set(character_set_), column_length(column_length_), column_type(column_type_), - flags(flags_), decimals(decimals_) + flags(flags_), decimals(decimals_), is_comm_field_list_response(with_defaults_) { } @@ -77,8 +77,15 @@ ColumnDefinition::ColumnDefinition( size_t ColumnDefinition::getPayloadSize() const { - return 12 + getLengthEncodedStringSize("def") + getLengthEncodedStringSize(schema) + getLengthEncodedStringSize(table) + getLengthEncodedStringSize(org_table) + \ - getLengthEncodedStringSize(name) + getLengthEncodedStringSize(org_name) + getLengthEncodedNumberSize(next_length); + return 12 + + getLengthEncodedStringSize("def") + + getLengthEncodedStringSize(schema) + + getLengthEncodedStringSize(table) + + getLengthEncodedStringSize(org_table) + + getLengthEncodedStringSize(name) + + getLengthEncodedStringSize(org_name) + + getLengthEncodedNumberSize(next_length) + + is_comm_field_list_response; } void ColumnDefinition::readPayloadImpl(ReadBuffer & payload) @@ -115,6 +122,13 @@ void ColumnDefinition::writePayloadImpl(WriteBuffer & buffer) const buffer.write(reinterpret_cast(&flags), 2); buffer.write(reinterpret_cast(&decimals), 1); writeChar(0x0, 2, buffer); + if (is_comm_field_list_response) + { + /// We should write length encoded int with string size + /// followed by string with some "default values" (possibly it's column defaults). + /// But we just send NULL for simplicity. + writeChar(0xfb, buffer); + } } ColumnDefinition getColumnDefinition(const String & column_name, const TypeIndex type_index) diff --git a/src/Core/MySQL/PacketsProtocolText.h b/src/Core/MySQL/PacketsProtocolText.h index d449e94cff1..b54b1c5ca19 100644 --- a/src/Core/MySQL/PacketsProtocolText.h +++ b/src/Core/MySQL/PacketsProtocolText.h @@ -101,6 +101,9 @@ public: ColumnType column_type; uint16_t flags; uint8_t decimals = 0x00; + /// https://dev.mysql.com/doc/internals/en/com-query-response.html#column-definition + /// There are extra fields in the packet for column defaults + bool is_comm_field_list_response = false; protected: size_t getPayloadSize() const override; @@ -114,7 +117,7 @@ public: ColumnDefinition( String schema_, String table_, String org_table_, String name_, String org_name_, uint16_t character_set_, uint32_t column_length_, - ColumnType column_type_, uint16_t flags_, uint8_t decimals_); + ColumnType column_type_, uint16_t flags_, uint8_t decimals_, bool with_defaults_ = false); /// Should be used when column metadata (original name, table, original table, database) is unknown. ColumnDefinition( diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 3cbe285615e..ea2813cf639 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -289,7 +289,7 @@ void MySQLHandler::comFieldList(ReadBuffer & payload) for (const NameAndTypePair & column : metadata_snapshot->getColumns().getAll()) { ColumnDefinition column_definition( - database, packet.table, packet.table, column.name, column.name, CharacterSet::binary, 100, ColumnType::MYSQL_TYPE_STRING, 0, 0 + database, packet.table, packet.table, column.name, column.name, CharacterSet::binary, 100, ColumnType::MYSQL_TYPE_STRING, 0, 0, true ); packet_endpoint->sendPacket(column_definition); } diff --git a/tests/queries/0_stateless/01176_mysql_client_interactive.expect b/tests/queries/0_stateless/01176_mysql_client_interactive.expect new file mode 100755 index 00000000000..d592bbe1ce2 --- /dev/null +++ b/tests/queries/0_stateless/01176_mysql_client_interactive.expect @@ -0,0 +1,26 @@ +#!/usr/bin/expect -f + +log_user 0 +set timeout 5 +match_max 100000 +# A default timeout action is to do nothing, change it to fail +expect_after { + timeout { + exit 1 + } +} + +set basedir [file dirname $argv0] +spawn bash -c "source $basedir/../shell_config.sh ; \$MYSQL_CLIENT_BINARY \$MYSQL_CLIENT_OPT" +expect "mysql> " + +send -- "USE system;\r" +expect "Database changed" + +send -- "SELECT * FROM one;\r" +expect "| dummy |" +expect "| 0 |" +expect "1 row in set" + +send -- "quit;\r" +expect eof diff --git a/tests/queries/0_stateless/01176_mysql_client_interactive.reference b/tests/queries/0_stateless/01176_mysql_client_interactive.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh index eed77fb107d..d20b5669cc5 100644 --- a/tests/queries/shell_config.sh +++ b/tests/queries/shell_config.sh @@ -54,6 +54,8 @@ export CLICKHOUSE_PORT_HTTP=${CLICKHOUSE_PORT_HTTP:="8123"} export CLICKHOUSE_PORT_HTTPS=${CLICKHOUSE_PORT_HTTPS:=$(${CLICKHOUSE_EXTRACT_CONFIG} --try --key=https_port 2>/dev/null)} 2>/dev/null export CLICKHOUSE_PORT_HTTPS=${CLICKHOUSE_PORT_HTTPS:="8443"} export CLICKHOUSE_PORT_HTTP_PROTO=${CLICKHOUSE_PORT_HTTP_PROTO:="http"} +export CLICKHOUSE_PORT_MYSQL=${CLICKHOUSE_PORT_MYSQL:=$(${CLICKHOUSE_EXTRACT_CONFIG} --try --key=mysql_port 2>/dev/null)} 2>/dev/null +export CLICKHOUSE_PORT_MYSQL=${CLICKHOUSE_PORT_MYSQL:="9004"} # Add database and log comment to url params if [ -v CLICKHOUSE_URL_PARAMS ] @@ -87,6 +89,17 @@ export CLICKHOUSE_CURL=${CLICKHOUSE_CURL:="${CLICKHOUSE_CURL_COMMAND} -q -s --ma export CLICKHOUSE_TMP=${CLICKHOUSE_TMP:="."} mkdir -p ${CLICKHOUSE_TMP} +export MYSQL_CLIENT_BINARY=${MYSQL_CLIENT_BINARY:="mysql"} +export MYSQL_CLIENT_CLICKHOUSE_USER=${MYSQL_CLIENT_CLICKHOUSE_USER:="default"} +# Avoids "Can't connect to local MySQL server through socket '/var/run/mysqld/mysqld.sock'" when connecting to localhost +[ -v CLICKHOUSE_HOST ] && MYSQL_CLIENT_OPT0+=" --protocol tcp " +[ -v CLICKHOUSE_HOST ] && MYSQL_CLIENT_OPT0+=" --host ${CLICKHOUSE_HOST} " +[ -v CLICKHOUSE_PORT_MYSQL ] && MYSQL_CLIENT_OPT0+=" --port ${CLICKHOUSE_PORT_MYSQL} " +[ -v CLICKHOUSE_DATABASE ] && MYSQL_CLIENT_OPT0+=" --database ${CLICKHOUSE_DATABASE} " +MYSQL_CLIENT_OPT0+=" --user ${MYSQL_CLIENT_CLICKHOUSE_USER} " +export MYSQL_CLIENT_OPT="${MYSQL_CLIENT_OPT0:-} ${MYSQL_CLIENT_OPT:-}" +export MYSQL_CLIENT=${MYSQL_CLIENT:="$MYSQL_CLIENT_BINARY ${MYSQL_CLIENT_OPT:-}"} + function clickhouse_client_removed_host_parameter() { # removing only `--host=value` and `--host value` (removing '-hvalue' feels to dangerous) with python regex. From 1bad1e3a7ca49af3c990999ae414bc1bcc4fc3ea Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 18 Feb 2021 17:37:51 +0300 Subject: [PATCH 17/22] fix dockerfile --- docker/test/stateless/Dockerfile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index f2e3016692f..ba3355db89b 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -3,6 +3,9 @@ FROM yandex/clickhouse-test-base ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz" +RUN echo "deb [trusted=yes] http://repo.mysql.com/apt/ubuntu/ bionic mysql-5.7" >> /etc/apt/sources.list \ + && apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 8C718D3B5072E1F5 + RUN apt-get update -y \ && env DEBIAN_FRONTEND=noninteractive \ apt-get install --yes --no-install-recommends \ @@ -24,7 +27,7 @@ RUN apt-get update -y \ tree \ unixodbc \ wget \ - mysql-client-5.7 + mysql-client=5.7* RUN pip3 install numpy scipy pandas From 865dca0b0d7c2327e56b609a56f0693d6b43c6d7 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 18 Feb 2021 22:38:21 +0300 Subject: [PATCH 18/22] ccache 4.2+ does not requires any quirks for SOURCE_DATE_EPOCH And besides "ccache compiler" does not work, since it interpret everything as ccache options. Refs: https://github.com/ccache/ccache/commit/cad2416291c042443cf0c045047c34a2e07e103a --- cmake/find/ccache.cmake | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/cmake/find/ccache.cmake b/cmake/find/ccache.cmake index d8e9cf9588d..d9ccd1a9ac6 100644 --- a/cmake/find/ccache.cmake +++ b/cmake/find/ccache.cmake @@ -37,15 +37,13 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE) # # - 4.0+ ccache always includes this environment variable into the hash # of the manifest, which do not allow to use previous cache, - # - 4.2+ ccache ignores SOURCE_DATE_EPOCH under time_macros sloppiness. + # - 4.2+ ccache ignores SOURCE_DATE_EPOCH for every file w/o __DATE__/__TIME__ # # So for: - # - 4.2+ time_macros sloppiness is used, + # - 4.2+ does not require any sloppiness # - 4.0+ will ignore SOURCE_DATE_EPOCH environment variable. if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.2") - message(STATUS "Use time_macros sloppiness for ccache") - set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_FOUND} --set-config=sloppiness=time_macros") - set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "${CCACHE_FOUND} --set-config=sloppiness=time_macros") + message(STATUS "ccache is 4.2+ no quirks for SOURCE_DATE_EPOCH required") elseif (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0") message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache") set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}") From 5bbd6f7480281a7acdf5c16ac1efc4626ba51175 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 19 Feb 2021 12:37:00 +0300 Subject: [PATCH 19/22] Fixed documentation --- docs/en/sql-reference/functions/hash-functions.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 9394426b20b..14ac288339b 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -9,7 +9,7 @@ Hash functions can be used for the deterministic pseudo-random shuffling of elem ## halfMD5 {#hash-functions-halfmd5} -[Interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order. +[Interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order. ``` sql halfMD5(par1, ...) @@ -54,7 +54,7 @@ sipHash64(par1,...) This is a cryptographic hash function. It works at least three times faster than the [MD5](#hash_functions-md5) function. -Function [interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. Then combines hashes by the following algorithm: +Function [interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. Then combines hashes by the following algorithm: 1. After hashing all the input parameters, the function gets the array of hashes. 2. Function takes the first and the second elements and calculates a hash for the array of them. From 1c5b10de41a8266b623f5bcc7f3b8d3b72c6982d Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 19 Feb 2021 09:23:51 +0000 Subject: [PATCH 20/22] Use fixed version for aerospike --- docker/test/integration/runner/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 502dc3736b2..e0e5e36a3d6 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -58,7 +58,7 @@ RUN dockerd --version; docker --version RUN python3 -m pip install \ PyMySQL \ - aerospike \ + aerospike==4.0.0 \ avro \ cassandra-driver \ confluent-kafka==1.5.0 \ From 414f470c79eb22b0ca47b82f11625cf80b0231aa Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Fri, 19 Feb 2021 15:51:26 +0300 Subject: [PATCH 21/22] Make Poco HTTP Server zero-copy again (#19516) * Refactoring: part 1 * Refactoring: part 2 * Handle request using ReadBuffer interface * Struggles with ReadBuffer's * Fix URI parsing * Implement parsing of multipart/form-data * Check HTTP_LENGTH_REQUIRED before eof() or will hang * Fix HTTPChunkedReadBuffer * Fix build and style * Fix test * Resist double-eof * Fix arcadian build --- base/daemon/BaseDaemon.h | 6 +- programs/odbc-bridge/ColumnInfoHandler.cpp | 12 +- programs/odbc-bridge/ColumnInfoHandler.h | 9 +- programs/odbc-bridge/HandlerFactory.cpp | 15 +- programs/odbc-bridge/HandlerFactory.h | 15 +- .../odbc-bridge/IdentifierQuoteHandler.cpp | 12 +- programs/odbc-bridge/IdentifierQuoteHandler.h | 7 +- programs/odbc-bridge/MainHandler.cpp | 22 +- programs/odbc-bridge/MainHandler.h | 11 +- programs/odbc-bridge/ODBCBridge.cpp | 10 +- programs/odbc-bridge/PingHandler.cpp | 2 +- programs/odbc-bridge/PingHandler.h | 14 +- programs/odbc-bridge/SchemaAllowedHandler.cpp | 12 +- programs/odbc-bridge/SchemaAllowedHandler.h | 11 +- programs/server/Server.cpp | 43 +- programs/server/Server.h | 3 +- src/CMakeLists.txt | 1 + src/Common/HTMLForm.h | 42 -- src/Common/StringUtils/StringUtils.h | 6 + src/Common/formatIPv6.h | 12 +- src/Common/hex.h | 4 +- src/Core/ExternalTable.cpp | 9 +- src/Core/ExternalTable.h | 24 +- src/IO/EmptyReadBuffer.h | 18 + src/IO/HTTPChunkedReadBuffer.cpp | 92 +++++ src/IO/HTTPChunkedReadBuffer.h | 25 ++ src/IO/HTTPCommon.cpp | 4 +- src/IO/HTTPCommon.h | 17 +- src/IO/LimitReadBuffer.cpp | 42 +- src/IO/LimitReadBuffer.h | 15 +- src/IO/PeekableReadBuffer.cpp | 17 +- src/IO/PeekableReadBuffer.h | 2 +- src/IO/ReadBuffer.h | 52 ++- src/IO/ReadBufferFromPocoSocket.cpp | 2 +- src/IO/ReadBufferFromPocoSocket.h | 13 +- src/IO/ReadHelpers.cpp | 19 + src/IO/ReadHelpers.h | 15 +- src/IO/ya.make | 2 +- src/Interpreters/InterserverIOHandler.h | 15 +- src/Server/HTTP/HTMLForm.cpp | 381 ++++++++++++++++++ src/Server/HTTP/HTMLForm.h | 175 ++++++++ src/Server/HTTP/HTTPRequest.h | 10 + src/Server/HTTP/HTTPRequestHandler.h | 19 + src/Server/HTTP/HTTPRequestHandlerFactory.h | 20 + src/Server/HTTP/HTTPResponse.h | 10 + src/Server/HTTP/HTTPServer.cpp | 48 +++ src/Server/HTTP/HTTPServer.h | 46 +++ src/Server/HTTP/HTTPServerConnection.cpp | 128 ++++++ src/Server/HTTP/HTTPServerConnection.h | 36 ++ .../HTTP/HTTPServerConnectionFactory.cpp | 19 + src/Server/HTTP/HTTPServerConnectionFactory.h | 25 ++ src/Server/HTTP/HTTPServerRequest.cpp | 123 ++++++ src/Server/HTTP/HTTPServerRequest.h | 59 +++ src/Server/HTTP/HTTPServerResponse.cpp | 163 ++++++++ src/Server/HTTP/HTTPServerResponse.h | 91 +++++ src/Server/HTTP/ReadHeaders.cpp | 88 ++++ src/Server/HTTP/ReadHeaders.h | 17 + .../WriteBufferFromHTTPServerResponse.cpp | 44 +- .../HTTP}/WriteBufferFromHTTPServerResponse.h | 41 +- src/Server/HTTPHandler.cpp | 194 ++++----- src/Server/HTTPHandler.h | 36 +- src/Server/HTTPHandlerFactory.cpp | 101 +++-- src/Server/HTTPHandlerFactory.h | 112 ++--- src/Server/HTTPHandlerRequestFilter.h | 48 +-- src/Server/InterserverIOHTTPHandler.cpp | 37 +- src/Server/InterserverIOHTTPHandler.h | 16 +- src/Server/NotFoundHandler.cpp | 31 +- src/Server/NotFoundHandler.h | 9 +- src/Server/PrometheusRequestHandler.cpp | 34 +- src/Server/PrometheusRequestHandler.h | 16 +- src/Server/ReplicasStatusHandler.cpp | 27 +- src/Server/ReplicasStatusHandler.h | 10 +- src/Server/StaticRequestHandler.cpp | 31 +- src/Server/StaticRequestHandler.h | 6 +- src/Server/WebUIRequestHandler.cpp | 6 +- src/Server/WebUIRequestHandler.h | 6 +- src/Server/ya.make | 8 + src/Storages/MergeTree/DataPartsExchange.cpp | 17 +- src/Storages/MergeTree/DataPartsExchange.h | 15 +- tests/queries/query_test.py | 2 +- 80 files changed, 2303 insertions(+), 654 deletions(-) delete mode 100644 src/Common/HTMLForm.h create mode 100644 src/IO/EmptyReadBuffer.h create mode 100644 src/IO/HTTPChunkedReadBuffer.cpp create mode 100644 src/IO/HTTPChunkedReadBuffer.h create mode 100644 src/Server/HTTP/HTMLForm.cpp create mode 100644 src/Server/HTTP/HTMLForm.h create mode 100644 src/Server/HTTP/HTTPRequest.h create mode 100644 src/Server/HTTP/HTTPRequestHandler.h create mode 100644 src/Server/HTTP/HTTPRequestHandlerFactory.h create mode 100644 src/Server/HTTP/HTTPResponse.h create mode 100644 src/Server/HTTP/HTTPServer.cpp create mode 100644 src/Server/HTTP/HTTPServer.h create mode 100644 src/Server/HTTP/HTTPServerConnection.cpp create mode 100644 src/Server/HTTP/HTTPServerConnection.h create mode 100644 src/Server/HTTP/HTTPServerConnectionFactory.cpp create mode 100644 src/Server/HTTP/HTTPServerConnectionFactory.h create mode 100644 src/Server/HTTP/HTTPServerRequest.cpp create mode 100644 src/Server/HTTP/HTTPServerRequest.h create mode 100644 src/Server/HTTP/HTTPServerResponse.cpp create mode 100644 src/Server/HTTP/HTTPServerResponse.h create mode 100644 src/Server/HTTP/ReadHeaders.cpp create mode 100644 src/Server/HTTP/ReadHeaders.h rename src/{IO => Server/HTTP}/WriteBufferFromHTTPServerResponse.cpp (81%) rename src/{IO => Server/HTTP}/WriteBufferFromHTTPServerResponse.h (86%) diff --git a/base/daemon/BaseDaemon.h b/base/daemon/BaseDaemon.h index 42d94629ae9..8b9d765cf2e 100644 --- a/base/daemon/BaseDaemon.h +++ b/base/daemon/BaseDaemon.h @@ -83,7 +83,7 @@ public: template void writeToGraphite(const std::string & key, const T & value, const std::string & config_name = DEFAULT_GRAPHITE_CONFIG_NAME, time_t timestamp = 0, const std::string & custom_root_path = "") { - auto writer = getGraphiteWriter(config_name); + auto *writer = getGraphiteWriter(config_name); if (writer) writer->write(key, value, timestamp, custom_root_path); } @@ -91,7 +91,7 @@ public: template void writeToGraphite(const GraphiteWriter::KeyValueVector & key_vals, const std::string & config_name = DEFAULT_GRAPHITE_CONFIG_NAME, time_t timestamp = 0, const std::string & custom_root_path = "") { - auto writer = getGraphiteWriter(config_name); + auto *writer = getGraphiteWriter(config_name); if (writer) writer->write(key_vals, timestamp, custom_root_path); } @@ -99,7 +99,7 @@ public: template void writeToGraphite(const GraphiteWriter::KeyValueVector & key_vals, const std::chrono::system_clock::time_point & current_time, const std::string & custom_root_path) { - auto writer = getGraphiteWriter(); + auto *writer = getGraphiteWriter(); if (writer) writer->write(key_vals, std::chrono::system_clock::to_time_t(current_time), custom_root_path); } diff --git a/programs/odbc-bridge/ColumnInfoHandler.cpp b/programs/odbc-bridge/ColumnInfoHandler.cpp index ee4daa3e16d..5aef7f1ac38 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -4,14 +4,14 @@ # include # include -# include +# include # include # include # include # include # include # include -# include +# include # include # include # include @@ -59,16 +59,16 @@ namespace } } -void ODBCColumnsInfoHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) +void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { - Poco::Net::HTMLForm params(request, request.stream()); + HTMLForm params(request, request.getStream()); LOG_TRACE(log, "Request URI: {}", request.getURI()); auto process_error = [&response, this](const std::string & message) { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); if (!response.sent()) - response.send() << message << std::endl; + *response.send() << message << std::endl; LOG_WARNING(log, message); }; @@ -159,7 +159,7 @@ void ODBCColumnsInfoHandler::handleRequest(Poco::Net::HTTPServerRequest & reques columns.emplace_back(reinterpret_cast(column_name), std::move(column_type)); } - WriteBufferFromHTTPServerResponse out(request, response, keep_alive_timeout); + WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); writeStringBinary(columns.toString(), out); } catch (...) diff --git a/programs/odbc-bridge/ColumnInfoHandler.h b/programs/odbc-bridge/ColumnInfoHandler.h index 04b4c06693b..9b5b470b31d 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.h +++ b/programs/odbc-bridge/ColumnInfoHandler.h @@ -3,10 +3,11 @@ #if USE_ODBC # include -# include -# include +# include # include +# include + /** The structure of the table is taken from the query "SELECT * FROM table WHERE 1=0". * TODO: It would be much better to utilize ODBC methods dedicated for columns description. * If there is no such table, an exception is thrown. @@ -14,7 +15,7 @@ namespace DB { -class ODBCColumnsInfoHandler : public Poco::Net::HTTPRequestHandler +class ODBCColumnsInfoHandler : public HTTPRequestHandler { public: ODBCColumnsInfoHandler(size_t keep_alive_timeout_, Context & context_) @@ -22,7 +23,7 @@ public: { } - void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; private: Poco::Logger * log; diff --git a/programs/odbc-bridge/HandlerFactory.cpp b/programs/odbc-bridge/HandlerFactory.cpp index 0cc40480b87..9ac48af4ace 100644 --- a/programs/odbc-bridge/HandlerFactory.cpp +++ b/programs/odbc-bridge/HandlerFactory.cpp @@ -7,39 +7,40 @@ namespace DB { -Poco::Net::HTTPRequestHandler * HandlerFactory::createRequestHandler(const Poco::Net::HTTPServerRequest & request) + +std::unique_ptr HandlerFactory::createRequestHandler(const HTTPServerRequest & request) { Poco::URI uri{request.getURI()}; LOG_TRACE(log, "Request URI: {}", uri.toString()); if (uri.getPath() == "/ping" && request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET) - return new PingHandler(keep_alive_timeout); + return std::make_unique(keep_alive_timeout); if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST) { if (uri.getPath() == "/columns_info") #if USE_ODBC - return new ODBCColumnsInfoHandler(keep_alive_timeout, context); + return std::make_unique(keep_alive_timeout, context); #else return nullptr; #endif else if (uri.getPath() == "/identifier_quote") #if USE_ODBC - return new IdentifierQuoteHandler(keep_alive_timeout, context); + return std::make_unique(keep_alive_timeout, context); #else return nullptr; #endif else if (uri.getPath() == "/schema_allowed") #if USE_ODBC - return new SchemaAllowedHandler(keep_alive_timeout, context); + return std::make_unique(keep_alive_timeout, context); #else return nullptr; #endif else if (uri.getPath() == "/write") - return new ODBCHandler(pool_map, keep_alive_timeout, context, "write"); + return std::make_unique(pool_map, keep_alive_timeout, context, "write"); else - return new ODBCHandler(pool_map, keep_alive_timeout, context, "read"); + return std::make_unique(pool_map, keep_alive_timeout, context, "read"); } return nullptr; } diff --git a/programs/odbc-bridge/HandlerFactory.h b/programs/odbc-bridge/HandlerFactory.h index 1d4edfc9dd1..5dce6f02ecd 100644 --- a/programs/odbc-bridge/HandlerFactory.h +++ b/programs/odbc-bridge/HandlerFactory.h @@ -1,16 +1,17 @@ #pragma once + #include -#include -#include -#include -#include "MainHandler.h" +#include #include "ColumnInfoHandler.h" #include "IdentifierQuoteHandler.h" +#include "MainHandler.h" #include "SchemaAllowedHandler.h" +#include + #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" - #include +#include #pragma GCC diagnostic pop @@ -19,7 +20,7 @@ namespace DB /** Factory for '/ping', '/', '/columns_info', '/identifier_quote', '/schema_allowed' handlers. * Also stores Session pools for ODBC connections */ -class HandlerFactory : public Poco::Net::HTTPRequestHandlerFactory +class HandlerFactory : public HTTPRequestHandlerFactory { public: HandlerFactory(const std::string & name_, size_t keep_alive_timeout_, Context & context_) @@ -28,7 +29,7 @@ public: pool_map = std::make_shared(); } - Poco::Net::HTTPRequestHandler * createRequestHandler(const Poco::Net::HTTPServerRequest & request) override; + std::unique_ptr createRequestHandler(const HTTPServerRequest & request) override; private: Poco::Logger * log; diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.cpp b/programs/odbc-bridge/IdentifierQuoteHandler.cpp index 2c3701cfff9..ec4e4493d61 100644 --- a/programs/odbc-bridge/IdentifierQuoteHandler.cpp +++ b/programs/odbc-bridge/IdentifierQuoteHandler.cpp @@ -3,14 +3,14 @@ #if USE_ODBC # include -# include +# include +# include # include # include # include # include # include # include -# include # include # include # include @@ -22,16 +22,16 @@ namespace DB { -void IdentifierQuoteHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) +void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { - Poco::Net::HTMLForm params(request, request.stream()); + HTMLForm params(request, request.getStream()); LOG_TRACE(log, "Request URI: {}", request.getURI()); auto process_error = [&response, this](const std::string & message) { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); if (!response.sent()) - response.send() << message << std::endl; + *response.send() << message << std::endl; LOG_WARNING(log, message); }; @@ -49,7 +49,7 @@ void IdentifierQuoteHandler::handleRequest(Poco::Net::HTTPServerRequest & reques auto identifier = getIdentifierQuote(hdbc); - WriteBufferFromHTTPServerResponse out(request, response, keep_alive_timeout); + WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); writeStringBinary(identifier, out); } catch (...) diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.h b/programs/odbc-bridge/IdentifierQuoteHandler.h index fd357e32786..dad88c72ad8 100644 --- a/programs/odbc-bridge/IdentifierQuoteHandler.h +++ b/programs/odbc-bridge/IdentifierQuoteHandler.h @@ -1,8 +1,9 @@ #pragma once #include +#include + #include -#include #if USE_ODBC @@ -10,7 +11,7 @@ namespace DB { -class IdentifierQuoteHandler : public Poco::Net::HTTPRequestHandler +class IdentifierQuoteHandler : public HTTPRequestHandler { public: IdentifierQuoteHandler(size_t keep_alive_timeout_, Context &) @@ -18,7 +19,7 @@ public: { } - void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; private: Poco::Logger * log; diff --git a/programs/odbc-bridge/MainHandler.cpp b/programs/odbc-bridge/MainHandler.cpp index 64cb7bc0b46..b9670397878 100644 --- a/programs/odbc-bridge/MainHandler.cpp +++ b/programs/odbc-bridge/MainHandler.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -73,19 +74,19 @@ ODBCHandler::PoolPtr ODBCHandler::getPool(const std::string & connection_str) return pool_map->at(connection_str); } -void ODBCHandler::processError(Poco::Net::HTTPServerResponse & response, const std::string & message) +void ODBCHandler::processError(HTTPServerResponse & response, const std::string & message) { - response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); + response.setStatusAndReason(HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); if (!response.sent()) - response.send() << message << std::endl; + *response.send() << message << std::endl; LOG_WARNING(log, message); } -void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) +void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { - Poco::Net::HTMLForm params(request); + HTMLForm params(request); if (mode == "read") - params.read(request.stream()); + params.read(request.getStream()); LOG_TRACE(log, "Request URI: {}", request.getURI()); if (mode == "read" && !params.has("query")) @@ -136,7 +137,7 @@ void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne std::string connection_string = params.get("connection_string"); LOG_TRACE(log, "Connection string: '{}'", connection_string); - WriteBufferFromHTTPServerResponse out(request, response, keep_alive_timeout); + WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); try { @@ -163,9 +164,8 @@ void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne #endif auto pool = getPool(connection_string); - ReadBufferFromIStream read_buf(request.stream()); - auto input_format = FormatFactory::instance().getInput(format, read_buf, *sample_block, - context, max_block_size); + auto & read_buf = request.getStream(); + auto input_format = FormatFactory::instance().getInput(format, read_buf, *sample_block, context, max_block_size); auto input_stream = std::make_shared(input_format); ODBCBlockOutputStream output_stream(pool->get(), db_name, table_name, *sample_block, quoting_style); copyData(*input_stream, output_stream); diff --git a/programs/odbc-bridge/MainHandler.h b/programs/odbc-bridge/MainHandler.h index ec5e6693a60..e237ede5814 100644 --- a/programs/odbc-bridge/MainHandler.h +++ b/programs/odbc-bridge/MainHandler.h @@ -1,12 +1,13 @@ #pragma once #include +#include + #include -#include #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" - #include +#include #pragma GCC diagnostic pop namespace DB @@ -16,7 +17,7 @@ namespace DB * and also query in request body * response in RowBinary format */ -class ODBCHandler : public Poco::Net::HTTPRequestHandler +class ODBCHandler : public HTTPRequestHandler { public: using PoolPtr = std::shared_ptr; @@ -34,7 +35,7 @@ public: { } - void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; private: Poco::Logger * log; @@ -47,7 +48,7 @@ private: static inline std::mutex mutex; PoolPtr getPool(const std::string & connection_str); - void processError(Poco::Net::HTTPServerResponse & response, const std::string & message); + void processError(HTTPServerResponse & response, const std::string & message); }; } diff --git a/programs/odbc-bridge/ODBCBridge.cpp b/programs/odbc-bridge/ODBCBridge.cpp index 9deefaf7895..8869a2639c1 100644 --- a/programs/odbc-bridge/ODBCBridge.cpp +++ b/programs/odbc-bridge/ODBCBridge.cpp @@ -11,7 +11,6 @@ # include #endif -#include #include #include #include @@ -23,6 +22,7 @@ #include #include #include +#include namespace DB @@ -212,8 +212,12 @@ int ODBCBridge::main(const std::vector & /*args*/) SensitiveDataMasker::setInstance(std::make_unique(config(), "query_masking_rules")); } - auto server = Poco::Net::HTTPServer( - new HandlerFactory("ODBCRequestHandlerFactory-factory", keep_alive_timeout, context), server_pool, socket, http_params); + auto server = HTTPServer( + context, + std::make_shared("ODBCRequestHandlerFactory-factory", keep_alive_timeout, context), + server_pool, + socket, + http_params); server.start(); LOG_INFO(log, "Listening http://{}", address.toString()); diff --git a/programs/odbc-bridge/PingHandler.cpp b/programs/odbc-bridge/PingHandler.cpp index b0313e46bf3..e3ab5e5cd00 100644 --- a/programs/odbc-bridge/PingHandler.cpp +++ b/programs/odbc-bridge/PingHandler.cpp @@ -6,7 +6,7 @@ namespace DB { -void PingHandler::handleRequest(Poco::Net::HTTPServerRequest & /*request*/, Poco::Net::HTTPServerResponse & response) +void PingHandler::handleRequest(HTTPServerRequest & /* request */, HTTPServerResponse & response) { try { diff --git a/programs/odbc-bridge/PingHandler.h b/programs/odbc-bridge/PingHandler.h index d8109a50bb6..c969ec55af7 100644 --- a/programs/odbc-bridge/PingHandler.h +++ b/programs/odbc-bridge/PingHandler.h @@ -1,17 +1,19 @@ #pragma once -#include + +#include namespace DB { -/** Simple ping handler, answers "Ok." to GET request - */ -class PingHandler : public Poco::Net::HTTPRequestHandler + +/// Simple ping handler, answers "Ok." to GET request +class PingHandler : public HTTPRequestHandler { public: - PingHandler(size_t keep_alive_timeout_) : keep_alive_timeout(keep_alive_timeout_) {} - void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override; + explicit PingHandler(size_t keep_alive_timeout_) : keep_alive_timeout(keep_alive_timeout_) {} + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; private: size_t keep_alive_timeout; }; + } diff --git a/programs/odbc-bridge/SchemaAllowedHandler.cpp b/programs/odbc-bridge/SchemaAllowedHandler.cpp index fa08a27da59..48744b6d2ca 100644 --- a/programs/odbc-bridge/SchemaAllowedHandler.cpp +++ b/programs/odbc-bridge/SchemaAllowedHandler.cpp @@ -2,12 +2,12 @@ #if USE_ODBC -# include +# include +# include # include # include # include # include -# include # include # include # include @@ -33,16 +33,16 @@ namespace } -void SchemaAllowedHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) +void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { - Poco::Net::HTMLForm params(request, request.stream()); + HTMLForm params(request, request.getStream()); LOG_TRACE(log, "Request URI: {}", request.getURI()); auto process_error = [&response, this](const std::string & message) { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); if (!response.sent()) - response.send() << message << std::endl; + *response.send() << message << std::endl; LOG_WARNING(log, message); }; @@ -60,7 +60,7 @@ void SchemaAllowedHandler::handleRequest(Poco::Net::HTTPServerRequest & request, bool result = isSchemaAllowed(hdbc); - WriteBufferFromHTTPServerResponse out(request, response, keep_alive_timeout); + WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); writeBoolText(result, out); } catch (...) diff --git a/programs/odbc-bridge/SchemaAllowedHandler.h b/programs/odbc-bridge/SchemaAllowedHandler.h index 76aa23b903c..91eddf67803 100644 --- a/programs/odbc-bridge/SchemaAllowedHandler.h +++ b/programs/odbc-bridge/SchemaAllowedHandler.h @@ -1,17 +1,18 @@ #pragma once +#include + #include -#include #if USE_ODBC namespace DB { + class Context; - -/// This handler establishes connection to database, and retrieve whether schema is allowed. -class SchemaAllowedHandler : public Poco::Net::HTTPRequestHandler +/// This handler establishes connection to database, and retrieves whether schema is allowed. +class SchemaAllowedHandler : public HTTPRequestHandler { public: SchemaAllowedHandler(size_t keep_alive_timeout_, Context &) @@ -19,7 +20,7 @@ public: { } - void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; private: Poco::Logger * log; diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index a96cb2b8973..4194bb4a06b 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -69,6 +69,7 @@ #include #include #include +#include #if !defined(ARCADIA_BUILD) @@ -1070,8 +1071,10 @@ int Server::main(const std::vector & /*args*/) socket.setReceiveTimeout(settings.http_receive_timeout); socket.setSendTimeout(settings.http_send_timeout); - servers->emplace_back(port_name, std::make_unique( - createHandlerFactory(*this, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params)); + servers->emplace_back( + port_name, + std::make_unique( + context(), createHandlerFactory(*this, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params)); LOG_INFO(log, "Listening for http://{}", address.toString()); }); @@ -1085,8 +1088,10 @@ int Server::main(const std::vector & /*args*/) auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); socket.setReceiveTimeout(settings.http_receive_timeout); socket.setSendTimeout(settings.http_send_timeout); - servers->emplace_back(port_name, std::make_unique( - createHandlerFactory(*this, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params)); + servers->emplace_back( + port_name, + std::make_unique( + context(), createHandlerFactory(*this, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params)); LOG_INFO(log, "Listening for https://{}", address.toString()); #else @@ -1160,8 +1165,14 @@ int Server::main(const std::vector & /*args*/) auto address = socketBindListen(socket, listen_host, port); socket.setReceiveTimeout(settings.http_receive_timeout); socket.setSendTimeout(settings.http_send_timeout); - servers->emplace_back(port_name, std::make_unique( - createHandlerFactory(*this, async_metrics, "InterserverIOHTTPHandler-factory"), server_pool, socket, http_params)); + servers->emplace_back( + port_name, + std::make_unique( + context(), + createHandlerFactory(*this, async_metrics, "InterserverIOHTTPHandler-factory"), + server_pool, + socket, + http_params)); LOG_INFO(log, "Listening for replica communication (interserver): http://{}", address.toString()); }); @@ -1174,8 +1185,14 @@ int Server::main(const std::vector & /*args*/) auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); socket.setReceiveTimeout(settings.http_receive_timeout); socket.setSendTimeout(settings.http_send_timeout); - servers->emplace_back(port_name, std::make_unique( - createHandlerFactory(*this, async_metrics, "InterserverIOHTTPSHandler-factory"), server_pool, socket, http_params)); + servers->emplace_back( + port_name, + std::make_unique( + context(), + createHandlerFactory(*this, async_metrics, "InterserverIOHTTPSHandler-factory"), + server_pool, + socket, + http_params)); LOG_INFO(log, "Listening for secure replica communication (interserver): https://{}", address.toString()); #else @@ -1235,8 +1252,14 @@ int Server::main(const std::vector & /*args*/) auto address = socketBindListen(socket, listen_host, port); socket.setReceiveTimeout(settings.http_receive_timeout); socket.setSendTimeout(settings.http_send_timeout); - servers->emplace_back(port_name, std::make_unique( - createHandlerFactory(*this, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); + servers->emplace_back( + port_name, + std::make_unique( + context(), + createHandlerFactory(*this, async_metrics, "PrometheusHandler-factory"), + server_pool, + socket, + http_params)); LOG_INFO(log, "Listening for Prometheus: http://{}", address.toString()); }); diff --git a/programs/server/Server.h b/programs/server/Server.h index c582e475308..fbfc26f6ee5 100644 --- a/programs/server/Server.h +++ b/programs/server/Server.h @@ -51,6 +51,7 @@ public: } void defineOptions(Poco::Util::OptionSet & _options) override; + protected: int run() override; @@ -65,8 +66,6 @@ protected: private: Context * global_context_ptr = nullptr; -private: - Poco::Net::SocketAddress socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, [[maybe_unused]] bool secure = false) const; using CreateServerFunc = std::function; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d370016da00..215a13cce1a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -181,6 +181,7 @@ add_object_library(clickhouse_storages_mergetree Storages/MergeTree) add_object_library(clickhouse_storages_liveview Storages/LiveView) add_object_library(clickhouse_client Client) add_object_library(clickhouse_server Server) +add_object_library(clickhouse_server_http Server/HTTP) add_object_library(clickhouse_formats Formats) add_object_library(clickhouse_processors Processors) add_object_library(clickhouse_processors_executors Processors/Executors) diff --git a/src/Common/HTMLForm.h b/src/Common/HTMLForm.h deleted file mode 100644 index 2b62167dce7..00000000000 --- a/src/Common/HTMLForm.h +++ /dev/null @@ -1,42 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include - - -/** Somehow, in case of POST, Poco::Net::HTMLForm doesn't read parameters from URL, only from body. - * This helper allows to read parameters just from URL. - */ -struct HTMLForm : public Poco::Net::HTMLForm -{ - HTMLForm(const Poco::Net::HTTPRequest & request) - { - Poco::URI uri(request.getURI()); - std::istringstream istr(uri.getRawQuery()); // STYLE_CHECK_ALLOW_STD_STRING_STREAM - readUrl(istr); - } - - HTMLForm(const Poco::URI & uri) - { - std::istringstream istr(uri.getRawQuery()); // STYLE_CHECK_ALLOW_STD_STRING_STREAM - readUrl(istr); - } - - - template - T getParsed(const std::string & key, T default_value) - { - auto it = find(key); - return (it != end()) ? DB::parse(it->second) : default_value; - } - - template - T getParsed(const std::string & key) - { - return DB::parse(get(key)); - } -}; diff --git a/src/Common/StringUtils/StringUtils.h b/src/Common/StringUtils/StringUtils.h index 904e3035dd8..cb2227f01a8 100644 --- a/src/Common/StringUtils/StringUtils.h +++ b/src/Common/StringUtils/StringUtils.h @@ -120,6 +120,12 @@ inline bool isWhitespaceASCII(char c) return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'; } +/// Since |isWhiteSpaceASCII()| is used inside algorithms it's easier to implement another function than add extra argument. +inline bool isWhitespaceASCIIOneLine(char c) +{ + return c == ' ' || c == '\t' || c == '\r' || c == '\f' || c == '\v'; +} + inline bool isControlASCII(char c) { return static_cast(c) <= 31; diff --git a/src/Common/formatIPv6.h b/src/Common/formatIPv6.h index 63c064b21f8..bd0c68d70f9 100644 --- a/src/Common/formatIPv6.h +++ b/src/Common/formatIPv6.h @@ -85,9 +85,9 @@ inline bool parseIPv6(const char * src, unsigned char * dst) return clear_dst(); unsigned char tmp[IPV6_BINARY_LENGTH]{}; - auto tp = tmp; - auto endp = tp + IPV6_BINARY_LENGTH; - auto curtok = src; + auto * tp = tmp; + auto * endp = tp + IPV6_BINARY_LENGTH; + const auto * curtok = src; auto saw_xdigit = false; UInt32 val{}; unsigned char * colonp = nullptr; @@ -97,14 +97,14 @@ inline bool parseIPv6(const char * src, unsigned char * dst) { const auto num = unhex(ch); - if (num != '\xff') + if (num != u8'\xff') { val <<= 4; val |= num; if (val > 0xffffu) return clear_dst(); - saw_xdigit = 1; + saw_xdigit = true; continue; } @@ -204,7 +204,7 @@ inline void formatIPv4(const unsigned char * src, char *& dst, uint8_t mask_tail for (size_t octet = 0; octet < limit; ++octet) { const uint8_t value = static_cast(src[IPV4_BINARY_LENGTH - octet - 1]); - auto rep = one_byte_to_string_lookup_table[value]; + const auto * rep = one_byte_to_string_lookup_table[value]; const uint8_t len = rep[0]; const char* str = rep + 1; diff --git a/src/Common/hex.h b/src/Common/hex.h index db094e1dfd1..a1fa7b32465 100644 --- a/src/Common/hex.h +++ b/src/Common/hex.h @@ -90,12 +90,12 @@ std::string getHexUIntLowercase(TUInt uint_) extern const char * const hex_char_to_digit_table; -inline char unhex(char c) +inline UInt8 unhex(char c) { return hex_char_to_digit_table[static_cast(c)]; } -inline char unhex2(const char * data) +inline UInt8 unhex2(const char * data) { return static_cast(unhex(data[0])) * 0x10 diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp index 767ed959950..afc9fe00ef5 100644 --- a/src/Core/ExternalTable.cpp +++ b/src/Core/ExternalTable.cpp @@ -125,19 +125,16 @@ ExternalTable::ExternalTable(const boost::program_options::variables_map & exter } -void ExternalTablesHandler::handlePart(const Poco::Net::MessageHeader & header, std::istream & stream) +void ExternalTablesHandler::handlePart(const Poco::Net::MessageHeader & header, ReadBuffer & stream) { const Settings & settings = context.getSettingsRef(); - /// The buffer is initialized here, not in the virtual function initReadBuffer - read_buffer_impl = std::make_unique(stream); - if (settings.http_max_multipart_form_data_size) read_buffer = std::make_unique( - *read_buffer_impl, settings.http_max_multipart_form_data_size, + stream, settings.http_max_multipart_form_data_size, true, "the maximum size of multipart/form-data. This limit can be tuned by 'http_max_multipart_form_data_size' setting"); else - read_buffer = std::move(read_buffer_impl); + read_buffer = wrapReadBufferReference(stream); /// Retrieve a collection of parameters from MessageHeader Poco::Net::NameValueCollection content; diff --git a/src/Core/ExternalTable.h b/src/Core/ExternalTable.h index 0d8e0aaf8ac..aa15846d48a 100644 --- a/src/Core/ExternalTable.h +++ b/src/Core/ExternalTable.h @@ -1,15 +1,14 @@ #pragma once +#include +#include +#include +#include + +#include +#include #include #include -#include -#include - -#include - -#include -#include -#include namespace Poco @@ -51,7 +50,7 @@ public: std::unique_ptr read_buffer; Block sample_block; - virtual ~BaseExternalTable() {} + virtual ~BaseExternalTable() = default; /// Initialize read_buffer, depending on the data source. By default, does nothing. virtual void initReadBuffer() {} @@ -82,24 +81,23 @@ public: void initReadBuffer() override; /// Extract parameters from variables_map, which is built on the client command line - ExternalTable(const boost::program_options::variables_map & external_options); + explicit ExternalTable(const boost::program_options::variables_map & external_options); }; /// Parsing of external table used when sending tables via http /// The `handlePart` function will be called for each table passed, /// so it's also necessary to call `clean` at the end of the `handlePart`. -class ExternalTablesHandler : public Poco::Net::PartHandler, BaseExternalTable +class ExternalTablesHandler : public HTMLForm::PartHandler, BaseExternalTable { public: ExternalTablesHandler(Context & context_, const Poco::Net::NameValueCollection & params_) : context(context_), params(params_) {} - void handlePart(const Poco::Net::MessageHeader & header, std::istream & stream) override; + void handlePart(const Poco::Net::MessageHeader & header, ReadBuffer & stream) override; private: Context & context; const Poco::Net::NameValueCollection & params; - std::unique_ptr read_buffer_impl; }; diff --git a/src/IO/EmptyReadBuffer.h b/src/IO/EmptyReadBuffer.h new file mode 100644 index 00000000000..e2189b9943f --- /dev/null +++ b/src/IO/EmptyReadBuffer.h @@ -0,0 +1,18 @@ +#pragma once + +#include + +namespace DB +{ + +/// Just a stub - reads nothing from nowhere. +class EmptyReadBuffer : public ReadBuffer +{ +public: + EmptyReadBuffer() : ReadBuffer(nullptr, 0) {} + +private: + bool nextImpl() override { return false; } +}; + +} diff --git a/src/IO/HTTPChunkedReadBuffer.cpp b/src/IO/HTTPChunkedReadBuffer.cpp new file mode 100644 index 00000000000..bd9bbba4c6c --- /dev/null +++ b/src/IO/HTTPChunkedReadBuffer.cpp @@ -0,0 +1,92 @@ +#include + +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ARGUMENT_OUT_OF_BOUND; + extern const int UNEXPECTED_END_OF_FILE; + extern const int CORRUPTED_DATA; + extern const int TOO_MANY_BYTES; +} + +size_t HTTPChunkedReadBuffer::readChunkHeader() +{ + if (in->eof()) + throw Exception("Unexpected end of file while reading chunk header of HTTP chunked data", ErrorCodes::UNEXPECTED_END_OF_FILE); + + if (!isHexDigit(*in->position())) + throw Exception("Unexpected data instead of HTTP chunk header", ErrorCodes::CORRUPTED_DATA); + + size_t res = 0; + do + { + if (common::mulOverflow(res, 16ul, res) || common::addOverflow(res, unhex(*in->position()), res)) + throw Exception("Chunk size is out of bounds", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + ++in->position(); + } while (!in->eof() && isHexDigit(*in->position())); + + /// NOTE: If we want to read any chunk extensions, it should be done here. + + skipToCarriageReturnOrEOF(*in); + + if (in->eof()) + throw Exception("Unexpected end of file while reading chunk header of HTTP chunked data", ErrorCodes::UNEXPECTED_END_OF_FILE); + + if (res > max_size) + throw Exception("Chunk size is too large", ErrorCodes::TOO_MANY_BYTES); + + assertString("\n", *in); + return res; +} + +void HTTPChunkedReadBuffer::readChunkFooter() +{ + assertString("\r\n", *in); +} + +bool HTTPChunkedReadBuffer::nextImpl() +{ + if (!in) + return false; + + /// The footer of previous chunk. + if (count()) + readChunkFooter(); + + size_t chunk_size = readChunkHeader(); + if (0 == chunk_size) + { + readChunkFooter(); + in.reset(); // prevent double-eof situation. + return false; + } + + if (in->available() >= chunk_size) + { + /// Zero-copy read from input. + working_buffer = Buffer(in->position(), in->position() + chunk_size); + in->position() += chunk_size; + } + else + { + /// Chunk is not completely in buffer, copy it to scratch space. + memory.resize(chunk_size); + in->readStrict(memory.data(), chunk_size); + working_buffer = Buffer(memory.data(), memory.data() + chunk_size); + } + + /// NOTE: We postpone reading the footer to the next iteration, because it may not be completely in buffer, + /// but we need to keep the current data in buffer available. + + return true; +} + +} diff --git a/src/IO/HTTPChunkedReadBuffer.h b/src/IO/HTTPChunkedReadBuffer.h new file mode 100644 index 00000000000..0ccebc69d08 --- /dev/null +++ b/src/IO/HTTPChunkedReadBuffer.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include + +namespace DB +{ + +/// Reads data with HTTP Chunked Transfer Encoding. +class HTTPChunkedReadBuffer : public BufferWithOwnMemory +{ +public: + HTTPChunkedReadBuffer(std::unique_ptr in_, size_t max_chunk_size) : in(std::move(in_)), max_size(max_chunk_size) {} + +private: + std::unique_ptr in; + const size_t max_size; + + size_t readChunkHeader(); + void readChunkFooter(); + + bool nextImpl() override; +}; + +} diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index d12aa10fe6a..346bbf0427e 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -23,7 +24,6 @@ # include #endif -#include #include #include @@ -266,7 +266,7 @@ namespace }; } -void setResponseDefaultHeaders(Poco::Net::HTTPServerResponse & response, unsigned keep_alive_timeout) +void setResponseDefaultHeaders(HTTPServerResponse & response, unsigned keep_alive_timeout) { if (!response.getKeepAlive()) return; diff --git a/src/IO/HTTPCommon.h b/src/IO/HTTPCommon.h index 4a81d23a8a3..18e83abb83b 100644 --- a/src/IO/HTTPCommon.h +++ b/src/IO/HTTPCommon.h @@ -14,20 +14,13 @@ #include -namespace Poco -{ -namespace Net -{ - class HTTPServerResponse; -} -} - - namespace DB { constexpr int HTTP_TOO_MANY_REQUESTS = 429; +class HTTPServerResponse; + class SingleEndpointHTTPSessionPool : public PoolBase { private: @@ -45,7 +38,7 @@ public: using PooledHTTPSessionPtr = SingleEndpointHTTPSessionPool::Entry; using HTTPSessionPtr = std::shared_ptr; -void setResponseDefaultHeaders(Poco::Net::HTTPServerResponse & response, unsigned keep_alive_timeout); +void setResponseDefaultHeaders(HTTPServerResponse & response, unsigned keep_alive_timeout); /// Create session object to perform requests and set required parameters. HTTPSessionPtr makeHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts, bool resolve_host = true); @@ -54,7 +47,7 @@ HTTPSessionPtr makeHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & PooledHTTPSessionPtr makePooledHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts, size_t per_endpoint_pool_size, bool resolve_host = true); PooledHTTPSessionPtr makePooledHTTPSession(const Poco::URI & uri, const Poco::URI & proxy_uri, const ConnectionTimeouts & timeouts, size_t per_endpoint_pool_size, bool resolve_host = true); -bool isRedirect(const Poco::Net::HTTPResponse::HTTPStatus status); +bool isRedirect(Poco::Net::HTTPResponse::HTTPStatus status); /** Used to receive response (response headers and possibly body) * after sending data (request headers and possibly body). @@ -65,5 +58,5 @@ std::istream * receiveResponse( Poco::Net::HTTPClientSession & session, const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, bool allow_redirects); void assertResponseIsOk( - const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, std::istream & istr, const bool allow_redirects = false); + const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, std::istream & istr, bool allow_redirects = false); } diff --git a/src/IO/LimitReadBuffer.cpp b/src/IO/LimitReadBuffer.cpp index baa9e487688..9daffa3a1d3 100644 --- a/src/IO/LimitReadBuffer.cpp +++ b/src/IO/LimitReadBuffer.cpp @@ -14,10 +14,10 @@ namespace ErrorCodes bool LimitReadBuffer::nextImpl() { - assert(position() >= in.position()); + assert(position() >= in->position()); /// Let underlying buffer calculate read bytes in `next()` call. - in.position() = position(); + in->position() = position(); if (bytes >= limit) { @@ -27,13 +27,13 @@ bool LimitReadBuffer::nextImpl() return false; } - if (!in.next()) + if (!in->next()) { - working_buffer = in.buffer(); + working_buffer = in->buffer(); return false; } - working_buffer = in.buffer(); + working_buffer = in->buffer(); if (limit - bytes < working_buffer.size()) working_buffer.resize(limit - bytes); @@ -42,14 +42,33 @@ bool LimitReadBuffer::nextImpl() } -LimitReadBuffer::LimitReadBuffer(ReadBuffer & in_, UInt64 limit_, bool throw_exception_, std::string exception_message_) - : ReadBuffer(in_.position(), 0), in(in_), limit(limit_), throw_exception(throw_exception_), exception_message(std::move(exception_message_)) +LimitReadBuffer::LimitReadBuffer(ReadBuffer * in_, bool owns, UInt64 limit_, bool throw_exception_, std::string exception_message_) + : ReadBuffer(in_ ? in_->position() : nullptr, 0) + , in(in_) + , owns_in(owns) + , limit(limit_) + , throw_exception(throw_exception_) + , exception_message(std::move(exception_message_)) { - size_t remaining_bytes_in_buffer = in.buffer().end() - in.position(); + assert(in); + + size_t remaining_bytes_in_buffer = in->buffer().end() - in->position(); if (remaining_bytes_in_buffer > limit) remaining_bytes_in_buffer = limit; - working_buffer = Buffer(in.position(), in.position() + remaining_bytes_in_buffer); + working_buffer = Buffer(in->position(), in->position() + remaining_bytes_in_buffer); +} + + +LimitReadBuffer::LimitReadBuffer(ReadBuffer & in_, UInt64 limit_, bool throw_exception_, std::string exception_message_) + : LimitReadBuffer(&in_, false, limit_, throw_exception_, exception_message_) +{ +} + + +LimitReadBuffer::LimitReadBuffer(std::unique_ptr in_, UInt64 limit_, bool throw_exception_, std::string exception_message_) + : LimitReadBuffer(in_.release(), true, limit_, throw_exception_, exception_message_) +{ } @@ -57,7 +76,10 @@ LimitReadBuffer::~LimitReadBuffer() { /// Update underlying buffer's position in case when limit wasn't reached. if (!working_buffer.empty()) - in.position() = position(); + in->position() = position(); + + if (owns_in) + delete in; } } diff --git a/src/IO/LimitReadBuffer.h b/src/IO/LimitReadBuffer.h index db3d2684ef7..a5fa0f0d5cc 100644 --- a/src/IO/LimitReadBuffer.h +++ b/src/IO/LimitReadBuffer.h @@ -12,17 +12,22 @@ namespace DB */ class LimitReadBuffer : public ReadBuffer { +public: + LimitReadBuffer(ReadBuffer & in_, UInt64 limit_, bool throw_exception_, std::string exception_message_ = {}); + LimitReadBuffer(std::unique_ptr in_, UInt64 limit_, bool throw_exception_, std::string exception_message_ = {}); + ~LimitReadBuffer() override; + private: - ReadBuffer & in; + ReadBuffer * in; + bool owns_in; + UInt64 limit; bool throw_exception; std::string exception_message; - bool nextImpl() override; + LimitReadBuffer(ReadBuffer * in_, bool owns, UInt64 limit_, bool throw_exception_, std::string exception_message_); -public: - LimitReadBuffer(ReadBuffer & in_, UInt64 limit_, bool throw_exception_, std::string exception_message_ = {}); - ~LimitReadBuffer() override; + bool nextImpl() override; }; } diff --git a/src/IO/PeekableReadBuffer.cpp b/src/IO/PeekableReadBuffer.cpp index e0e99afbfec..1d999d586b2 100644 --- a/src/IO/PeekableReadBuffer.cpp +++ b/src/IO/PeekableReadBuffer.cpp @@ -1,7 +1,9 @@ #include + namespace DB { + namespace ErrorCodes { extern const int LOGICAL_ERROR; @@ -107,22 +109,29 @@ bool PeekableReadBuffer::peekNext() return sub_buf.next(); } -void PeekableReadBuffer::rollbackToCheckpoint() +void PeekableReadBuffer::rollbackToCheckpoint(bool drop) { checkStateCorrect(); + if (!checkpoint) throw DB::Exception("There is no checkpoint", ErrorCodes::LOGICAL_ERROR); else if (checkpointInOwnMemory() == currentlyReadFromOwnMemory()) pos = *checkpoint; else /// Checkpoint is in own memory and pos is not. Switch to reading from own memory BufferBase::set(memory.data(), peeked_size, *checkpoint - memory.data()); + + if (drop) + dropCheckpoint(); + checkStateCorrect(); } bool PeekableReadBuffer::nextImpl() { - /// FIXME wrong bytes count because it can read the same data again after rollbackToCheckpoint() - /// However, changing bytes count on every call of next() (even after rollback) allows to determine if some pointers were invalidated. + /// FIXME: wrong bytes count because it can read the same data again after rollbackToCheckpoint() + /// however, changing bytes count on every call of next() (even after rollback) allows to determine + /// if some pointers were invalidated. + checkStateCorrect(); bool res; @@ -138,7 +147,7 @@ bool PeekableReadBuffer::nextImpl() if (useSubbufferOnly()) { /// Load next data to sub_buf - sub_buf.position() = pos; + sub_buf.position() = position(); res = sub_buf.next(); } else diff --git a/src/IO/PeekableReadBuffer.h b/src/IO/PeekableReadBuffer.h index e425f9bc953..4f6e669b31d 100644 --- a/src/IO/PeekableReadBuffer.h +++ b/src/IO/PeekableReadBuffer.h @@ -58,7 +58,7 @@ public: /// Sets position at checkpoint. /// All pointers (such as this->buffer().end()) may be invalidated - void rollbackToCheckpoint(); + void rollbackToCheckpoint(bool drop = false); /// If checkpoint and current position are in different buffers, appends data from sub-buffer to own memory, /// so data between checkpoint and position will be in continuous memory. diff --git a/src/IO/ReadBuffer.h b/src/IO/ReadBuffer.h index 5cbe04f8348..e3166ba8180 100644 --- a/src/IO/ReadBuffer.h +++ b/src/IO/ReadBuffer.h @@ -134,15 +134,27 @@ public: tryIgnore(std::numeric_limits::max()); } - /** Reads a single byte. */ - bool ALWAYS_INLINE read(char & c) + /// Peeks a single byte. + bool ALWAYS_INLINE peek(char & c) { if (eof()) return false; - c = *pos++; + c = *pos; return true; } + /// Reads a single byte. + bool ALWAYS_INLINE read(char & c) + { + if (peek(c)) + { + ++pos; + return true; + } + + return false; + } + void ALWAYS_INLINE readStrict(char & c) { if (read(c)) @@ -207,5 +219,39 @@ private: using ReadBufferPtr = std::shared_ptr; +/// Due to inconsistencies in ReadBuffer-family interfaces: +/// - some require to fully wrap underlying buffer and own it, +/// - some just wrap the reference without ownership, +/// we need to be able to wrap reference-only buffers with movable transparent proxy-buffer. +/// The uniqueness of such wraps is responsibility of the code author. +inline std::unique_ptr wrapReadBufferReference(ReadBuffer & buf) +{ + class ReadBufferWrapper : public ReadBuffer + { + public: + explicit ReadBufferWrapper(ReadBuffer & buf_) : ReadBuffer(buf_.position(), 0), buf(buf_) + { + working_buffer = Buffer(buf.position(), buf.buffer().end()); + } + + private: + ReadBuffer & buf; + + bool nextImpl() override + { + buf.position() = position(); + + if (!buf.next()) + return false; + + working_buffer = buf.buffer(); + + return true; + } + }; + + return std::make_unique(buf); +} + } diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp index 2c13446e693..59f0dc25667 100644 --- a/src/IO/ReadBufferFromPocoSocket.cpp +++ b/src/IO/ReadBufferFromPocoSocket.cpp @@ -78,7 +78,7 @@ ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, { } -bool ReadBufferFromPocoSocket::poll(size_t timeout_microseconds) +bool ReadBufferFromPocoSocket::poll(size_t timeout_microseconds) const { return available() || socket.poll(timeout_microseconds, Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_ERROR); } diff --git a/src/IO/ReadBufferFromPocoSocket.h b/src/IO/ReadBufferFromPocoSocket.h index 8064cd39246..d182d48d1f8 100644 --- a/src/IO/ReadBufferFromPocoSocket.h +++ b/src/IO/ReadBufferFromPocoSocket.h @@ -1,15 +1,14 @@ #pragma once -#include - -#include #include +#include + +#include namespace DB { -/** Works with the ready Poco::Net::Socket. Blocking operations. - */ +/// Works with the ready Poco::Net::Socket. Blocking operations. class ReadBufferFromPocoSocket : public BufferWithOwnMemory { protected: @@ -24,9 +23,9 @@ protected: bool nextImpl() override; public: - ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); + explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); - bool poll(size_t timeout_microseconds); + bool poll(size_t timeout_microseconds) const; void setAsyncCallback(std::function async_callback_) { async_callback = std::move(async_callback_); } diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index baa12297718..fe563021d2e 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1050,6 +1050,25 @@ void readAndThrowException(ReadBuffer & buf, const String & additional_message) } +void skipToCarriageReturnOrEOF(ReadBuffer & buf) +{ + while (!buf.eof()) + { + char * next_pos = find_first_symbols<'\r'>(buf.position(), buf.buffer().end()); + buf.position() = next_pos; + + if (!buf.hasPendingData()) + continue; + + if (*buf.position() == '\r') + { + ++buf.position(); + return; + } + } +} + + void skipToNextLineOrEOF(ReadBuffer & buf) { while (!buf.eof()) diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 4482667f447..d203bd7bbee 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -536,7 +536,7 @@ void parseUUID(const UInt8 * src36, std::reverse_iterator dst16); void parseUUIDWithoutSeparator(const UInt8 * src36, std::reverse_iterator dst16); template -void formatHex(IteratorSrc src, IteratorDst dst, const size_t num_bytes); +void formatHex(IteratorSrc src, IteratorDst dst, size_t num_bytes); template @@ -1046,10 +1046,14 @@ void readText(std::vector & x, ReadBuffer & buf) /// Skip whitespace characters. -inline void skipWhitespaceIfAny(ReadBuffer & buf) +inline void skipWhitespaceIfAny(ReadBuffer & buf, bool one_line = false) { - while (!buf.eof() && isWhitespaceASCII(*buf.position())) - ++buf.position(); + if (!one_line) + while (!buf.eof() && isWhitespaceASCII(*buf.position())) + ++buf.position(); + else + while (!buf.eof() && isWhitespaceASCIIOneLine(*buf.position())) + ++buf.position(); } /// Skips json value. @@ -1212,6 +1216,9 @@ inline void skipBOMIfExists(ReadBuffer & buf) /// Skip to next character after next \n. If no \n in stream, skip to end. void skipToNextLineOrEOF(ReadBuffer & buf); +/// Skip to next character after next \r. If no \r in stream, skip to end. +void skipToCarriageReturnOrEOF(ReadBuffer & buf); + /// Skip to next character after next unescaped \n. If no \n in stream, skip to end. Does not throw on invalid escape sequences. void skipToUnescapedNextLineOrEOF(ReadBuffer & buf); diff --git a/src/IO/ya.make b/src/IO/ya.make index 2ef8bd0a986..980719aa74f 100644 --- a/src/IO/ya.make +++ b/src/IO/ya.make @@ -26,6 +26,7 @@ SRCS( CascadeWriteBuffer.cpp CompressionMethod.cpp DoubleConverter.cpp + HTTPChunkedReadBuffer.cpp HTTPCommon.cpp HashingWriteBuffer.cpp HexWriteBuffer.cpp @@ -56,7 +57,6 @@ SRCS( WriteBufferFromFileDescriptor.cpp WriteBufferFromFileDescriptorDiscardOnFailure.cpp WriteBufferFromHTTP.cpp - WriteBufferFromHTTPServerResponse.cpp WriteBufferFromOStream.cpp WriteBufferFromPocoSocket.cpp WriteBufferFromTemporaryFile.cpp diff --git a/src/Interpreters/InterserverIOHandler.h b/src/Interpreters/InterserverIOHandler.h index 6d62c9651ca..db95a00d0f7 100644 --- a/src/Interpreters/InterserverIOHandler.h +++ b/src/Interpreters/InterserverIOHandler.h @@ -8,13 +8,13 @@ #include #include #include -#include -#include -#include -#include + #include -namespace Poco { namespace Net { class HTTPServerResponse; } } +#include +#include +#include +#include namespace DB { @@ -25,13 +25,16 @@ namespace ErrorCodes extern const int NO_SUCH_INTERSERVER_IO_ENDPOINT; } +class HTMLForm; +class HTTPServerResponse; + /** Query processor from other servers. */ class InterserverIOEndpoint { public: virtual std::string getId(const std::string & path) const = 0; - virtual void processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & body, WriteBuffer & out, Poco::Net::HTTPServerResponse & response) = 0; + virtual void processQuery(const HTMLForm & params, ReadBuffer & body, WriteBuffer & out, HTTPServerResponse & response) = 0; virtual ~InterserverIOEndpoint() = default; /// You need to stop the data transfer if blocker is activated. diff --git a/src/Server/HTTP/HTMLForm.cpp b/src/Server/HTTP/HTMLForm.cpp new file mode 100644 index 00000000000..ca407858c33 --- /dev/null +++ b/src/Server/HTTP/HTMLForm.cpp @@ -0,0 +1,381 @@ +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + +namespace +{ + +class NullPartHandler : public HTMLForm::PartHandler +{ +public: + void handlePart(const Poco::Net::MessageHeader &, ReadBuffer &) override {} +}; + +} + +const std::string HTMLForm::ENCODING_URL = "application/x-www-form-urlencoded"; +const std::string HTMLForm::ENCODING_MULTIPART = "multipart/form-data"; +const int HTMLForm::UNKNOWN_CONTENT_LENGTH = -1; + + +HTMLForm::HTMLForm() : field_limit(DFL_FIELD_LIMIT), value_length_limit(DFL_MAX_VALUE_LENGTH), encoding(ENCODING_URL) +{ +} + + +HTMLForm::HTMLForm(const std::string & encoding_) + : field_limit(DFL_FIELD_LIMIT), value_length_limit(DFL_MAX_VALUE_LENGTH), encoding(encoding_) +{ +} + + +HTMLForm::HTMLForm(const Poco::Net::HTTPRequest & request, ReadBuffer & requestBody, PartHandler & handler) + : field_limit(DFL_FIELD_LIMIT), value_length_limit(DFL_MAX_VALUE_LENGTH) +{ + load(request, requestBody, handler); +} + + +HTMLForm::HTMLForm(const Poco::Net::HTTPRequest & request, ReadBuffer & requestBody) + : field_limit(DFL_FIELD_LIMIT), value_length_limit(DFL_MAX_VALUE_LENGTH) +{ + load(request, requestBody); +} + + +HTMLForm::HTMLForm(const Poco::Net::HTTPRequest & request) : HTMLForm(Poco::URI(request.getURI())) +{ +} + +HTMLForm::HTMLForm(const Poco::URI & uri) : field_limit(DFL_FIELD_LIMIT), value_length_limit(DFL_MAX_VALUE_LENGTH) +{ + ReadBufferFromString istr(uri.getRawQuery()); // STYLE_CHECK_ALLOW_STD_STRING_STREAM + readQuery(istr); +} + + +void HTMLForm::setEncoding(const std::string & encoding_) +{ + encoding = encoding_; +} + + +void HTMLForm::addPart(const std::string & name, Poco::Net::PartSource * source) +{ + poco_check_ptr(source); + + Part part; + part.name = name; + part.source = std::unique_ptr(source); + parts.push_back(std::move(part)); +} + + +void HTMLForm::load(const Poco::Net::HTTPRequest & request, ReadBuffer & requestBody, PartHandler & handler) +{ + clear(); + + Poco::URI uri(request.getURI()); + const std::string & query = uri.getRawQuery(); + if (!query.empty()) + { + ReadBufferFromString istr(query); + readQuery(istr); + } + + if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST || request.getMethod() == Poco::Net::HTTPRequest::HTTP_PUT) + { + std::string media_type; + NameValueCollection params; + Poco::Net::MessageHeader::splitParameters(request.getContentType(), media_type, params); + encoding = media_type; + if (encoding == ENCODING_MULTIPART) + { + boundary = params["boundary"]; + readMultipart(requestBody, handler); + } + else + { + readQuery(requestBody); + } + } +} + + +void HTMLForm::load(const Poco::Net::HTTPRequest & request, ReadBuffer & requestBody) +{ + NullPartHandler nah; + load(request, requestBody, nah); +} + + +void HTMLForm::load(const Poco::Net::HTTPRequest & request) +{ + NullPartHandler nah; + EmptyReadBuffer nis; + load(request, nis, nah); +} + + +void HTMLForm::read(ReadBuffer & in, PartHandler & handler) +{ + if (encoding == ENCODING_URL) + readQuery(in); + else + readMultipart(in, handler); +} + + +void HTMLForm::read(ReadBuffer & in) +{ + readQuery(in); +} + + +void HTMLForm::read(const std::string & queryString) +{ + ReadBufferFromString istr(queryString); + readQuery(istr); +} + + +void HTMLForm::readQuery(ReadBuffer & in) +{ + size_t fields = 0; + char ch = 0; // silence "uninitialized" warning from gcc-* + bool is_first = true; + + while (true) + { + if (field_limit > 0 && fields == field_limit) + throw Poco::Net::HTMLFormException("Too many form fields"); + + std::string name; + std::string value; + + while (in.read(ch) && ch != '=' && ch != '&') + { + if (ch == '+') + ch = ' '; + if (name.size() < MAX_NAME_LENGTH) + name += ch; + else + throw Poco::Net::HTMLFormException("Field name too long"); + } + + if (ch == '=') + { + while (in.read(ch) && ch != '&') + { + if (ch == '+') + ch = ' '; + if (value.size() < value_length_limit) + value += ch; + else + throw Poco::Net::HTMLFormException("Field value too long"); + } + } + + // Remove UTF-8 BOM from first name, if present + if (is_first) + Poco::UTF8::removeBOM(name); + + std::string decoded_name; + std::string decoded_value; + Poco::URI::decode(name, decoded_name); + Poco::URI::decode(value, decoded_value); + add(decoded_name, decoded_value); + ++fields; + + is_first = false; + + if (in.eof()) + break; + } +} + + +void HTMLForm::readMultipart(ReadBuffer & in_, PartHandler & handler) +{ + /// Assume there is always a boundary provided. + assert(!boundary.empty()); + + size_t fields = 0; + MultipartReadBuffer in(in_, boundary); + + /// Assume there is at least one part + in.skipToNextBoundary(); + + /// Read each part until next boundary (or last boundary) + while (!in.eof()) + { + if (field_limit && fields > field_limit) + throw Poco::Net::HTMLFormException("Too many form fields"); + + Poco::Net::MessageHeader header; + readHeaders(header, in); + skipToNextLineOrEOF(in); + + NameValueCollection params; + if (header.has("Content-Disposition")) + { + std::string unused; + Poco::Net::MessageHeader::splitParameters(header.get("Content-Disposition"), unused, params); + } + + if (params.has("filename")) + handler.handlePart(header, in); + else + { + std::string name = params["name"]; + std::string value; + char ch; + + while (in.read(ch)) + { + if (value.size() > value_length_limit) + throw Poco::Net::HTMLFormException("Field value too long"); + value += ch; + } + + add(name, value); + } + + ++fields; + + /// If we already encountered EOF for the buffer |in|, it's possible that the next symbol is a start of boundary line. + /// In this case reading the boundary line will reset the EOF state, potentially breaking invariant of EOF idempotency - + /// if there is such invariant in the first place. + if (!in.skipToNextBoundary()) + break; + } +} + + +void HTMLForm::setFieldLimit(int limit) +{ + poco_assert(limit >= 0); + + field_limit = limit; +} + + +void HTMLForm::setValueLengthLimit(int limit) +{ + poco_assert(limit >= 0); + + value_length_limit = limit; +} + + +HTMLForm::MultipartReadBuffer::MultipartReadBuffer(ReadBuffer & in_, const std::string & boundary_) + : ReadBuffer(nullptr, 0), in(in_), boundary("--" + boundary_) +{ + /// For consistency with |nextImpl()| + position() = in.position(); +} + +bool HTMLForm::MultipartReadBuffer::skipToNextBoundary() +{ + assert(working_buffer.empty() || eof()); + assert(boundary_hit); + + boundary_hit = false; + + while (!in.eof()) + { + auto line = readLine(); + if (startsWith(line, boundary)) + { + set(in.position(), 0); + next(); /// We need to restrict our buffer to size of next available line. + return !startsWith(line, boundary + "--"); + } + } + + throw Poco::Net::HTMLFormException("No boundary line found"); +} + +std::string HTMLForm::MultipartReadBuffer::readLine(bool strict) +{ + std::string line; + char ch = 0; // silence "uninitialized" warning from gcc-* + + while (in.read(ch) && ch != '\r' && ch != '\n') + line += ch; + + if (in.eof()) + { + if (strict) + throw Poco::Net::HTMLFormException("Unexpected end of message"); + return line; + } + + line += ch; + + if (ch == '\r') + { + if (!in.read(ch) || ch != '\n') + throw Poco::Net::HTMLFormException("No CRLF found"); + else + line += ch; + } + + return line; +} + +bool HTMLForm::MultipartReadBuffer::nextImpl() +{ + if (boundary_hit) + return false; + + assert(position() >= in.position()); + + in.position() = position(); + + /// We expect to start from the first symbol after EOL, so we can put checkpoint + /// and safely try to read til the next EOL and check for boundary. + in.setCheckpoint(); + + /// FIXME: there is an extra copy because we cannot traverse PeekableBuffer from checkpoint to position() + /// since it may store different data parts in different sub-buffers, + /// anyway calling makeContinuousMemoryFromCheckpointToPos() will also make an extra copy. + std::string line = readLine(false); + + /// According to RFC2046 the preceding CRLF is a part of boundary line. + if (line == "\r\n") + { + line = readLine(false); + boundary_hit = startsWith(line, boundary); + if (!boundary_hit) line = "\r\n"; + } + else + boundary_hit = startsWith(line, boundary); + + in.rollbackToCheckpoint(true); + + /// Rolling back to checkpoint may change underlying buffers. + /// Limit readable data to a single line. + BufferBase::set(in.position(), line.size(), 0); + + return !boundary_hit && !line.empty(); +} + +} diff --git a/src/Server/HTTP/HTMLForm.h b/src/Server/HTTP/HTMLForm.h new file mode 100644 index 00000000000..27be712e1d5 --- /dev/null +++ b/src/Server/HTTP/HTMLForm.h @@ -0,0 +1,175 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +class HTMLForm : public Poco::Net::NameValueCollection, private boost::noncopyable +{ +public: + class PartHandler; + + enum Options + { + OPT_USE_CONTENT_LENGTH = 0x01 // don't use Chunked Transfer-Encoding for multipart requests. + }; + + /// Creates an empty HTMLForm and sets the + /// encoding to "application/x-www-form-urlencoded". + HTMLForm(); + + /// Creates an empty HTMLForm that uses the given encoding. + /// Encoding must be either "application/x-www-form-urlencoded" (which is the default) or "multipart/form-data". + explicit HTMLForm(const std::string & encoding); + + /// Creates a HTMLForm from the given HTTP request. + /// Uploaded files are passed to the given PartHandler. + HTMLForm(const Poco::Net::HTTPRequest & request, ReadBuffer & requestBody, PartHandler & handler); + + /// Creates a HTMLForm from the given HTTP request. + /// Uploaded files are silently discarded. + HTMLForm(const Poco::Net::HTTPRequest & request, ReadBuffer & requestBody); + + /// Creates a HTMLForm from the given HTTP request. + /// The request must be a GET request and the form data must be in the query string (URL encoded). + /// For POST requests, you must use one of the constructors taking an additional input stream for the request body. + explicit HTMLForm(const Poco::Net::HTTPRequest & request); + + explicit HTMLForm(const Poco::URI & uri); + + template + T getParsed(const std::string & key, T default_value) + { + auto it = find(key); + return (it != end()) ? DB::parse(it->second) : default_value; + } + + template + T getParsed(const std::string & key) + { + return DB::parse(get(key)); + } + + /// Sets the encoding used for posting the form. + /// Encoding must be either "application/x-www-form-urlencoded" (which is the default) or "multipart/form-data". + void setEncoding(const std::string & encoding); + + /// Returns the encoding used for posting the form. + const std::string & getEncoding() const { return encoding; } + + /// Adds an part/attachment (file upload) to the form. + /// The form takes ownership of the PartSource and deletes it when it is no longer needed. + /// The part will only be sent if the encoding set for the form is "multipart/form-data" + void addPart(const std::string & name, Poco::Net::PartSource * pSource); + + /// Reads the form data from the given HTTP request. + /// Uploaded files are passed to the given PartHandler. + void load(const Poco::Net::HTTPRequest & request, ReadBuffer & requestBody, PartHandler & handler); + + /// Reads the form data from the given HTTP request. + /// Uploaded files are silently discarded. + void load(const Poco::Net::HTTPRequest & request, ReadBuffer & requestBody); + + /// Reads the form data from the given HTTP request. + /// The request must be a GET request and the form data must be in the query string (URL encoded). + /// For POST requests, you must use one of the overloads taking an additional input stream for the request body. + void load(const Poco::Net::HTTPRequest & request); + + /// Reads the form data from the given input stream. + /// The form data read from the stream must be in the encoding specified for the form. + /// Note that read() does not clear the form before reading the new values. + void read(ReadBuffer & in, PartHandler & handler); + + /// Reads the URL-encoded form data from the given input stream. + /// Note that read() does not clear the form before reading the new values. + void read(ReadBuffer & in); + + /// Reads the form data from the given HTTP query string. + /// Note that read() does not clear the form before reading the new values. + void read(const std::string & queryString); + + /// Returns the MIME boundary used for writing multipart form data. + const std::string & getBoundary() const { return boundary; } + + /// Returns the maximum number of header fields allowed. + /// See setFieldLimit() for more information. + int getFieldLimit() const { return field_limit; } + + /// Sets the maximum number of header fields allowed. This limit is used to defend certain kinds of denial-of-service attacks. + /// Specify 0 for unlimited (not recommended). The default limit is 100. + void setFieldLimit(int limit); + + /// Sets the maximum size for form field values stored as strings. + void setValueLengthLimit(int limit); + + /// Returns the maximum size for form field values stored as strings. + int getValueLengthLimit() const { return value_length_limit; } + + static const std::string ENCODING_URL; /// "application/x-www-form-urlencoded" + static const std::string ENCODING_MULTIPART; /// "multipart/form-data" + static const int UNKNOWN_CONTENT_LENGTH; + +protected: + void readQuery(ReadBuffer & in); + void readMultipart(ReadBuffer & in, PartHandler & handler); + +private: + /// This buffer provides data line by line to check for boundary line in a convenient way. + class MultipartReadBuffer; + + enum Limits + { + DFL_FIELD_LIMIT = 100, + MAX_NAME_LENGTH = 1024, + DFL_MAX_VALUE_LENGTH = 256 * 1024 + }; + + struct Part + { + std::string name; + std::unique_ptr source; + }; + + using PartVec = std::vector; + + size_t field_limit; + size_t value_length_limit; + std::string encoding; + std::string boundary; + PartVec parts; +}; + +class HTMLForm::PartHandler +{ +public: + virtual ~PartHandler() = default; + virtual void handlePart(const Poco::Net::MessageHeader &, ReadBuffer &) = 0; +}; + +class HTMLForm::MultipartReadBuffer : public ReadBuffer +{ +public: + MultipartReadBuffer(ReadBuffer & in, const std::string & boundary); + + /// Returns false if last boundary found. + bool skipToNextBoundary(); + +private: + PeekableReadBuffer in; + const std::string boundary; + bool boundary_hit = true; + + std::string readLine(bool strict = true); + + bool nextImpl() override; +}; + +} diff --git a/src/Server/HTTP/HTTPRequest.h b/src/Server/HTTP/HTTPRequest.h new file mode 100644 index 00000000000..40839cbcdd2 --- /dev/null +++ b/src/Server/HTTP/HTTPRequest.h @@ -0,0 +1,10 @@ +#pragma once + +#include + +namespace DB +{ + +using HTTPRequest = Poco::Net::HTTPRequest; + +} diff --git a/src/Server/HTTP/HTTPRequestHandler.h b/src/Server/HTTP/HTTPRequestHandler.h new file mode 100644 index 00000000000..19340866bb7 --- /dev/null +++ b/src/Server/HTTP/HTTPRequestHandler.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include + +#include + +namespace DB +{ + +class HTTPRequestHandler : private boost::noncopyable +{ +public: + virtual ~HTTPRequestHandler() = default; + + virtual void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) = 0; +}; + +} diff --git a/src/Server/HTTP/HTTPRequestHandlerFactory.h b/src/Server/HTTP/HTTPRequestHandlerFactory.h new file mode 100644 index 00000000000..3d50bf0a2ed --- /dev/null +++ b/src/Server/HTTP/HTTPRequestHandlerFactory.h @@ -0,0 +1,20 @@ +#pragma once + +#include + +#include + +namespace DB +{ + +class HTTPRequestHandlerFactory : private boost::noncopyable +{ +public: + virtual ~HTTPRequestHandlerFactory() = default; + + virtual std::unique_ptr createRequestHandler(const HTTPServerRequest & request) = 0; +}; + +using HTTPRequestHandlerFactoryPtr = std::shared_ptr; + +} diff --git a/src/Server/HTTP/HTTPResponse.h b/src/Server/HTTP/HTTPResponse.h new file mode 100644 index 00000000000..c73bcec6c39 --- /dev/null +++ b/src/Server/HTTP/HTTPResponse.h @@ -0,0 +1,10 @@ +#pragma once + +#include + +namespace DB +{ + +using HTTPResponse = Poco::Net::HTTPResponse; + +} diff --git a/src/Server/HTTP/HTTPServer.cpp b/src/Server/HTTP/HTTPServer.cpp new file mode 100644 index 00000000000..3e050080bdd --- /dev/null +++ b/src/Server/HTTP/HTTPServer.cpp @@ -0,0 +1,48 @@ +#include + +#include + + +namespace DB +{ +HTTPServer::HTTPServer( + const Context & context, + HTTPRequestHandlerFactoryPtr factory_, + UInt16 portNumber, + Poco::Net::HTTPServerParams::Ptr params) + : TCPServer(new HTTPServerConnectionFactory(context, params, factory_), portNumber, params), factory(factory_) +{ +} + +HTTPServer::HTTPServer( + const Context & context, + HTTPRequestHandlerFactoryPtr factory_, + const Poco::Net::ServerSocket & socket, + Poco::Net::HTTPServerParams::Ptr params) + : TCPServer(new HTTPServerConnectionFactory(context, params, factory_), socket, params), factory(factory_) +{ +} + +HTTPServer::HTTPServer( + const Context & context, + HTTPRequestHandlerFactoryPtr factory_, + Poco::ThreadPool & threadPool, + const Poco::Net::ServerSocket & socket, + Poco::Net::HTTPServerParams::Ptr params) + : TCPServer(new HTTPServerConnectionFactory(context, params, factory_), threadPool, socket, params), factory(factory_) +{ +} + +HTTPServer::~HTTPServer() +{ + /// We should call stop and join thread here instead of destructor of parent TCPHandler, + /// because there's possible race on 'vptr' between this virtual destructor and 'run' method. + stop(); +} + +void HTTPServer::stopAll(bool /* abortCurrent */) +{ + stop(); +} + +} diff --git a/src/Server/HTTP/HTTPServer.h b/src/Server/HTTP/HTTPServer.h new file mode 100644 index 00000000000..1ce62c65ca2 --- /dev/null +++ b/src/Server/HTTP/HTTPServer.h @@ -0,0 +1,46 @@ +#pragma once + +#include + +#include +#include + +#include + + +namespace DB +{ + +class Context; + +class HTTPServer : public Poco::Net::TCPServer +{ +public: + explicit HTTPServer( + const Context & context, + HTTPRequestHandlerFactoryPtr factory, + UInt16 portNumber = 80, + Poco::Net::HTTPServerParams::Ptr params = new Poco::Net::HTTPServerParams); + + HTTPServer( + const Context & context, + HTTPRequestHandlerFactoryPtr factory, + const Poco::Net::ServerSocket & socket, + Poco::Net::HTTPServerParams::Ptr params); + + HTTPServer( + const Context & context, + HTTPRequestHandlerFactoryPtr factory, + Poco::ThreadPool & threadPool, + const Poco::Net::ServerSocket & socket, + Poco::Net::HTTPServerParams::Ptr params); + + ~HTTPServer() override; + + void stopAll(bool abortCurrent = false); + +private: + HTTPRequestHandlerFactoryPtr factory; +}; + +} diff --git a/src/Server/HTTP/HTTPServerConnection.cpp b/src/Server/HTTP/HTTPServerConnection.cpp new file mode 100644 index 00000000000..e2ee4c8882b --- /dev/null +++ b/src/Server/HTTP/HTTPServerConnection.cpp @@ -0,0 +1,128 @@ +#include + +#include + +namespace DB +{ + +HTTPServerConnection::HTTPServerConnection( + const Context & context_, + const Poco::Net::StreamSocket & socket, + Poco::Net::HTTPServerParams::Ptr params_, + HTTPRequestHandlerFactoryPtr factory_) + : TCPServerConnection(socket), context(context_), params(params_), factory(factory_), stopped(false) +{ + poco_check_ptr(factory); +} + +void HTTPServerConnection::run() +{ + std::string server = params->getSoftwareVersion(); + Poco::Net::HTTPServerSession session(socket(), params); + + while (!stopped && session.hasMoreRequests()) + { + try + { + std::unique_lock lock(mutex); + if (!stopped) + { + HTTPServerResponse response(session); + HTTPServerRequest request(context, response, session); + + Poco::Timestamp now; + response.setDate(now); + response.setVersion(request.getVersion()); + response.setKeepAlive(params->getKeepAlive() && request.getKeepAlive() && session.canKeepAlive()); + if (!server.empty()) + response.set("Server", server); + try + { + std::unique_ptr handler(factory->createRequestHandler(request)); + + if (handler) + { + if (request.getExpectContinue() && response.getStatus() == Poco::Net::HTTPResponse::HTTP_OK) + response.sendContinue(); + + handler->handleRequest(request, response); + session.setKeepAlive(params->getKeepAlive() && response.getKeepAlive() && session.canKeepAlive()); + } + else + sendErrorResponse(session, Poco::Net::HTTPResponse::HTTP_NOT_IMPLEMENTED); + } + catch (Poco::Exception &) + { + if (!response.sent()) + { + try + { + sendErrorResponse(session, Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); + } + catch (...) + { + } + } + throw; + } + } + } + catch (Poco::Net::NoMessageException &) + { + break; + } + catch (Poco::Net::MessageException &) + { + sendErrorResponse(session, Poco::Net::HTTPResponse::HTTP_BAD_REQUEST); + } + catch (Poco::Exception &) + { + if (session.networkException()) + { + session.networkException()->rethrow(); + } + else + throw; + } + } +} + +// static +void HTTPServerConnection::sendErrorResponse(Poco::Net::HTTPServerSession & session, Poco::Net::HTTPResponse::HTTPStatus status) +{ + HTTPServerResponse response(session); + response.setVersion(Poco::Net::HTTPMessage::HTTP_1_1); + response.setStatusAndReason(status); + response.setKeepAlive(false); + response.send(); + session.setKeepAlive(false); +} + +void HTTPServerConnection::onServerStopped(const bool & abortCurrent) +{ + stopped = true; + if (abortCurrent) + { + try + { + socket().shutdown(); + } + catch (...) + { + } + } + else + { + std::unique_lock lock(mutex); + + try + { + socket().shutdown(); + } + catch (...) + { + } + } +} + +} diff --git a/src/Server/HTTP/HTTPServerConnection.h b/src/Server/HTTP/HTTPServerConnection.h new file mode 100644 index 00000000000..589c33025bf --- /dev/null +++ b/src/Server/HTTP/HTTPServerConnection.h @@ -0,0 +1,36 @@ +#pragma once + +#include +#include + +#include +#include +#include + +namespace DB +{ + +class HTTPServerConnection : public Poco::Net::TCPServerConnection +{ +public: + HTTPServerConnection( + const Context & context, + const Poco::Net::StreamSocket & socket, + Poco::Net::HTTPServerParams::Ptr params, + HTTPRequestHandlerFactoryPtr factory); + + void run() override; + +protected: + static void sendErrorResponse(Poco::Net::HTTPServerSession & session, Poco::Net::HTTPResponse::HTTPStatus status); + void onServerStopped(const bool & abortCurrent); + +private: + Context context; + Poco::Net::HTTPServerParams::Ptr params; + HTTPRequestHandlerFactoryPtr factory; + bool stopped; + std::mutex mutex; // guards the |factory| with assumption that creating handlers is not thread-safe. +}; + +} diff --git a/src/Server/HTTP/HTTPServerConnectionFactory.cpp b/src/Server/HTTP/HTTPServerConnectionFactory.cpp new file mode 100644 index 00000000000..876ccb9096b --- /dev/null +++ b/src/Server/HTTP/HTTPServerConnectionFactory.cpp @@ -0,0 +1,19 @@ +#include + +#include + +namespace DB +{ +HTTPServerConnectionFactory::HTTPServerConnectionFactory( + const Context & context_, Poco::Net::HTTPServerParams::Ptr params_, HTTPRequestHandlerFactoryPtr factory_) + : context(context_), params(params_), factory(factory_) +{ + poco_check_ptr(factory); +} + +Poco::Net::TCPServerConnection * HTTPServerConnectionFactory::createConnection(const Poco::Net::StreamSocket & socket) +{ + return new HTTPServerConnection(context, socket, params, factory); +} + +} diff --git a/src/Server/HTTP/HTTPServerConnectionFactory.h b/src/Server/HTTP/HTTPServerConnectionFactory.h new file mode 100644 index 00000000000..4f8ca43cbfb --- /dev/null +++ b/src/Server/HTTP/HTTPServerConnectionFactory.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include + +#include +#include + +namespace DB +{ + +class HTTPServerConnectionFactory : public Poco::Net::TCPServerConnectionFactory +{ +public: + HTTPServerConnectionFactory(const Context & context, Poco::Net::HTTPServerParams::Ptr params, HTTPRequestHandlerFactoryPtr factory); + + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket) override; + +private: + Context context; + Poco::Net::HTTPServerParams::Ptr params; + HTTPRequestHandlerFactoryPtr factory; +}; + +} diff --git a/src/Server/HTTP/HTTPServerRequest.cpp b/src/Server/HTTP/HTTPServerRequest.cpp new file mode 100644 index 00000000000..bdba6a51d91 --- /dev/null +++ b/src/Server/HTTP/HTTPServerRequest.cpp @@ -0,0 +1,123 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace DB +{ + +HTTPServerRequest::HTTPServerRequest(const Context & context, HTTPServerResponse & response, Poco::Net::HTTPServerSession & session) +{ + response.attachRequest(this); + + /// Now that we know socket is still connected, obtain addresses + client_address = session.clientAddress(); + server_address = session.serverAddress(); + + auto receive_timeout = context.getSettingsRef().http_receive_timeout; + auto send_timeout = context.getSettingsRef().http_send_timeout; + auto max_query_size = context.getSettingsRef().max_query_size; + + session.socket().setReceiveTimeout(receive_timeout); + session.socket().setSendTimeout(send_timeout); + + auto in = std::make_unique(session.socket()); + socket = session.socket().impl(); + + readRequest(*in); /// Try parse according to RFC7230 + + if (getChunkedTransferEncoding()) + stream = std::make_unique(std::move(in), max_query_size); + else if (hasContentLength()) + stream = std::make_unique(std::move(in), getContentLength(), false); + else if (getMethod() != HTTPRequest::HTTP_GET && getMethod() != HTTPRequest::HTTP_HEAD && getMethod() != HTTPRequest::HTTP_DELETE) + stream = std::move(in); + else + /// We have to distinguish empty buffer and nullptr. + stream = std::make_unique(); +} + +bool HTTPServerRequest::checkPeerConnected() const +{ + try + { + char b; + if (!socket->receiveBytes(&b, 1, MSG_DONTWAIT | MSG_PEEK)) + return false; + } + catch (Poco::TimeoutException &) + { + } + catch (...) + { + return false; + } + + return true; +} + +void HTTPServerRequest::readRequest(ReadBuffer & in) +{ + char ch; + std::string method; + std::string uri; + std::string version; + + method.reserve(16); + uri.reserve(64); + version.reserve(16); + + if (in.eof()) + throw Poco::Net::NoMessageException(); + + skipWhitespaceIfAny(in); + + if (in.eof()) + throw Poco::Net::MessageException("No HTTP request header"); + + while (in.read(ch) && !Poco::Ascii::isSpace(ch) && method.size() <= MAX_METHOD_LENGTH) + method += ch; + + if (method.size() > MAX_METHOD_LENGTH) + throw Poco::Net::MessageException("HTTP request method invalid or too long"); + + skipWhitespaceIfAny(in); + + while (in.read(ch) && !Poco::Ascii::isSpace(ch) && uri.size() <= MAX_URI_LENGTH) + uri += ch; + + if (uri.size() > MAX_URI_LENGTH) + throw Poco::Net::MessageException("HTTP request URI invalid or too long"); + + skipWhitespaceIfAny(in); + + while (in.read(ch) && !Poco::Ascii::isSpace(ch) && version.size() <= MAX_VERSION_LENGTH) + version += ch; + + if (version.size() > MAX_VERSION_LENGTH) + throw Poco::Net::MessageException("Invalid HTTP version string"); + + // since HTTP always use Windows-style EOL '\r\n' we always can safely skip to '\n' + + skipToNextLineOrEOF(in); + + readHeaders(*this, in); + + skipToNextLineOrEOF(in); + + setMethod(method); + setURI(uri); + setVersion(version); +} + +} diff --git a/src/Server/HTTP/HTTPServerRequest.h b/src/Server/HTTP/HTTPServerRequest.h new file mode 100644 index 00000000000..7fd54850212 --- /dev/null +++ b/src/Server/HTTP/HTTPServerRequest.h @@ -0,0 +1,59 @@ +#pragma once + +#include +#include + +#include + +namespace DB +{ + +class Context; +class HTTPServerResponse; +class ReadBufferFromPocoSocket; + +class HTTPServerRequest : public HTTPRequest +{ +public: + HTTPServerRequest(const Context & context, HTTPServerResponse & response, Poco::Net::HTTPServerSession & session); + + /// FIXME: it's a little bit inconvenient interface. The rationale is that all other ReadBuffer's wrap each other + /// via unique_ptr - but we can't inherit HTTPServerRequest from ReadBuffer and pass it around, + /// since we also need it in other places. + + /// Returns the input stream for reading the request body. + ReadBuffer & getStream() + { + poco_check_ptr(stream); + return *stream; + } + + bool checkPeerConnected() const; + + /// Returns the client's address. + const Poco::Net::SocketAddress & clientAddress() const { return client_address; } + + /// Returns the server's address. + const Poco::Net::SocketAddress & serverAddress() const { return server_address; } + +private: + /// Limits for basic sanity checks when reading a header + enum Limits + { + MAX_NAME_LENGTH = 256, + MAX_VALUE_LENGTH = 8192, + MAX_METHOD_LENGTH = 32, + MAX_URI_LENGTH = 16384, + MAX_VERSION_LENGTH = 8, + MAX_FIELDS_NUMBER = 100, + }; + + std::unique_ptr stream; + Poco::Net::SocketImpl * socket; + Poco::Net::SocketAddress client_address; + Poco::Net::SocketAddress server_address; + + void readRequest(ReadBuffer & in); +}; + +} diff --git a/src/Server/HTTP/HTTPServerResponse.cpp b/src/Server/HTTP/HTTPServerResponse.cpp new file mode 100644 index 00000000000..e3d52fffa80 --- /dev/null +++ b/src/Server/HTTP/HTTPServerResponse.cpp @@ -0,0 +1,163 @@ +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +HTTPServerResponse::HTTPServerResponse(Poco::Net::HTTPServerSession & session_) : session(session_) +{ +} + +void HTTPServerResponse::sendContinue() +{ + Poco::Net::HTTPHeaderOutputStream hs(session); + hs << getVersion() << " 100 Continue\r\n\r\n"; +} + +std::shared_ptr HTTPServerResponse::send() +{ + poco_assert(!stream); + + if ((request && request->getMethod() == HTTPRequest::HTTP_HEAD) || getStatus() < 200 || getStatus() == HTTPResponse::HTTP_NO_CONTENT + || getStatus() == HTTPResponse::HTTP_NOT_MODIFIED) + { + Poco::CountingOutputStream cs; + write(cs); + stream = std::make_shared(session, cs.chars()); + write(*stream); + } + else if (getChunkedTransferEncoding()) + { + Poco::Net::HTTPHeaderOutputStream hs(session); + write(hs); + stream = std::make_shared(session); + } + else if (hasContentLength()) + { + Poco::CountingOutputStream cs; + write(cs); + stream = std::make_shared(session, getContentLength64() + cs.chars()); + write(*stream); + } + else + { + stream = std::make_shared(session); + setKeepAlive(false); + write(*stream); + } + + return stream; +} + +std::pair, std::shared_ptr> HTTPServerResponse::beginSend() +{ + poco_assert(!stream); + poco_assert(!header_stream); + + /// NOTE: Code is not exception safe. + + if ((request && request->getMethod() == HTTPRequest::HTTP_HEAD) || getStatus() < 200 || getStatus() == HTTPResponse::HTTP_NO_CONTENT + || getStatus() == HTTPResponse::HTTP_NOT_MODIFIED) + { + throw Poco::Exception("HTTPServerResponse::beginSend is invalid for HEAD request"); + } + else if (getChunkedTransferEncoding()) + { + header_stream = std::make_shared(session); + beginWrite(*header_stream); + stream = std::make_shared(session); + } + else if (hasContentLength()) + { + throw Poco::Exception("HTTPServerResponse::beginSend is invalid for response with Content-Length header"); + } + else + { + stream = std::make_shared(session); + header_stream = stream; + setKeepAlive(false); + beginWrite(*stream); + } + + return std::make_pair(header_stream, stream); +} + +void HTTPServerResponse::sendFile(const std::string & path, const std::string & mediaType) +{ + poco_assert(!stream); + + Poco::File f(path); + Poco::Timestamp date_time = f.getLastModified(); + Poco::File::FileSize length = f.getSize(); + set("Last-Modified", Poco::DateTimeFormatter::format(date_time, Poco::DateTimeFormat::HTTP_FORMAT)); + setContentLength64(length); + setContentType(mediaType); + setChunkedTransferEncoding(false); + + Poco::FileInputStream istr(path); + if (istr.good()) + { + stream = std::make_shared(session); + write(*stream); + if (request && request->getMethod() != HTTPRequest::HTTP_HEAD) + { + Poco::StreamCopier::copyStream(istr, *stream); + } + } + else + throw Poco::OpenFileException(path); +} + +void HTTPServerResponse::sendBuffer(const void * buffer, std::size_t length) +{ + poco_assert(!stream); + + setContentLength(static_cast(length)); + setChunkedTransferEncoding(false); + + stream = std::make_shared(session); + write(*stream); + if (request && request->getMethod() != HTTPRequest::HTTP_HEAD) + { + stream->write(static_cast(buffer), static_cast(length)); + } +} + +void HTTPServerResponse::redirect(const std::string & uri, HTTPStatus status) +{ + poco_assert(!stream); + + setContentLength(0); + setChunkedTransferEncoding(false); + + setStatusAndReason(status); + set("Location", uri); + + stream = std::make_shared(session); + write(*stream); +} + +void HTTPServerResponse::requireAuthentication(const std::string & realm) +{ + poco_assert(!stream); + + setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED); + std::string auth("Basic realm=\""); + auth.append(realm); + auth.append("\""); + set("WWW-Authenticate", auth); +} + +} diff --git a/src/Server/HTTP/HTTPServerResponse.h b/src/Server/HTTP/HTTPServerResponse.h new file mode 100644 index 00000000000..82221ce3a83 --- /dev/null +++ b/src/Server/HTTP/HTTPServerResponse.h @@ -0,0 +1,91 @@ +#pragma once + +#include + +#include +#include + +#include +#include + +namespace DB +{ + +class HTTPServerRequest; + +class HTTPServerResponse : public HTTPResponse +{ +public: + explicit HTTPServerResponse(Poco::Net::HTTPServerSession & session); + + void sendContinue(); /// Sends a 100 Continue response to the client. + + /// Sends the response header to the client and + /// returns an output stream for sending the + /// response body. + /// + /// Must not be called after beginSend(), sendFile(), sendBuffer() + /// or redirect() has been called. + std::shared_ptr send(); /// TODO: use some WriteBuffer implementation here. + + /// Sends the response headers to the client + /// but do not finish headers with \r\n, + /// allowing to continue sending additional header fields. + /// + /// Must not be called after send(), sendFile(), sendBuffer() + /// or redirect() has been called. + std::pair, std::shared_ptr> beginSend(); /// TODO: use some WriteBuffer implementation here. + + /// Sends the response header to the client, followed + /// by the content of the given file. + /// + /// Must not be called after send(), sendBuffer() + /// or redirect() has been called. + /// + /// Throws a FileNotFoundException if the file + /// cannot be found, or an OpenFileException if + /// the file cannot be opened. + void sendFile(const std::string & path, const std::string & mediaType); + + /// Sends the response header to the client, followed + /// by the contents of the given buffer. + /// + /// The Content-Length header of the response is set + /// to length and chunked transfer encoding is disabled. + /// + /// If both the HTTP message header and body (from the + /// given buffer) fit into one single network packet, the + /// complete response can be sent in one network packet. + /// + /// Must not be called after send(), sendFile() + /// or redirect() has been called. + void sendBuffer(const void * pBuffer, std::size_t length); /// FIXME: do we need this one? + + /// Sets the status code, which must be one of + /// HTTP_MOVED_PERMANENTLY (301), HTTP_FOUND (302), + /// or HTTP_SEE_OTHER (303), + /// and sets the "Location" header field + /// to the given URI, which according to + /// the HTTP specification, must be absolute. + /// + /// Must not be called after send() has been called. + void redirect(const std::string & uri, Poco::Net::HTTPResponse::HTTPStatus status = Poco::Net::HTTPResponse::HTTP_FOUND); + + void requireAuthentication(const std::string & realm); + /// Sets the status code to 401 (Unauthorized) + /// and sets the "WWW-Authenticate" header field + /// according to the given realm. + + /// Returns true if the response (header) has been sent. + bool sent() const { return !!stream; } + + void attachRequest(HTTPServerRequest * request_) { request = request_; } + +private: + Poco::Net::HTTPServerSession & session; + HTTPServerRequest * request; + std::shared_ptr stream; + std::shared_ptr header_stream; +}; + +} diff --git a/src/Server/HTTP/ReadHeaders.cpp b/src/Server/HTTP/ReadHeaders.cpp new file mode 100644 index 00000000000..77ec48c11b1 --- /dev/null +++ b/src/Server/HTTP/ReadHeaders.cpp @@ -0,0 +1,88 @@ +#include + +#include +#include + +#include + +namespace DB +{ + +void readHeaders( + Poco::Net::MessageHeader & headers, ReadBuffer & in, size_t max_fields_number, size_t max_name_length, size_t max_value_length) +{ + char ch = 0; // silence uninitialized warning from gcc-* + std::string name; + std::string value; + + name.reserve(32); + value.reserve(64); + + size_t fields = 0; + + while (true) + { + if (fields > max_fields_number) + throw Poco::Net::MessageException("Too many header fields"); + + name.clear(); + value.clear(); + + /// Field name + while (in.peek(ch) && ch != ':' && !Poco::Ascii::isSpace(ch) && name.size() <= max_name_length) + { + name += ch; + in.ignore(); + } + + if (in.eof()) + throw Poco::Net::MessageException("Field is invalid"); + + if (name.empty()) + { + if (ch == '\r') + /// Start of the empty-line delimiter + break; + if (ch == ':') + throw Poco::Net::MessageException("Field name is empty"); + } + else + { + if (name.size() > max_name_length) + throw Poco::Net::MessageException("Field name is too long"); + if (ch != ':') + throw Poco::Net::MessageException("Field name is invalid or no colon found"); + } + + in.ignore(); + + skipWhitespaceIfAny(in, true); + + if (in.eof()) + throw Poco::Net::MessageException("Field is invalid"); + + /// Field value - folded values not supported. + while (in.read(ch) && ch != '\r' && ch != '\n' && value.size() <= max_value_length) + value += ch; + + if (in.eof()) + throw Poco::Net::MessageException("Field is invalid"); + + if (value.empty()) + throw Poco::Net::MessageException("Field value is empty"); + + if (ch == '\n') + throw Poco::Net::MessageException("No CRLF found"); + + if (value.size() > max_value_length) + throw Poco::Net::MessageException("Field value is too long"); + + skipToNextLineOrEOF(in); + + Poco::trimRightInPlace(value); + headers.add(name, headers.decodeWord(value)); + ++fields; + } +} + +} diff --git a/src/Server/HTTP/ReadHeaders.h b/src/Server/HTTP/ReadHeaders.h new file mode 100644 index 00000000000..e94cddcf489 --- /dev/null +++ b/src/Server/HTTP/ReadHeaders.h @@ -0,0 +1,17 @@ +#pragma once + +#include + +namespace DB +{ + +class ReadBuffer; + +void readHeaders( + Poco::Net::MessageHeader & headers, + ReadBuffer & in, + size_t max_fields_number = 100, + size_t max_name_length = 256, + size_t max_value_length = 8192); + +} diff --git a/src/IO/WriteBufferFromHTTPServerResponse.cpp b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp similarity index 81% rename from src/IO/WriteBufferFromHTTPServerResponse.cpp rename to src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp index ac2eeac1652..86133fc2ffe 100644 --- a/src/IO/WriteBufferFromHTTPServerResponse.cpp +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp @@ -1,9 +1,8 @@ -#include -#include -#include -#include +#include + #include #include +#include #include #include #include @@ -13,6 +12,8 @@ # include #endif +#include + namespace DB { @@ -33,16 +34,13 @@ void WriteBufferFromHTTPServerResponse::startSendHeaders() setResponseDefaultHeaders(response, keep_alive_timeout); -#if defined(POCO_CLICKHOUSE_PATCH) - if (request.getMethod() != Poco::Net::HTTPRequest::HTTP_HEAD) + if (!is_http_method_head) std::tie(response_header_ostr, response_body_ostr) = response.beginSend(); -#endif } } void WriteBufferFromHTTPServerResponse::writeHeaderSummary() { -#if defined(POCO_CLICKHOUSE_PATCH) if (headers_finished_sending) return; @@ -51,12 +49,10 @@ void WriteBufferFromHTTPServerResponse::writeHeaderSummary() if (response_header_ostr) *response_header_ostr << "X-ClickHouse-Summary: " << progress_string_writer.str() << "\r\n" << std::flush; -#endif } void WriteBufferFromHTTPServerResponse::writeHeaderProgress() { -#if defined(POCO_CLICKHOUSE_PATCH) if (headers_finished_sending) return; @@ -65,7 +61,6 @@ void WriteBufferFromHTTPServerResponse::writeHeaderProgress() if (response_header_ostr) *response_header_ostr << "X-ClickHouse-Progress: " << progress_string_writer.str() << "\r\n" << std::flush; -#endif } void WriteBufferFromHTTPServerResponse::finishSendHeaders() @@ -75,23 +70,16 @@ void WriteBufferFromHTTPServerResponse::finishSendHeaders() writeHeaderSummary(); headers_finished_sending = true; - if (request.getMethod() != Poco::Net::HTTPRequest::HTTP_HEAD) + if (!is_http_method_head) { -#if defined(POCO_CLICKHOUSE_PATCH) /// Send end of headers delimiter. if (response_header_ostr) *response_header_ostr << "\r\n" << std::flush; -#else - /// Newline autosent by response.send() - /// if nothing to send in body: - if (!response_body_ostr) - response_body_ostr = &(response.send()); -#endif } else { if (!response_body_ostr) - response_body_ostr = &(response.send()); + response_body_ostr = response.send(); } } } @@ -104,23 +92,15 @@ void WriteBufferFromHTTPServerResponse::nextImpl() startSendHeaders(); - if (!out && request.getMethod() != Poco::Net::HTTPRequest::HTTP_HEAD) + if (!out && !is_http_method_head) { if (compress) { auto content_encoding_name = toContentEncodingName(compression_method); -#if defined(POCO_CLICKHOUSE_PATCH) *response_header_ostr << "Content-Encoding: " << content_encoding_name << "\r\n"; -#else - response.set("Content-Encoding", content_encoding_name); -#endif } -#if !defined(POCO_CLICKHOUSE_PATCH) - response_body_ostr = &(response.send()); -#endif - /// We reuse our buffer in "out" to avoid extra allocations and copies. if (compress) @@ -150,14 +130,14 @@ void WriteBufferFromHTTPServerResponse::nextImpl() WriteBufferFromHTTPServerResponse::WriteBufferFromHTTPServerResponse( - Poco::Net::HTTPServerRequest & request_, - Poco::Net::HTTPServerResponse & response_, + HTTPServerResponse & response_, + bool is_http_method_head_, unsigned keep_alive_timeout_, bool compress_, CompressionMethod compression_method_) : BufferWithOwnMemory(DBMS_DEFAULT_BUFFER_SIZE) - , request(request_) , response(response_) + , is_http_method_head(is_http_method_head_) , keep_alive_timeout(keep_alive_timeout_) , compress(compress_) , compression_method(compression_method_) diff --git a/src/IO/WriteBufferFromHTTPServerResponse.h b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h similarity index 86% rename from src/IO/WriteBufferFromHTTPServerResponse.h rename to src/Server/HTTP/WriteBufferFromHTTPServerResponse.h index 85a81c3dda7..b4ff454195f 100644 --- a/src/IO/WriteBufferFromHTTPServerResponse.h +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h @@ -1,31 +1,17 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include #include -#include +#include #include #include +#include +#include +#include #include #include -#if !defined(ARCADIA_BUILD) -# include -#endif - - -namespace Poco -{ - namespace Net - { - class HTTPServerResponse; - } -} +#include +#include namespace DB @@ -47,20 +33,17 @@ namespace DB class WriteBufferFromHTTPServerResponse final : public BufferWithOwnMemory { private: - Poco::Net::HTTPServerRequest & request; - Poco::Net::HTTPServerResponse & response; + HTTPServerResponse & response; + bool is_http_method_head; bool add_cors_header = false; unsigned keep_alive_timeout = 0; bool compress = false; CompressionMethod compression_method; int compression_level = 1; - std::ostream * response_body_ostr = nullptr; - -#if defined(POCO_CLICKHOUSE_PATCH) - std::ostream * response_header_ostr = nullptr; -#endif + std::shared_ptr response_body_ostr; + std::shared_ptr response_header_ostr; std::unique_ptr out; @@ -91,8 +74,8 @@ private: public: WriteBufferFromHTTPServerResponse( - Poco::Net::HTTPServerRequest & request_, - Poco::Net::HTTPServerResponse & response_, + HTTPServerResponse & response_, + bool is_http_method_head_, unsigned keep_alive_timeout_, bool compress_ = false, /// If true - set Content-Encoding header and compress the result. CompressionMethod compression_method_ = CompressionMethod::None); diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index e9a77c3b433..d200ee7421f 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -1,49 +1,47 @@ -#include "HTTPHandler.h" +#include -#include "HTTPHandlerFactory.h" -#include "HTTPHandlerRequestFilter.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include +#include +#include +#include +#include +#include +#include #include #include -#include -#include #include +#include +#include +#include #include #include -#include -#include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include +#include +#include #if !defined(ARCADIA_BUILD) # include #endif +#include +#include +#include +#include + +#include +#include + namespace DB { @@ -237,16 +235,14 @@ HTTPHandler::HTTPHandler(IServer & server_, const std::string & name) void HTTPHandler::processQuery( Context & context, - Poco::Net::HTTPServerRequest & request, + HTTPServerRequest & request, HTMLForm & params, - Poco::Net::HTTPServerResponse & response, + HTTPServerResponse & response, Output & used_output, std::optional & query_scope) { LOG_TRACE(log, "Request URI: {}", request.getURI()); - std::istream & istr = request.stream(); - /// The user and password can be passed by headers (similar to X-Auth-*), /// which is used by load balancers to pass authentication information. std::string user = request.get("X-ClickHouse-User", ""); @@ -291,9 +287,9 @@ void HTTPHandler::processQuery( client_info.interface = ClientInfo::Interface::HTTP; ClientInfo::HTTPMethod http_method = ClientInfo::HTTPMethod::UNKNOWN; - if (request.getMethod() == Poco::Net::HTTPServerRequest::HTTP_GET) + if (request.getMethod() == HTTPServerRequest::HTTP_GET) http_method = ClientInfo::HTTPMethod::GET; - else if (request.getMethod() == Poco::Net::HTTPServerRequest::HTTP_POST) + else if (request.getMethod() == HTTPServerRequest::HTTP_POST) http_method = ClientInfo::HTTPMethod::POST; client_info.http_method = http_method; @@ -356,10 +352,8 @@ void HTTPHandler::processQuery( } #endif - // Set the query id supplied by the user, if any, and also update the - // OpenTelemetry fields. - context.setCurrentQueryId(params.get("query_id", - request.get("X-ClickHouse-Query-Id", ""))); + // Set the query id supplied by the user, if any, and also update the OpenTelemetry fields. + context.setCurrentQueryId(params.get("query_id", request.get("X-ClickHouse-Query-Id", ""))); client_info.initial_query_id = client_info.current_query_id; @@ -405,7 +399,11 @@ void HTTPHandler::processQuery( unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", 10); used_output.out = std::make_shared( - request, response, keep_alive_timeout, client_supports_http_compression, http_response_compression_method); + response, + request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, + keep_alive_timeout, + client_supports_http_compression, + http_response_compression_method); if (internal_compression) used_output.out_maybe_compressed = std::make_shared(*used_output.out); @@ -459,8 +457,8 @@ void HTTPHandler::processQuery( /// Request body can be compressed using algorithm specified in the Content-Encoding header. String http_request_compression_method_str = request.get("Content-Encoding", ""); - std::unique_ptr in_post = wrapReadBufferWithCompressionMethod( - std::make_unique(istr), chooseCompressionMethod({}, http_request_compression_method_str)); + auto in_post = wrapReadBufferWithCompressionMethod( + wrapReadBufferReference(request.getStream()), chooseCompressionMethod({}, http_request_compression_method_str)); /// The data can also be compressed using incompatible internal algorithm. This is indicated by /// 'decompress' query parameter. @@ -513,7 +511,7 @@ void HTTPHandler::processQuery( const auto & settings = context.getSettingsRef(); /// Only readonly queries are allowed for HTTP GET requests. - if (request.getMethod() == Poco::Net::HTTPServerRequest::HTTP_GET) + if (request.getMethod() == HTTPServerRequest::HTTP_GET) { if (settings.readonly == 0) context.setSetting("readonly", 2); @@ -608,26 +606,12 @@ void HTTPHandler::processQuery( if (settings.readonly > 0 && settings.cancel_http_readonly_queries_on_client_close) { - Poco::Net::StreamSocket & socket = dynamic_cast(request).socket(); - - append_callback([&context, &socket](const Progress &) + append_callback([&context, &request](const Progress &) { - /// Assume that at the point this method is called no one is reading data from the socket any more. - /// True for read-only queries. - try - { - char b; - int status = socket.receiveBytes(&b, 1, MSG_DONTWAIT | MSG_PEEK); - if (status == 0) - context.killCurrentQuery(); - } - catch (Poco::TimeoutException &) - { - } - catch (...) - { + /// Assume that at the point this method is called no one is reading data from the socket any more: + /// should be true for read-only queries. + if (!request.checkPeerConnected()) context.killCurrentQuery(); - } }); } @@ -656,22 +640,23 @@ void HTTPHandler::processQuery( used_output.out->finalize(); } -void HTTPHandler::trySendExceptionToClient(const std::string & s, int exception_code, - Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response, - Output & used_output) +void HTTPHandler::trySendExceptionToClient( + const std::string & s, int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output) { try { response.set("X-ClickHouse-Exception-Code", toString(exception_code)); + /// FIXME: make sure that no one else is reading from the same stream at the moment. + /// If HTTP method is POST and Keep-Alive is turned on, we should read the whole request body /// to avoid reading part of the current request body in the next request. if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST && response.getKeepAlive() - && !request.stream().eof() - && exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED) + && exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED + && !request.getStream().eof()) { - request.stream().ignore(std::numeric_limits::max()); + request.getStream().ignoreAll(); } bool auth_fail = exception_code == ErrorCodes::UNKNOWN_USER || @@ -690,7 +675,7 @@ void HTTPHandler::trySendExceptionToClient(const std::string & s, int exception_ if (!response.sent() && !used_output.out_maybe_compressed) { /// If nothing was sent yet and we don't even know if we must compress the response. - response.send() << s << std::endl; + *response.send() << s << std::endl; } else if (used_output.out_maybe_compressed) { @@ -717,6 +702,11 @@ void HTTPHandler::trySendExceptionToClient(const std::string & s, int exception_ used_output.out_maybe_compressed->next(); used_output.out->finalize(); } + else + { + assert(false); + __builtin_unreachable(); + } } catch (...) { @@ -725,7 +715,7 @@ void HTTPHandler::trySendExceptionToClient(const std::string & s, int exception_ } -void HTTPHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) +void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { setThreadName("HTTPHandler"); ThreadStatus thread_status; @@ -746,17 +736,18 @@ void HTTPHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne response.setContentType("text/plain; charset=UTF-8"); response.set("X-ClickHouse-Server-Display-Name", server_display_name); /// For keep-alive to work. - if (request.getVersion() == Poco::Net::HTTPServerRequest::HTTP_1_1) + if (request.getVersion() == HTTPServerRequest::HTTP_1_1) response.setChunkedTransferEncoding(true); HTMLForm params(request); with_stacktrace = params.getParsed("stacktrace", false); /// Workaround. Poco does not detect 411 Length Required case. - if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST && !request.getChunkedTransferEncoding() && - !request.hasContentLength()) + if (request.getMethod() == HTTPRequest::HTTP_POST && !request.getChunkedTransferEncoding() && !request.hasContentLength()) { - throw Exception("The Transfer-Encoding is not chunked and there is no Content-Length header for POST request", ErrorCodes::HTTP_LENGTH_REQUIRED); + throw Exception( + "The Transfer-Encoding is not chunked and there is no Content-Length header for POST request", + ErrorCodes::HTTP_LENGTH_REQUIRED); } processQuery(context, request, params, response, used_output, query_scope); @@ -800,7 +791,7 @@ bool DynamicQueryHandler::customizeQueryParam(Context & context, const std::stri return false; } -std::string DynamicQueryHandler::getQuery(Poco::Net::HTTPServerRequest & request, HTMLForm & params, Context & context) +std::string DynamicQueryHandler::getQuery(HTTPServerRequest & request, HTMLForm & params, Context & context) { if (likely(!startsWith(request.getContentType(), "multipart/form-data"))) { @@ -814,7 +805,7 @@ std::string DynamicQueryHandler::getQuery(Poco::Net::HTTPServerRequest & request /// Support for "external data for query processing". /// Used in case of POST request with form-data, but it isn't expected to be deleted after that scope. ExternalTablesHandler handler(context, params); - params.load(request, request.stream(), handler); + params.load(request, request.getStream(), handler); std::string full_query; /// Params are of both form params POST and uri (GET params) @@ -844,7 +835,7 @@ bool PredefinedQueryHandler::customizeQueryParam(Context & context, const std::s return false; } -void PredefinedQueryHandler::customizeContext(Poco::Net::HTTPServerRequest & request, DB::Context & context) +void PredefinedQueryHandler::customizeContext(HTTPServerRequest & request, DB::Context & context) { /// If in the configuration file, the handler's header is regex and contains named capture group /// We will extract regex named capture groups as query parameters @@ -880,22 +871,26 @@ void PredefinedQueryHandler::customizeContext(Poco::Net::HTTPServerRequest & req } } -std::string PredefinedQueryHandler::getQuery(Poco::Net::HTTPServerRequest & request, HTMLForm & params, Context & context) +std::string PredefinedQueryHandler::getQuery(HTTPServerRequest & request, HTMLForm & params, Context & context) { if (unlikely(startsWith(request.getContentType(), "multipart/form-data"))) { /// Support for "external data for query processing". ExternalTablesHandler handler(context, params); - params.load(request, request.stream(), handler); + params.load(request, request.getStream(), handler); } return predefined_query; } -Poco::Net::HTTPRequestHandlerFactory * createDynamicHandlerFactory(IServer & server, const std::string & config_prefix) +HTTPRequestHandlerFactoryPtr createDynamicHandlerFactory(IServer & server, const std::string & config_prefix) { - std::string query_param_name = server.config().getString(config_prefix + ".handler.query_param_name", "query"); - return addFiltersFromConfig(new HandlingRuleHTTPHandlerFactory(server, std::move(query_param_name)), server.config(), config_prefix); + const auto & query_param_name = server.config().getString(config_prefix + ".handler.query_param_name", "query"); + auto factory = std::make_shared>(server, std::move(query_param_name)); + + factory->addFiltersFromConfig(server.config(), config_prefix); + + return factory; } static inline bool capturingNamedQueryParam(NameSet receive_params, const CompiledRegexPtr & compiled_regex) @@ -913,18 +908,20 @@ static inline CompiledRegexPtr getCompiledRegex(const std::string & expression) auto compiled_regex = std::make_shared(expression); if (!compiled_regex->ok()) - throw Exception("Cannot compile re2: " + expression + " for http handling rule, error: " + - compiled_regex->error() + ". Look at https://github.com/google/re2/wiki/Syntax for reference.", ErrorCodes::CANNOT_COMPILE_REGEXP); + throw Exception( + "Cannot compile re2: " + expression + " for http handling rule, error: " + compiled_regex->error() + + ". Look at https://github.com/google/re2/wiki/Syntax for reference.", + ErrorCodes::CANNOT_COMPILE_REGEXP); return compiled_regex; } -Poco::Net::HTTPRequestHandlerFactory * createPredefinedHandlerFactory(IServer & server, const std::string & config_prefix) +HTTPRequestHandlerFactoryPtr createPredefinedHandlerFactory(IServer & server, const std::string & config_prefix) { Poco::Util::AbstractConfiguration & configuration = server.config(); if (!configuration.has(config_prefix + ".handler.query")) - throw Exception("There is no path '" + config_prefix + ".handler.query" + "' in configuration file.", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception("There is no path '" + config_prefix + ".handler.query' in configuration file.", ErrorCodes::NO_ELEMENTS_IN_CONFIG); std::string predefined_query = configuration.getString(config_prefix + ".handler.query"); NameSet analyze_receive_params = analyzeReceiveQueryParams(predefined_query); @@ -946,6 +943,8 @@ Poco::Net::HTTPRequestHandlerFactory * createPredefinedHandlerFactory(IServer & headers_name_with_regex.emplace(std::make_pair(header_name, regex)); } + std::shared_ptr> factory; + if (configuration.has(config_prefix + ".url")) { auto url_expression = configuration.getString(config_prefix + ".url"); @@ -955,14 +954,23 @@ Poco::Net::HTTPRequestHandlerFactory * createPredefinedHandlerFactory(IServer & auto regex = getCompiledRegex(url_expression); if (capturingNamedQueryParam(analyze_receive_params, regex)) - return addFiltersFromConfig(new HandlingRuleHTTPHandlerFactory( - server, std::move(analyze_receive_params), std::move(predefined_query), std::move(regex), - std::move(headers_name_with_regex)), configuration, config_prefix); + { + factory = std::make_shared>( + server, + std::move(analyze_receive_params), + std::move(predefined_query), + std::move(regex), + std::move(headers_name_with_regex)); + factory->addFiltersFromConfig(configuration, config_prefix); + return factory; + } } - return addFiltersFromConfig(new HandlingRuleHTTPHandlerFactory( - server, std::move(analyze_receive_params), std::move(predefined_query), CompiledRegexPtr{} ,std::move(headers_name_with_regex)), - configuration, config_prefix); + factory = std::make_shared>( + server, std::move(analyze_receive_params), std::move(predefined_query), CompiledRegexPtr{}, std::move(headers_name_with_regex)); + factory->addFiltersFromConfig(configuration, config_prefix); + + return factory; } } diff --git a/src/Server/HTTPHandler.h b/src/Server/HTTPHandler.h index 96727df5404..e903fbfbff7 100644 --- a/src/Server/HTTPHandler.h +++ b/src/Server/HTTPHandler.h @@ -1,13 +1,10 @@ #pragma once -#include "IServer.h" - -#include - -#include -#include -#include #include +#include +#include +#include +#include #include @@ -21,23 +18,24 @@ namespace Poco { class Logger; } namespace DB { +class IServer; class WriteBufferFromHTTPServerResponse; using CompiledRegexPtr = std::shared_ptr; -class HTTPHandler : public Poco::Net::HTTPRequestHandler +class HTTPHandler : public HTTPRequestHandler { public: - explicit HTTPHandler(IServer & server_, const std::string & name); + HTTPHandler(IServer & server_, const std::string & name); - void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; /// This method is called right before the query execution. - virtual void customizeContext(Poco::Net::HTTPServerRequest & /*request*/, Context & /* context */) {} + virtual void customizeContext(HTTPServerRequest & /* request */, Context & /* context */) {} virtual bool customizeQueryParam(Context & context, const std::string & key, const std::string & value) = 0; - virtual std::string getQuery(Poco::Net::HTTPServerRequest & request, HTMLForm & params, Context & context) = 0; + virtual std::string getQuery(HTTPServerRequest & request, HTMLForm & params, Context & context) = 0; private: struct Output @@ -74,17 +72,17 @@ private: /// Also initializes 'used_output'. void processQuery( Context & context, - Poco::Net::HTTPServerRequest & request, + HTTPServerRequest & request, HTMLForm & params, - Poco::Net::HTTPServerResponse & response, + HTTPServerResponse & response, Output & used_output, std::optional & query_scope); void trySendExceptionToClient( const std::string & s, int exception_code, - Poco::Net::HTTPServerRequest & request, - Poco::Net::HTTPServerResponse & response, + HTTPServerRequest & request, + HTTPServerResponse & response, Output & used_output); static void pushDelayedResults(Output & used_output); @@ -97,7 +95,7 @@ private: public: explicit DynamicQueryHandler(IServer & server_, const std::string & param_name_ = "query"); - std::string getQuery(Poco::Net::HTTPServerRequest & request, HTMLForm & params, Context & context) override; + std::string getQuery(HTTPServerRequest & request, HTMLForm & params, Context & context) override; bool customizeQueryParam(Context &context, const std::string &key, const std::string &value) override; }; @@ -114,9 +112,9 @@ public: IServer & server_, const NameSet & receive_params_, const std::string & predefined_query_ , const CompiledRegexPtr & url_regex_, const std::unordered_map & header_name_with_regex_); - virtual void customizeContext(Poco::Net::HTTPServerRequest & request, Context & context) override; + virtual void customizeContext(HTTPServerRequest & request, Context & context) override; - std::string getQuery(Poco::Net::HTTPServerRequest & request, HTMLForm & params, Context & context) override; + std::string getQuery(HTTPServerRequest & request, HTMLForm & params, Context & context) override; bool customizeQueryParam(Context & context, const std::string & key, const std::string & value) override; }; diff --git a/src/Server/HTTPHandlerFactory.cpp b/src/Server/HTTPHandlerFactory.cpp index 9eac60355d2..db80750beb8 100644 --- a/src/Server/HTTPHandlerFactory.cpp +++ b/src/Server/HTTPHandlerFactory.cpp @@ -1,4 +1,7 @@ -#include "HTTPHandlerFactory.h" +#include + +#include +#include #include @@ -29,7 +32,7 @@ HTTPRequestHandlerFactoryMain::HTTPRequestHandlerFactoryMain(const std::string & { } -Poco::Net::HTTPRequestHandler * HTTPRequestHandlerFactoryMain::createRequestHandler(const Poco::Net::HTTPServerRequest & request) +std::unique_ptr HTTPRequestHandlerFactoryMain::createRequestHandler(const HTTPServerRequest & request) { LOG_TRACE(log, "HTTP Request for {}. Method: {}, Address: {}, User-Agent: {}{}, Content Type: {}, Transfer Encoding: {}, X-Forwarded-For: {}", name, request.getMethod(), request.clientAddress().toString(), request.get("User-Agent", "(none)"), @@ -38,8 +41,8 @@ Poco::Net::HTTPRequestHandler * HTTPRequestHandlerFactoryMain::createRequestHand for (auto & handler_factory : child_factories) { - auto * handler = handler_factory->createRequestHandler(request); - if (handler != nullptr) + auto handler = handler_factory->createRequestHandler(request); + if (handler) return handler; } @@ -47,31 +50,16 @@ Poco::Net::HTTPRequestHandler * HTTPRequestHandlerFactoryMain::createRequestHand || request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD || request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST) { - return new NotFoundHandler; + return std::unique_ptr(new NotFoundHandler); } return nullptr; } -HTTPRequestHandlerFactoryMain::~HTTPRequestHandlerFactoryMain() -{ - while (!child_factories.empty()) - { - delete child_factories.back(); - child_factories.pop_back(); - } -} - -HTTPRequestHandlerFactoryMain::TThis * HTTPRequestHandlerFactoryMain::addHandler(Poco::Net::HTTPRequestHandlerFactory * child_factory) -{ - child_factories.emplace_back(child_factory); - return this; -} - static inline auto createHandlersFactoryFromConfig( IServer & server, const std::string & name, const String & prefix, AsynchronousMetrics & async_metrics) { - auto main_handler_factory = std::make_unique(name); + auto main_handler_factory = std::make_shared(name); Poco::Util::AbstractConfiguration::Keys keys; server.config().keys(prefix, keys); @@ -109,10 +97,11 @@ static inline auto createHandlersFactoryFromConfig( ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); } - return main_handler_factory.release(); + return main_handler_factory; } -static inline Poco::Net::HTTPRequestHandlerFactory * createHTTPHandlerFactory(IServer & server, const std::string & name, AsynchronousMetrics & async_metrics) +static inline HTTPRequestHandlerFactoryPtr +createHTTPHandlerFactory(IServer & server, const std::string & name, AsynchronousMetrics & async_metrics) { if (server.config().has("http_handlers")) { @@ -120,25 +109,25 @@ static inline Poco::Net::HTTPRequestHandlerFactory * createHTTPHandlerFactory(IS } else { - auto factory = std::make_unique(name); + auto factory = std::make_shared(name); addDefaultHandlersFactory(*factory, server, async_metrics); - return factory.release(); + return factory; } } -static inline Poco::Net::HTTPRequestHandlerFactory * createInterserverHTTPHandlerFactory(IServer & server, const std::string & name) +static inline HTTPRequestHandlerFactoryPtr createInterserverHTTPHandlerFactory(IServer & server, const std::string & name) { - auto factory = std::make_unique(name); + auto factory = std::make_shared(name); addCommonDefaultHandlersFactory(*factory, server); - auto main_handler = std::make_unique>(server); + auto main_handler = std::make_shared>(server); main_handler->allowPostAndGetParamsRequest(); - factory->addHandler(main_handler.release()); + factory->addHandler(main_handler); - return factory.release(); + return factory; } -Poco::Net::HTTPRequestHandlerFactory * createHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & name) +HTTPRequestHandlerFactoryPtr createHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & name) { if (name == "HTTPHandler-factory" || name == "HTTPSHandler-factory") return createHTTPHandlerFactory(server, name, async_metrics); @@ -146,12 +135,13 @@ Poco::Net::HTTPRequestHandlerFactory * createHandlerFactory(IServer & server, As return createInterserverHTTPHandlerFactory(server, name); else if (name == "PrometheusHandler-factory") { - auto factory = std::make_unique(name); - auto handler = std::make_unique>( + auto factory = std::make_shared(name); + auto handler = std::make_shared>( server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics)); - handler->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest(); - factory->addHandler(handler.release()); - return factory.release(); + handler->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics")); + handler->allowGetAndHeadRequest(); + factory->addHandler(handler); + return factory; } throw Exception("LOGICAL ERROR: Unknown HTTP handler factory name.", ErrorCodes::LOGICAL_ERROR); @@ -162,39 +152,44 @@ static const auto root_response_expression = "config://http_server_default_respo void addCommonDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server) { - auto root_handler = std::make_unique>(server, root_response_expression); - root_handler->attachStrictPath("/")->allowGetAndHeadRequest(); - factory.addHandler(root_handler.release()); + auto root_handler = std::make_shared>(server, root_response_expression); + root_handler->attachStrictPath("/"); + root_handler->allowGetAndHeadRequest(); + factory.addHandler(root_handler); - auto ping_handler = std::make_unique>(server, ping_response_expression); - ping_handler->attachStrictPath("/ping")->allowGetAndHeadRequest(); - factory.addHandler(ping_handler.release()); + auto ping_handler = std::make_shared>(server, ping_response_expression); + ping_handler->attachStrictPath("/ping"); + ping_handler->allowGetAndHeadRequest(); + factory.addHandler(ping_handler); - auto replicas_status_handler = std::make_unique>(server); - replicas_status_handler->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest(); - factory.addHandler(replicas_status_handler.release()); + auto replicas_status_handler = std::make_shared>(server); + replicas_status_handler->attachNonStrictPath("/replicas_status"); + replicas_status_handler->allowGetAndHeadRequest(); + factory.addHandler(replicas_status_handler); - auto web_ui_handler = std::make_unique>(server, "play.html"); - web_ui_handler->attachNonStrictPath("/play")->allowGetAndHeadRequest(); - factory.addHandler(web_ui_handler.release()); + auto web_ui_handler = std::make_shared>(server, "play.html"); + web_ui_handler->attachNonStrictPath("/play"); + web_ui_handler->allowGetAndHeadRequest(); + factory.addHandler(web_ui_handler); } void addDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server, AsynchronousMetrics & async_metrics) { addCommonDefaultHandlersFactory(factory, server); - auto query_handler = std::make_unique>(server, "query"); + auto query_handler = std::make_shared>(server, "query"); query_handler->allowPostAndGetParamsRequest(); - factory.addHandler(query_handler.release()); + factory.addHandler(query_handler); /// We check that prometheus handler will be served on current (default) port. /// Otherwise it will be created separately, see createHandlerFactory(...). if (server.config().has("prometheus") && server.config().getInt("prometheus.port", 0) == 0) { - auto prometheus_handler = std::make_unique>( + auto prometheus_handler = std::make_shared>( server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics)); - prometheus_handler->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest(); - factory.addHandler(prometheus_handler.release()); + prometheus_handler->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics")); + prometheus_handler->allowGetAndHeadRequest(); + factory.addHandler(prometheus_handler); } } diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h index 3e8313172eb..6297f988eaa 100644 --- a/src/Server/HTTPHandlerFactory.h +++ b/src/Server/HTTPHandlerFactory.h @@ -1,82 +1,102 @@ #pragma once -#include "IServer.h" -#include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include + +#include namespace DB { -/// Handle request using child handlers -class HTTPRequestHandlerFactoryMain : public Poco::Net::HTTPRequestHandlerFactory, boost::noncopyable +namespace ErrorCodes { -private: - using TThis = HTTPRequestHandlerFactoryMain; + extern const int UNKNOWN_ELEMENT_IN_CONFIG; +} +class IServer; + +/// Handle request using child handlers +class HTTPRequestHandlerFactoryMain : public HTTPRequestHandlerFactory +{ +public: + explicit HTTPRequestHandlerFactoryMain(const std::string & name_); + + void addHandler(HTTPRequestHandlerFactoryPtr child_factory) { child_factories.emplace_back(child_factory); } + + std::unique_ptr createRequestHandler(const HTTPServerRequest & request) override; + +private: Poco::Logger * log; std::string name; - std::vector child_factories; -public: - - ~HTTPRequestHandlerFactoryMain() override; - - HTTPRequestHandlerFactoryMain(const std::string & name_); - - TThis * addHandler(Poco::Net::HTTPRequestHandlerFactory * child_factory); - - Poco::Net::HTTPRequestHandler * createRequestHandler(const Poco::Net::HTTPServerRequest & request) override; + std::vector child_factories; }; template -class HandlingRuleHTTPHandlerFactory : public Poco::Net::HTTPRequestHandlerFactory +class HandlingRuleHTTPHandlerFactory : public HTTPRequestHandlerFactory { public: - using TThis = HandlingRuleHTTPHandlerFactory; - using Filter = std::function; + using Filter = std::function; template - HandlingRuleHTTPHandlerFactory(TArgs &&... args) + explicit HandlingRuleHTTPHandlerFactory(TArgs &&... args) { creator = [args = std::tuple(std::forward(args) ...)]() { return std::apply([&](auto && ... endpoint_args) { - return new TEndpoint(std::forward(endpoint_args)...); + return std::make_unique(std::forward(endpoint_args)...); }, std::move(args)); }; } - TThis * addFilter(Filter cur_filter) + void addFilter(Filter cur_filter) { Filter prev_filter = filter; filter = [prev_filter, cur_filter](const auto & request) { return prev_filter ? prev_filter(request) && cur_filter(request) : cur_filter(request); }; - - return this; } - TThis * attachStrictPath(const String & strict_path) + void addFiltersFromConfig(Poco::Util::AbstractConfiguration & config, const std::string & prefix) { - return addFilter([strict_path](const auto & request) { return request.getURI() == strict_path; }); + Poco::Util::AbstractConfiguration::Keys filters_type; + config.keys(prefix, filters_type); + + for (const auto & filter_type : filters_type) + { + if (filter_type == "handler") + continue; + else if (filter_type == "url") + addFilter(urlFilter(config, prefix + ".url")); + else if (filter_type == "headers") + addFilter(headersFilter(config, prefix + ".headers")); + else if (filter_type == "methods") + addFilter(methodsFilter(config, prefix + ".methods")); + else + throw Exception("Unknown element in config: " + prefix + "." + filter_type, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + } } - TThis * attachNonStrictPath(const String & non_strict_path) + void attachStrictPath(const String & strict_path) { - return addFilter([non_strict_path](const auto & request) { return startsWith(request.getURI(), non_strict_path); }); + addFilter([strict_path](const auto & request) { return request.getURI() == strict_path; }); + } + + void attachNonStrictPath(const String & non_strict_path) + { + addFilter([non_strict_path](const auto & request) { return startsWith(request.getURI(), non_strict_path); }); } /// Handle GET or HEAD endpoint on specified path - TThis * allowGetAndHeadRequest() + void allowGetAndHeadRequest() { - return addFilter([](const auto & request) + addFilter([](const auto & request) { return request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET || request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD; @@ -84,35 +104,35 @@ public: } /// Handle POST or GET with params - TThis * allowPostAndGetParamsRequest() + void allowPostAndGetParamsRequest() { - return addFilter([](const auto & request) + addFilter([](const auto & request) { return request.getURI().find('?') != std::string::npos || request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST; }); } - Poco::Net::HTTPRequestHandler * createRequestHandler(const Poco::Net::HTTPServerRequest & request) override + std::unique_ptr createRequestHandler(const HTTPServerRequest & request) override { return filter(request) ? creator() : nullptr; } private: Filter filter; - std::function creator; + std::function ()> creator; }; -Poco::Net::HTTPRequestHandlerFactory * createStaticHandlerFactory(IServer & server, const std::string & config_prefix); +HTTPRequestHandlerFactoryPtr createStaticHandlerFactory(IServer & server, const std::string & config_prefix); -Poco::Net::HTTPRequestHandlerFactory * createDynamicHandlerFactory(IServer & server, const std::string & config_prefix); +HTTPRequestHandlerFactoryPtr createDynamicHandlerFactory(IServer & server, const std::string & config_prefix); -Poco::Net::HTTPRequestHandlerFactory * createPredefinedHandlerFactory(IServer & server, const std::string & config_prefix); +HTTPRequestHandlerFactoryPtr createPredefinedHandlerFactory(IServer & server, const std::string & config_prefix); -Poco::Net::HTTPRequestHandlerFactory * createReplicasStatusHandlerFactory(IServer & server, const std::string & config_prefix); +HTTPRequestHandlerFactoryPtr createReplicasStatusHandlerFactory(IServer & server, const std::string & config_prefix); -Poco::Net::HTTPRequestHandlerFactory * createPrometheusHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & config_prefix); - -Poco::Net::HTTPRequestHandlerFactory * createHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & name); +HTTPRequestHandlerFactoryPtr +createPrometheusHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & config_prefix); +HTTPRequestHandlerFactoryPtr createHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & name); } diff --git a/src/Server/HTTPHandlerRequestFilter.h b/src/Server/HTTPHandlerRequestFilter.h index f952efd7653..f0474e8b953 100644 --- a/src/Server/HTTPHandlerRequestFilter.h +++ b/src/Server/HTTPHandlerRequestFilter.h @@ -1,15 +1,17 @@ #pragma once -#include "HTTPHandlerFactory.h" +#include +#include +#include +#include +#include #include #include #include -#include #include -#include - +#include namespace DB { @@ -17,11 +19,9 @@ namespace DB namespace ErrorCodes { extern const int CANNOT_COMPILE_REGEXP; - extern const int UNKNOWN_ELEMENT_IN_CONFIG; } - -typedef std::shared_ptr CompiledRegexPtr; +using CompiledRegexPtr = std::shared_ptr; static inline bool checkRegexExpression(const StringRef & match_str, const CompiledRegexPtr & compiled_regex) { @@ -45,10 +45,10 @@ static inline auto methodsFilter(Poco::Util::AbstractConfiguration & config, con std::vector methods; Poco::StringTokenizer tokenizer(config.getString(config_path), ","); - for (auto iterator = tokenizer.begin(); iterator != tokenizer.end(); ++iterator) - methods.emplace_back(Poco::toUpper(Poco::trim(*iterator))); + for (const auto & iterator : tokenizer) + methods.emplace_back(Poco::toUpper(Poco::trim(iterator))); - return [methods](const Poco::Net::HTTPServerRequest & request) { return std::count(methods.begin(), methods.end(), request.getMethod()); }; + return [methods](const HTTPServerRequest & request) { return std::count(methods.begin(), methods.end(), request.getMethod()); }; } static inline auto getExpression(const std::string & expression) @@ -66,7 +66,7 @@ static inline auto getExpression(const std::string & expression) static inline auto urlFilter(Poco::Util::AbstractConfiguration & config, const std::string & config_path) { - return [expression = getExpression(config.getString(config_path))](const Poco::Net::HTTPServerRequest & request) + return [expression = getExpression(config.getString(config_path))](const HTTPServerRequest & request) { const auto & uri = request.getURI(); const auto & end = find_first_symbols<'?'>(uri.data(), uri.data() + uri.size()); @@ -88,7 +88,7 @@ static inline auto headersFilter(Poco::Util::AbstractConfiguration & config, con headers_expression.emplace(std::make_pair(header_name, expression)); } - return [headers_expression](const Poco::Net::HTTPServerRequest & request) + return [headers_expression](const HTTPServerRequest & request) { for (const auto & [header_name, header_expression] : headers_expression) { @@ -101,28 +101,4 @@ static inline auto headersFilter(Poco::Util::AbstractConfiguration & config, con }; } -template -static inline Poco::Net::HTTPRequestHandlerFactory * addFiltersFromConfig( - HandlingRuleHTTPHandlerFactory * factory, Poco::Util::AbstractConfiguration & config, const std::string & prefix) -{ - Poco::Util::AbstractConfiguration::Keys filters_type; - config.keys(prefix, filters_type); - - for (const auto & filter_type : filters_type) - { - if (filter_type == "handler") - continue; - else if (filter_type == "url") - factory->addFilter(urlFilter(config, prefix + ".url")); - else if (filter_type == "headers") - factory->addFilter(headersFilter(config, prefix + ".headers")); - else if (filter_type == "methods") - factory->addFilter(methodsFilter(config, prefix + ".methods")); - else - throw Exception("Unknown element in config: " + prefix + "." + filter_type, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); - } - - return factory; -} - } diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index 973759bedd1..3296da94578 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -1,18 +1,18 @@ -#include "InterserverIOHTTPHandler.h" +#include + +#include -#include -#include -#include -#include -#include -#include -#include #include #include -#include -#include #include -#include "IServer.h" +#include +#include +#include +#include +#include + +#include +#include namespace DB { @@ -23,7 +23,7 @@ namespace ErrorCodes extern const int TOO_MANY_SIMULTANEOUS_QUERIES; } -std::pair InterserverIOHTTPHandler::checkAuthentication(Poco::Net::HTTPServerRequest & request) const +std::pair InterserverIOHTTPHandler::checkAuthentication(HTTPServerRequest & request) const { const auto & config = server.config(); @@ -51,7 +51,7 @@ std::pair InterserverIOHTTPHandler::checkAuthentication(Poco::Net: return {"", true}; } -void InterserverIOHTTPHandler::processQuery(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response, Output & used_output) +void InterserverIOHTTPHandler::processQuery(HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output) { HTMLForm params(request); @@ -60,7 +60,7 @@ void InterserverIOHTTPHandler::processQuery(Poco::Net::HTTPServerRequest & reque String endpoint_name = params.get("endpoint"); bool compress = params.get("compress") == "true"; - ReadBufferFromIStream body(request.stream()); + auto & body = request.getStream(); auto endpoint = server.context().getInterserverIOHandler().getEndpoint(endpoint_name); /// Locked for read while query processing @@ -80,18 +80,19 @@ void InterserverIOHTTPHandler::processQuery(Poco::Net::HTTPServerRequest & reque } -void InterserverIOHTTPHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) +void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { setThreadName("IntersrvHandler"); /// In order to work keep-alive. - if (request.getVersion() == Poco::Net::HTTPServerRequest::HTTP_1_1) + if (request.getVersion() == HTTPServerRequest::HTTP_1_1) response.setChunkedTransferEncoding(true); Output used_output; const auto & config = server.config(); unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", 10); - used_output.out = std::make_shared(request, response, keep_alive_timeout); + used_output.out = std::make_shared( + response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); try { @@ -102,7 +103,7 @@ void InterserverIOHTTPHandler::handleRequest(Poco::Net::HTTPServerRequest & requ } else { - response.setStatusAndReason(Poco::Net::HTTPServerResponse::HTTP_UNAUTHORIZED); + response.setStatusAndReason(HTTPServerResponse::HTTP_UNAUTHORIZED); if (!response.sent()) writeString(message, *used_output.out); LOG_WARNING(log, "Query processing failed request: '{}' authentication failed", request.getURI()); diff --git a/src/Server/InterserverIOHTTPHandler.h b/src/Server/InterserverIOHTTPHandler.h index 8dc1962664c..47892aa678f 100644 --- a/src/Server/InterserverIOHTTPHandler.h +++ b/src/Server/InterserverIOHTTPHandler.h @@ -1,10 +1,12 @@ #pragma once -#include -#include -#include +#include #include +#include + +#include + namespace CurrentMetrics { @@ -17,7 +19,7 @@ namespace DB class IServer; class WriteBufferFromHTTPServerResponse; -class InterserverIOHTTPHandler : public Poco::Net::HTTPRequestHandler +class InterserverIOHTTPHandler : public HTTPRequestHandler { public: explicit InterserverIOHTTPHandler(IServer & server_) @@ -26,7 +28,7 @@ public: { } - void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; private: struct Output @@ -39,9 +41,9 @@ private: CurrentMetrics::Increment metric_increment{CurrentMetrics::InterserverConnection}; - void processQuery(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response, Output & used_output); + void processQuery(HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output); - std::pair checkAuthentication(Poco::Net::HTTPServerRequest & request) const; + std::pair checkAuthentication(HTTPServerRequest & request) const; }; } diff --git a/src/Server/NotFoundHandler.cpp b/src/Server/NotFoundHandler.cpp index 766e8895784..3181708b9b7 100644 --- a/src/Server/NotFoundHandler.cpp +++ b/src/Server/NotFoundHandler.cpp @@ -1,32 +1,25 @@ -#include "NotFoundHandler.h" +#include #include - #include -#include -#include - namespace DB { - -void NotFoundHandler::handleRequest( - Poco::Net::HTTPServerRequest & request, - Poco::Net::HTTPServerResponse & response) +void NotFoundHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { try { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_NOT_FOUND); - response.send() << "There is no handle " << request.getURI() << "\n\n" - << "Use / or /ping for health checks.\n" - << "Or /replicas_status for more sophisticated health checks.\n\n" - << "Send queries from your program with POST method or GET /?query=...\n\n" - << "Use clickhouse-client:\n\n" - << "For interactive data analysis:\n" - << " clickhouse-client\n\n" - << "For batch query processing:\n" - << " clickhouse-client --query='SELECT 1' > result\n" - << " clickhouse-client < query > result\n"; + *response.send() << "There is no handle " << request.getURI() << "\n\n" + << "Use / or /ping for health checks.\n" + << "Or /replicas_status for more sophisticated health checks.\n\n" + << "Send queries from your program with POST method or GET /?query=...\n\n" + << "Use clickhouse-client:\n\n" + << "For interactive data analysis:\n" + << " clickhouse-client\n\n" + << "For batch query processing:\n" + << " clickhouse-client --query='SELECT 1' > result\n" + << " clickhouse-client < query > result\n"; } catch (...) { diff --git a/src/Server/NotFoundHandler.h b/src/Server/NotFoundHandler.h index 7f758e49d0d..749ac388c4d 100644 --- a/src/Server/NotFoundHandler.h +++ b/src/Server/NotFoundHandler.h @@ -1,18 +1,15 @@ #pragma once -#include - +#include namespace DB { /// Response with 404 and verbose description. -class NotFoundHandler : public Poco::Net::HTTPRequestHandler +class NotFoundHandler : public HTTPRequestHandler { public: - void handleRequest( - Poco::Net::HTTPServerRequest & request, - Poco::Net::HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; }; } diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp index 60deec9b289..83cb8e85a9e 100644 --- a/src/Server/PrometheusRequestHandler.cpp +++ b/src/Server/PrometheusRequestHandler.cpp @@ -1,26 +1,19 @@ -#include "PrometheusRequestHandler.h" +#include #include - -#include - -#include -#include -#include - -#include +#include +#include +#include #include +#include +#include -#include -#include +#include namespace DB { - -void PrometheusRequestHandler::handleRequest( - Poco::Net::HTTPServerRequest & request, - Poco::Net::HTTPServerResponse & response) +void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { try { @@ -31,7 +24,7 @@ void PrometheusRequestHandler::handleRequest( response.setContentType("text/plain; version=0.0.4; charset=UTF-8"); - auto wb = WriteBufferFromHTTPServerResponse(request, response, keep_alive_timeout); + auto wb = WriteBufferFromHTTPServerResponse(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); metrics_writer.write(wb); wb.finalize(); } @@ -41,10 +34,13 @@ void PrometheusRequestHandler::handleRequest( } } -Poco::Net::HTTPRequestHandlerFactory * createPrometheusHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & config_prefix) +HTTPRequestHandlerFactoryPtr +createPrometheusHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & config_prefix) { - return addFiltersFromConfig(new HandlingRuleHTTPHandlerFactory( - server, PrometheusMetricsWriter(server.config(), config_prefix + ".handler", async_metrics)), server.config(), config_prefix); + auto factory = std::make_shared>( + server, PrometheusMetricsWriter(server.config(), config_prefix + ".handler", async_metrics)); + factory->addFiltersFromConfig(server.config(), config_prefix); + return factory; } } diff --git a/src/Server/PrometheusRequestHandler.h b/src/Server/PrometheusRequestHandler.h index 47c8adf4774..1fb3d9f0f59 100644 --- a/src/Server/PrometheusRequestHandler.h +++ b/src/Server/PrometheusRequestHandler.h @@ -1,17 +1,15 @@ #pragma once -#include "IServer.h" -#include "PrometheusMetricsWriter.h" +#include -#include -#include -#include -#include +#include "PrometheusMetricsWriter.h" namespace DB { -class PrometheusRequestHandler : public Poco::Net::HTTPRequestHandler +class IServer; + +class PrometheusRequestHandler : public HTTPRequestHandler { private: IServer & server; @@ -24,9 +22,7 @@ public: { } - void handleRequest( - Poco::Net::HTTPServerRequest & request, - Poco::Net::HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; }; } diff --git a/src/Server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp index fc79ad9d134..778f9827131 100644 --- a/src/Server/ReplicasStatusHandler.cpp +++ b/src/Server/ReplicasStatusHandler.cpp @@ -1,17 +1,18 @@ -#include "ReplicasStatusHandler.h" +#include -#include -#include -#include -#include #include #include +#include +#include +#include +#include +#include +#include +#include #include #include #include -#include -#include namespace DB @@ -24,7 +25,7 @@ ReplicasStatusHandler::ReplicasStatusHandler(IServer & server) } -void ReplicasStatusHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) +void ReplicasStatusHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { try { @@ -82,7 +83,7 @@ void ReplicasStatusHandler::handleRequest(Poco::Net::HTTPServerRequest & request } if (verbose) - response.send() << message.str(); + *response.send() << message.str(); else { const char * data = "Ok.\n"; @@ -100,7 +101,7 @@ void ReplicasStatusHandler::handleRequest(Poco::Net::HTTPServerRequest & request if (!response.sent()) { /// We have not sent anything yet and we don't even know if we need to compress response. - response.send() << getCurrentExceptionMessage(false) << std::endl; + *response.send() << getCurrentExceptionMessage(false) << std::endl; } } catch (...) @@ -110,9 +111,11 @@ void ReplicasStatusHandler::handleRequest(Poco::Net::HTTPServerRequest & request } } -Poco::Net::HTTPRequestHandlerFactory * createReplicasStatusHandlerFactory(IServer & server, const std::string & config_prefix) +HTTPRequestHandlerFactoryPtr createReplicasStatusHandlerFactory(IServer & server, const std::string & config_prefix) { - return addFiltersFromConfig(new HandlingRuleHTTPHandlerFactory(server), server.config(), config_prefix); + auto factory = std::make_shared>(server); + factory->addFiltersFromConfig(server.config(), config_prefix); + return factory; } } diff --git a/src/Server/ReplicasStatusHandler.h b/src/Server/ReplicasStatusHandler.h index a32f1ba905f..8a790b13ad6 100644 --- a/src/Server/ReplicasStatusHandler.h +++ b/src/Server/ReplicasStatusHandler.h @@ -1,17 +1,15 @@ #pragma once -#include "IServer.h" - -#include - +#include namespace DB { class Context; +class IServer; /// Replies "Ok.\n" if all replicas on this server don't lag too much. Otherwise output lag information. -class ReplicasStatusHandler : public Poco::Net::HTTPRequestHandler +class ReplicasStatusHandler : public HTTPRequestHandler { private: Context & context; @@ -19,7 +17,7 @@ private: public: explicit ReplicasStatusHandler(IServer & server_); - void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; }; diff --git a/src/Server/StaticRequestHandler.cpp b/src/Server/StaticRequestHandler.cpp index ad2c07ab0aa..f3f564c1cf8 100644 --- a/src/Server/StaticRequestHandler.cpp +++ b/src/Server/StaticRequestHandler.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include @@ -32,7 +32,8 @@ namespace ErrorCodes extern const int INVALID_CONFIG_PARAMETER; } -static inline WriteBufferPtr responseWriteBuffer(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response, unsigned int keep_alive_timeout) +static inline WriteBufferPtr +responseWriteBuffer(HTTPServerRequest & request, HTTPServerResponse & response, unsigned int keep_alive_timeout) { /// The client can pass a HTTP header indicating supported compression method (gzip or deflate). String http_response_compression_methods = request.get("Accept-Encoding", ""); @@ -55,12 +56,15 @@ static inline WriteBufferPtr responseWriteBuffer(Poco::Net::HTTPServerRequest & bool client_supports_http_compression = http_response_compression_method != CompressionMethod::None; return std::make_shared( - request, response, keep_alive_timeout, client_supports_http_compression, http_response_compression_method); + response, + request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, + keep_alive_timeout, + client_supports_http_compression, + http_response_compression_method); } static inline void trySendExceptionToClient( - const std::string & s, int exception_code, - Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response , WriteBuffer & out) + const std::string & s, int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, WriteBuffer & out) { try { @@ -69,13 +73,13 @@ static inline void trySendExceptionToClient( /// If HTTP method is POST and Keep-Alive is turned on, we should read the whole request body /// to avoid reading part of the current request body in the next request. if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST - && response.getKeepAlive() && !request.stream().eof() && exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED) - request.stream().ignore(std::numeric_limits::max()); + && response.getKeepAlive() && !request.getStream().eof() && exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED) + request.getStream().ignore(std::numeric_limits::max()); response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); if (!response.sent()) - response.send() << s << std::endl; + *response.send() << s << std::endl; else { if (out.count() != out.offset()) @@ -94,7 +98,7 @@ static inline void trySendExceptionToClient( } } -void StaticRequestHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) +void StaticRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { auto keep_alive_timeout = server.config().getUInt("keep_alive_timeout", 10); const auto & out = responseWriteBuffer(request, response, keep_alive_timeout); @@ -159,14 +163,17 @@ StaticRequestHandler::StaticRequestHandler(IServer & server_, const String & exp { } -Poco::Net::HTTPRequestHandlerFactory * createStaticHandlerFactory(IServer & server, const std::string & config_prefix) +HTTPRequestHandlerFactoryPtr createStaticHandlerFactory(IServer & server, const std::string & config_prefix) { int status = server.config().getInt(config_prefix + ".handler.status", 200); std::string response_content = server.config().getRawString(config_prefix + ".handler.response_content", "Ok.\n"); std::string response_content_type = server.config().getString(config_prefix + ".handler.content_type", "text/plain; charset=UTF-8"); + auto factory = std::make_shared>( + server, std::move(response_content), std::move(status), std::move(response_content_type)); - return addFiltersFromConfig(new HandlingRuleHTTPHandlerFactory( - server, std::move(response_content), std::move(status), std::move(response_content_type)), server.config(), config_prefix); + factory->addFiltersFromConfig(server.config(), config_prefix); + + return factory; } } diff --git a/src/Server/StaticRequestHandler.h b/src/Server/StaticRequestHandler.h index 0a29384ad0e..56c7f5a6d44 100644 --- a/src/Server/StaticRequestHandler.h +++ b/src/Server/StaticRequestHandler.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include @@ -11,7 +11,7 @@ class IServer; class WriteBuffer; /// Response with custom string. Can be used for browser. -class StaticRequestHandler : public Poco::Net::HTTPRequestHandler +class StaticRequestHandler : public HTTPRequestHandler { private: IServer & server; @@ -29,7 +29,7 @@ public: void writeResponse(WriteBuffer & out); - void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; }; } diff --git a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp index 6159a27971f..fb8ff71611e 100644 --- a/src/Server/WebUIRequestHandler.cpp +++ b/src/Server/WebUIRequestHandler.cpp @@ -18,18 +18,18 @@ WebUIRequestHandler::WebUIRequestHandler(IServer & server_, std::string resource } -void WebUIRequestHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) +void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { auto keep_alive_timeout = server.config().getUInt("keep_alive_timeout", 10); response.setContentType("text/html; charset=UTF-8"); - if (request.getVersion() == Poco::Net::HTTPServerRequest::HTTP_1_1) + if (request.getVersion() == HTTPServerRequest::HTTP_1_1) response.setChunkedTransferEncoding(true); setResponseDefaultHeaders(response, keep_alive_timeout); response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - response.send() << getResource(resource_name); + *response.send() << getResource(resource_name); } } diff --git a/src/Server/WebUIRequestHandler.h b/src/Server/WebUIRequestHandler.h index 3066b86b36a..1c52b626091 100644 --- a/src/Server/WebUIRequestHandler.h +++ b/src/Server/WebUIRequestHandler.h @@ -1,6 +1,6 @@ #pragma once -#include +#include namespace DB @@ -9,14 +9,14 @@ namespace DB class IServer; /// Response with HTML page that allows to send queries and show results in browser. -class WebUIRequestHandler : public Poco::Net::HTTPRequestHandler +class WebUIRequestHandler : public HTTPRequestHandler { private: IServer & server; std::string resource_name; public: WebUIRequestHandler(IServer & server_, std::string resource_name_); - void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; }; } diff --git a/src/Server/ya.make b/src/Server/ya.make index a0269e9ac84..ef5ef6d5f57 100644 --- a/src/Server/ya.make +++ b/src/Server/ya.make @@ -11,6 +11,14 @@ PEERDIR( SRCS( GRPCServer.cpp + HTTP/HTMLForm.cpp + HTTP/HTTPServer.cpp + HTTP/HTTPServerConnection.cpp + HTTP/HTTPServerConnectionFactory.cpp + HTTP/HTTPServerRequest.cpp + HTTP/HTTPServerResponse.cpp + HTTP/ReadHeaders.cpp + HTTP/WriteBufferFromHTTPServerResponse.cpp HTTPHandler.cpp HTTPHandlerFactory.cpp InterserverIOHTTPHandler.cpp diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index e01e7793dd3..f80020991b0 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -1,17 +1,20 @@ #include + +#include +#include +#include +#include +#include +#include #include #include -#include -#include +#include #include #include -#include -#include #include + #include -#include #include -#include namespace CurrentMetrics @@ -83,7 +86,7 @@ std::string Service::getId(const std::string & node_id) const return getEndpointId(node_id); } -void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & /*body*/, WriteBuffer & out, Poco::Net::HTTPServerResponse & response) +void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, WriteBuffer & out, HTTPServerResponse & response) { int client_protocol_version = parse(params.get("client_protocol_version", "0")); diff --git a/src/Storages/MergeTree/DataPartsExchange.h b/src/Storages/MergeTree/DataPartsExchange.h index 0a359474d2d..834fed1182f 100644 --- a/src/Storages/MergeTree/DataPartsExchange.h +++ b/src/Storages/MergeTree/DataPartsExchange.h @@ -20,21 +20,19 @@ namespace DataPartsExchange class Service final : public InterserverIOEndpoint { public: - Service(MergeTreeData & data_) - : data(data_), log(&Poco::Logger::get(data.getLogName() + " (Replicated PartsService)")) {} + explicit Service(MergeTreeData & data_) : data(data_), log(&Poco::Logger::get(data.getLogName() + " (Replicated PartsService)")) {} Service(const Service &) = delete; Service & operator=(const Service &) = delete; std::string getId(const std::string & node_id) const override; - void processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & body, WriteBuffer & out, Poco::Net::HTTPServerResponse & response) override; + void processQuery(const HTMLForm & params, ReadBuffer & body, WriteBuffer & out, HTTPServerResponse & response) override; private: MergeTreeData::DataPartPtr findPart(const String & name); void sendPartFromMemory(const MergeTreeData::DataPartPtr & part, WriteBuffer & out); void sendPartFromDisk(const MergeTreeData::DataPartPtr & part, WriteBuffer & out, int client_protocol_version); -private: /// StorageReplicatedMergeTree::shutdown() waits for all parts exchange handlers to finish, /// so Service will never access dangling reference to storage MergeTreeData & data; @@ -43,13 +41,10 @@ private: /** Client for getting the parts from the table *MergeTree. */ -class Fetcher final +class Fetcher final : private boost::noncopyable { public: - Fetcher(MergeTreeData & data_) : data(data_), log(&Poco::Logger::get("Fetcher")) {} - - Fetcher(const Fetcher &) = delete; - Fetcher & operator=(const Fetcher &) = delete; + explicit Fetcher(MergeTreeData & data_) : data(data_), log(&Poco::Logger::get("Fetcher")) {} /// Downloads a part to tmp_directory. If to_detached - downloads to the `detached` directory. MergeTreeData::MutableDataPartPtr fetchPart( @@ -75,7 +70,7 @@ private: bool to_detached, const String & tmp_prefix_, bool sync, - const ReservationPtr reservation, + ReservationPtr reservation, PooledReadWriteBufferFromHTTP & in); MergeTreeData::MutableDataPartPtr downloadPartToMemory( diff --git a/tests/queries/query_test.py b/tests/queries/query_test.py index 3dea639187e..417a51fe523 100644 --- a/tests/queries/query_test.py +++ b/tests/queries/query_test.py @@ -33,7 +33,7 @@ SKIP_LIST = [ "01057_http_compression_prefer_brotli", "01080_check_for_error_incorrect_size_of_nested_column", "01083_expressions_in_engine_arguments", - "01086_odbc_roundtrip", + # "01086_odbc_roundtrip", "01088_benchmark_query_id", "01098_temporary_and_external_tables", "01099_parallel_distributed_insert_select", From ea27c3ca32bdf9a18e90d75bf38bbc725c6db4db Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 19 Feb 2021 16:41:46 +0300 Subject: [PATCH 22/22] Add gdb to fasttest image --- docker/test/fasttest/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 03b7b2fc53a..64be52d8e30 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -47,6 +47,7 @@ RUN apt-get update \ expect \ fakeroot \ git \ + gdb \ gperf \ lld-${LLVM_VERSION} \ llvm-${LLVM_VERSION} \