Mirror of https://github.com/ClickHouse/ClickHouse.git

Commit 712bef8eef: Merge branch 'master' into change-server-memory-usage-without-restart
.gitmodules (vendored, 6 changes)

@@ -209,6 +209,12 @@
[submodule "contrib/fast_float"]
	path = contrib/fast_float
	url = https://github.com/fastfloat/fast_float
[submodule "contrib/libpqxx"]
	path = contrib/libpqxx
	url = https://github.com/jtv/libpqxx
[submodule "contrib/libpq"]
	path = contrib/libpq
	url = https://github.com/ClickHouse-Extras/libpq
[submodule "contrib/boringssl"]
	path = contrib/boringssl
	url = https://github.com/ClickHouse-Extras/boringssl.git
@@ -490,6 +490,7 @@ include (cmake/find/rapidjson.cmake)
include (cmake/find/fastops.cmake)
include (cmake/find/odbc.cmake)
include (cmake/find/rocksdb.cmake)
include (cmake/find/libpqxx.cmake)
include (cmake/find/nuraft.cmake)
cmake/find/libpqxx.cmake (new file, 31 lines)

@@ -0,0 +1,31 @@
option(ENABLE_LIBPQXX "Enable libpqxx" ${ENABLE_LIBRARIES})

if (NOT ENABLE_LIBPQXX)
    return()
endif()

if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libpqxx/CMakeLists.txt")
    message (WARNING "submodule contrib/libpqxx is missing. to fix try run: \n git submodule update --init --recursive")
    message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libpqxx library")
    set (USE_LIBPQXX 0)
    return()
endif()

if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libpq/include")
    message (ERROR "submodule contrib/libpq is missing. to fix try run: \n git submodule update --init --recursive")
    message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libpq needed for libpqxx")
    set (USE_LIBPQXX 0)
    return()
endif()

if (NOT USE_INTERNAL_SSL_LIBRARY)
    set (USE_LIBPQXX 0)
else ()
    set (USE_LIBPQXX 1)
    set (LIBPQXX_LIBRARY libpqxx)
    set (LIBPQ_LIBRARY libpq)
    set (LIBPQXX_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libpqxx/include")
    set (LIBPQ_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/libpq")
    message (STATUS "Using libpqxx=${USE_LIBPQXX}: ${LIBPQXX_INCLUDE_DIR} : ${LIBPQXX_LIBRARY}")
    message (STATUS "Using libpq: ${LIBPQ_ROOT_DIR} : ${LIBPQ_INCLUDE_DIR} : ${LIBPQ_LIBRARY}")
endif()
contrib/CMakeLists.txt (vendored, 5 changes)

@@ -310,6 +310,11 @@ if (USE_INTERNAL_ROCKSDB_LIBRARY)
    add_subdirectory(rocksdb-cmake)
endif()

if (USE_LIBPQXX)
    add_subdirectory (libpq-cmake)
    add_subdirectory (libpqxx-cmake)
endif()

if (USE_NURAFT)
    add_subdirectory(nuraft-cmake)
endif()
contrib/libpq (new submodule)

@@ -0,0 +1 @@
Subproject commit 8e7e905854714a7fbb49c124dbc45c7bd4b98e07
contrib/libpq-cmake/CMakeLists.txt (new file, 58 lines)

@@ -0,0 +1,58 @@
set(LIBPQ_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libpq)

set(SRCS
    ${LIBPQ_SOURCE_DIR}/fe-auth.c
    ${LIBPQ_SOURCE_DIR}/fe-auth-scram.c
    ${LIBPQ_SOURCE_DIR}/fe-connect.c
    ${LIBPQ_SOURCE_DIR}/fe-exec.c
    ${LIBPQ_SOURCE_DIR}/fe-lobj.c
    ${LIBPQ_SOURCE_DIR}/fe-misc.c
    ${LIBPQ_SOURCE_DIR}/fe-print.c
    ${LIBPQ_SOURCE_DIR}/fe-protocol2.c
    ${LIBPQ_SOURCE_DIR}/fe-protocol3.c
    ${LIBPQ_SOURCE_DIR}/fe-secure.c
    ${LIBPQ_SOURCE_DIR}/fe-secure-common.c
    ${LIBPQ_SOURCE_DIR}/fe-secure-openssl.c
    ${LIBPQ_SOURCE_DIR}/legacy-pqsignal.c
    ${LIBPQ_SOURCE_DIR}/libpq-events.c
    ${LIBPQ_SOURCE_DIR}/pqexpbuffer.c

    ${LIBPQ_SOURCE_DIR}/common/scram-common.c
    ${LIBPQ_SOURCE_DIR}/common/sha2_openssl.c
    ${LIBPQ_SOURCE_DIR}/common/md5.c
    ${LIBPQ_SOURCE_DIR}/common/saslprep.c
    ${LIBPQ_SOURCE_DIR}/common/unicode_norm.c
    ${LIBPQ_SOURCE_DIR}/common/ip.c
    ${LIBPQ_SOURCE_DIR}/common/jsonapi.c
    ${LIBPQ_SOURCE_DIR}/common/wchar.c
    ${LIBPQ_SOURCE_DIR}/common/base64.c
    ${LIBPQ_SOURCE_DIR}/common/link-canary.c
    ${LIBPQ_SOURCE_DIR}/common/fe_memutils.c
    ${LIBPQ_SOURCE_DIR}/common/string.c
    ${LIBPQ_SOURCE_DIR}/common/pg_get_line.c
    ${LIBPQ_SOURCE_DIR}/common/stringinfo.c
    ${LIBPQ_SOURCE_DIR}/common/psprintf.c
    ${LIBPQ_SOURCE_DIR}/common/encnames.c
    ${LIBPQ_SOURCE_DIR}/common/logging.c

    ${LIBPQ_SOURCE_DIR}/port/snprintf.c
    ${LIBPQ_SOURCE_DIR}/port/strlcpy.c
    ${LIBPQ_SOURCE_DIR}/port/strerror.c
    ${LIBPQ_SOURCE_DIR}/port/inet_net_ntop.c
    ${LIBPQ_SOURCE_DIR}/port/getpeereid.c
    ${LIBPQ_SOURCE_DIR}/port/chklocale.c
    ${LIBPQ_SOURCE_DIR}/port/noblock.c
    ${LIBPQ_SOURCE_DIR}/port/pg_strong_random.c
    ${LIBPQ_SOURCE_DIR}/port/pgstrcasecmp.c
    ${LIBPQ_SOURCE_DIR}/port/thread.c
    ${LIBPQ_SOURCE_DIR}/port/path.c
    ${LIBPQ_SOURCE_DIR}/port/explicit_bzero.c
)

add_library(libpq ${SRCS})

target_include_directories (libpq PUBLIC ${LIBPQ_SOURCE_DIR})
target_include_directories (libpq PUBLIC ${LIBPQ_SOURCE_DIR}/include)
target_include_directories (libpq PRIVATE ${LIBPQ_SOURCE_DIR}/configs)

target_link_libraries (libpq PRIVATE ssl)
contrib/libpqxx (new submodule)

@@ -0,0 +1 @@
Subproject commit 58d2a028d1600225ac3a478d6b3a06ba2f0c01f6
contrib/libpqxx-cmake/CMakeLists.txt (new file, 78 lines)

@@ -0,0 +1,78 @@
set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/libpqxx)

set (SRCS
    ${LIBRARY_DIR}/src/strconv.cxx
    ${LIBRARY_DIR}/src/array.cxx
    ${LIBRARY_DIR}/src/binarystring.cxx
    ${LIBRARY_DIR}/src/connection.cxx
    ${LIBRARY_DIR}/src/cursor.cxx
    ${LIBRARY_DIR}/src/encodings.cxx
    ${LIBRARY_DIR}/src/errorhandler.cxx
    ${LIBRARY_DIR}/src/except.cxx
    ${LIBRARY_DIR}/src/field.cxx
    ${LIBRARY_DIR}/src/largeobject.cxx
    ${LIBRARY_DIR}/src/notification.cxx
    ${LIBRARY_DIR}/src/pipeline.cxx
    ${LIBRARY_DIR}/src/result.cxx
    ${LIBRARY_DIR}/src/robusttransaction.cxx
    ${LIBRARY_DIR}/src/sql_cursor.cxx
    ${LIBRARY_DIR}/src/stream_from.cxx
    ${LIBRARY_DIR}/src/stream_to.cxx
    ${LIBRARY_DIR}/src/subtransaction.cxx
    ${LIBRARY_DIR}/src/transaction.cxx
    ${LIBRARY_DIR}/src/transaction_base.cxx
    ${LIBRARY_DIR}/src/row.cxx
    ${LIBRARY_DIR}/src/util.cxx
    ${LIBRARY_DIR}/src/version.cxx
)

# Need to list each header file explicitly, because the directory include/pqxx also contains
# files named like the standard headers (e.g. just 'array'). Adding the whole directory with
# `target_include_directories` would therefore conflict with every include of <array>.
set (HDRS
    ${LIBRARY_DIR}/include/pqxx/array.hxx
    ${LIBRARY_DIR}/include/pqxx/binarystring.hxx
    ${LIBRARY_DIR}/include/pqxx/composite.hxx
    ${LIBRARY_DIR}/include/pqxx/connection.hxx
    ${LIBRARY_DIR}/include/pqxx/cursor.hxx
    ${LIBRARY_DIR}/include/pqxx/dbtransaction.hxx
    ${LIBRARY_DIR}/include/pqxx/errorhandler.hxx
    ${LIBRARY_DIR}/include/pqxx/except.hxx
    ${LIBRARY_DIR}/include/pqxx/field.hxx
    ${LIBRARY_DIR}/include/pqxx/isolation.hxx
    ${LIBRARY_DIR}/include/pqxx/largeobject.hxx
    ${LIBRARY_DIR}/include/pqxx/nontransaction.hxx
    ${LIBRARY_DIR}/include/pqxx/notification.hxx
    ${LIBRARY_DIR}/include/pqxx/pipeline.hxx
    ${LIBRARY_DIR}/include/pqxx/prepared_statement.hxx
    ${LIBRARY_DIR}/include/pqxx/result.hxx
    ${LIBRARY_DIR}/include/pqxx/robusttransaction.hxx
    ${LIBRARY_DIR}/include/pqxx/row.hxx
    ${LIBRARY_DIR}/include/pqxx/separated_list.hxx
    ${LIBRARY_DIR}/include/pqxx/strconv.hxx
    ${LIBRARY_DIR}/include/pqxx/stream_from.hxx
    ${LIBRARY_DIR}/include/pqxx/stream_to.hxx
    ${LIBRARY_DIR}/include/pqxx/subtransaction.hxx
    ${LIBRARY_DIR}/include/pqxx/transaction.hxx
    ${LIBRARY_DIR}/include/pqxx/transaction_base.hxx
    ${LIBRARY_DIR}/include/pqxx/types.hxx
    ${LIBRARY_DIR}/include/pqxx/util.hxx
    ${LIBRARY_DIR}/include/pqxx/version.hxx
    ${LIBRARY_DIR}/include/pqxx/zview.hxx
)

add_library(libpqxx ${SRCS} ${HDRS})

target_link_libraries(libpqxx PUBLIC ${LIBPQ_LIBRARY})
target_include_directories (libpqxx PRIVATE ${LIBRARY_DIR}/include)

# crutch
set(CM_CONFIG_H_IN "${LIBRARY_DIR}/include/pqxx/config.h.in")
set(CM_CONFIG_PUB "${LIBRARY_DIR}/include/pqxx/config-public-compiler.h")
set(CM_CONFIG_INT "${LIBRARY_DIR}/include/pqxx/config-internal-compiler.h")
set(CM_CONFIG_PQ "${LIBRARY_DIR}/include/pqxx/config-internal-libpq.h")

configure_file("${CM_CONFIG_H_IN}" "${CM_CONFIG_INT}" @ONLY)
configure_file("${CM_CONFIG_H_IN}" "${CM_CONFIG_PUB}" @ONLY)
configure_file("${CM_CONFIG_H_IN}" "${CM_CONFIG_PQ}" @ONLY)
@@ -8,6 +8,7 @@ stage=${stage:-}
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
echo "$script_dir"
repo_dir=ch
BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-11_debug_none_bundled_unsplitted_disable_False_binary"}

function clone
{
@@ -35,7 +36,7 @@ function download
    # wget -O- -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/performance/performance.tgz" \
    #     | tar --strip-components=1 -zxv

-    wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-11_debug_none_bundled_unsplitted_disable_False_binary/clickhouse"
+    wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"
    chmod +x clickhouse
    ln -s ./clickhouse ./clickhouse-server
    ln -s ./clickhouse ./clickhouse-client
@@ -538,11 +538,11 @@ For case-insensitive search or/and in UTF-8 format use functions `ngramSearchCas
!!! note "Note"
    For UTF-8 case we use 3-gram distance. All these are not perfectly fair n-gram distances. We use 2-byte hashes to hash n-grams and then calculate the (non-)symmetric difference between these hash tables – collisions may occur. With UTF-8 case-insensitive format we do not use fair `tolower` function – we zero the 5-th bit (starting from zero) of each codepoint byte and first bit of zeroth byte if bytes more than one – this works for Latin and mostly for all Cyrillic letters.

-## countSubstrings(haystack, needle) {#countSubstrings}
+## countSubstrings {#countSubstrings}

-Count the number of substring occurrences
+Returns the number of substring occurrences.

-For a case-insensitive search, use the function `countSubstringsCaseInsensitive` (or `countSubstringsCaseInsensitiveUTF8`).
+For a case-insensitive search, use the [countSubstringsCaseInsensitive](../../sql-reference/functions/string-search-functions.md#countSubstringsCaseInsensitive) or [countSubstringsCaseInsensitiveUTF8](../../sql-reference/functions/string-search-functions.md#countSubstringsCaseInsensitiveUTF8) functions.

**Syntax**

@@ -554,20 +554,20 @@ countSubstrings(haystack, needle[, start_pos])

- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `start_pos` – Optional parameter, position of the first character in the string to start search. [UInt](../../sql-reference/data-types/int-uint.md)
+- `start_pos` – Position of the first character in the string to start the search from. Optional. [UInt](../../sql-reference/data-types/int-uint.md).

**Returned values**

- Number of occurrences.

-Type: `Integer`.
+Type: [UInt64](../../sql-reference/data-types/int-uint.md).

**Examples**

Query:

``` sql
-SELECT countSubstrings('foobar.com', '.')
+SELECT countSubstrings('foobar.com', '.');
```

Result:

@@ -581,7 +581,7 @@ Result:
Query:

``` sql
-SELECT countSubstrings('aaaa', 'aa')
+SELECT countSubstrings('aaaa', 'aa');
```

Result:

@@ -592,6 +592,138 @@ Result:
└───────────────────────────────┘
```

Query:

```sql
SELECT countSubstrings('abc___abc', 'abc', 4);
```

Result:

``` text
┌─countSubstrings('abc___abc', 'abc', 4)─┐
│                                      1 │
└────────────────────────────────────────┘
```

## countSubstringsCaseInsensitive {#countSubstringsCaseInsensitive}

Returns the number of substring occurrences, ignoring case.

**Syntax**

``` sql
countSubstringsCaseInsensitive(haystack, needle[, start_pos])
```

**Parameters**

- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `start_pos` – Position of the first character in the string to start the search from. Optional. [UInt](../../sql-reference/data-types/int-uint.md).

**Returned values**

- Number of occurrences.

Type: [UInt64](../../sql-reference/data-types/int-uint.md).

**Examples**

Query:

``` sql
select countSubstringsCaseInsensitive('aba', 'B');
```

Result:

``` text
┌─countSubstringsCaseInsensitive('aba', 'B')─┐
│                                          1 │
└────────────────────────────────────────────┘
```

Query:

``` sql
SELECT countSubstringsCaseInsensitive('foobar.com', 'CoM');
```

Result:

``` text
┌─countSubstringsCaseInsensitive('foobar.com', 'CoM')─┐
│                                                   1 │
└─────────────────────────────────────────────────────┘
```

Query:

``` sql
SELECT countSubstringsCaseInsensitive('abC___abC', 'aBc', 2);
```

Result:

``` text
┌─countSubstringsCaseInsensitive('abC___abC', 'aBc', 2)─┐
│                                                     1 │
└───────────────────────────────────────────────────────┘
```

## countSubstringsCaseInsensitiveUTF8 {#countSubstringsCaseInsensitiveUTF8}

Returns the number of substring occurrences in a `UTF-8` string, ignoring case.

**Syntax**

``` sql
SELECT countSubstringsCaseInsensitiveUTF8(haystack, needle[, start_pos])
```

**Parameters**

- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `start_pos` – Position of the first character in the string to start the search from. Optional. [UInt](../../sql-reference/data-types/int-uint.md).

**Returned values**

- Number of occurrences.

Type: [UInt64](../../sql-reference/data-types/int-uint.md).

**Examples**

Query:

``` sql
SELECT countSubstringsCaseInsensitiveUTF8('абв', 'A');
```

Result:

``` text
┌─countSubstringsCaseInsensitiveUTF8('абв', 'A')─┐
│                                              1 │
└────────────────────────────────────────────────┘
```

Query:

```sql
SELECT countSubstringsCaseInsensitiveUTF8('аБв__АбВ__абв', 'Абв');
```

Result:

``` text
┌─countSubstringsCaseInsensitiveUTF8('аБв__АбВ__абв', 'Абв')─┐
│                                                           3 │
└────────────────────────────────────────────────────────────┘
```

## countMatches(haystack, pattern) {#countmatcheshaystack-pattern}

Returns the number of regular expression matches for a `pattern` in a `haystack`.
@@ -573,4 +573,190 @@ SELECT countMatches('aaaa', 'aa');
└───────────────────────────────┘
```

## countSubstrings {#countSubstrings}

Returns the number of substring occurrences.

For a case-insensitive search, use the [countSubstringsCaseInsensitive](../../sql-reference/functions/string-search-functions.md#countSubstringsCaseInsensitive) or [countSubstringsCaseInsensitiveUTF8](../../sql-reference/functions/string-search-functions.md#countSubstringsCaseInsensitiveUTF8) functions.

**Syntax**

``` sql
countSubstrings(haystack, needle[, start_pos])
```

**Parameters**

- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `start_pos` – Position of the first character in the string to start the search from. Optional. [UInt](../../sql-reference/data-types/int-uint.md).

**Returned values**

- Number of occurrences.

Type: [UInt64](../../sql-reference/data-types/int-uint.md).

**Examples**

Query:

``` sql
SELECT countSubstrings('foobar.com', '.');
```

Result:

``` text
┌─countSubstrings('foobar.com', '.')─┐
│                                  1 │
└────────────────────────────────────┘
```

Query:

``` sql
SELECT countSubstrings('aaaa', 'aa');
```

Result:

``` text
┌─countSubstrings('aaaa', 'aa')─┐
│                             2 │
└───────────────────────────────┘
```

Query:

```sql
SELECT countSubstrings('abc___abc', 'abc', 4);
```

Result:

``` text
┌─countSubstrings('abc___abc', 'abc', 4)─┐
│                                      1 │
└────────────────────────────────────────┘
```

## countSubstringsCaseInsensitive {#countSubstringsCaseInsensitive}

Returns the number of substring occurrences, ignoring case.

**Syntax**

``` sql
countSubstringsCaseInsensitive(haystack, needle[, start_pos])
```

**Parameters**

- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `start_pos` – Position of the first character in the string to start the search from. Optional. [UInt](../../sql-reference/data-types/int-uint.md).

**Returned values**

- Number of occurrences.

Type: [UInt64](../../sql-reference/data-types/int-uint.md).

**Examples**

Query:

``` sql
select countSubstringsCaseInsensitive('aba', 'B');
```

Result:

``` text
┌─countSubstringsCaseInsensitive('aba', 'B')─┐
│                                          1 │
└────────────────────────────────────────────┘
```

Query:

``` sql
SELECT countSubstringsCaseInsensitive('foobar.com', 'CoM');
```

Result:

``` text
┌─countSubstringsCaseInsensitive('foobar.com', 'CoM')─┐
│                                                   1 │
└─────────────────────────────────────────────────────┘
```

Query:

``` sql
SELECT countSubstringsCaseInsensitive('abC___abC', 'aBc', 2);
```

Result:

``` text
┌─countSubstringsCaseInsensitive('abC___abC', 'aBc', 2)─┐
│                                                     1 │
└───────────────────────────────────────────────────────┘
```

## countSubstringsCaseInsensitiveUTF8 {#countSubstringsCaseInsensitiveUTF8}

Returns the number of substring occurrences in a `UTF-8` string, ignoring case.

**Syntax**

``` sql
SELECT countSubstringsCaseInsensitiveUTF8(haystack, needle[, start_pos])
```

**Parameters**

- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `start_pos` – Position of the first character in the string to start the search from. Optional. [UInt](../../sql-reference/data-types/int-uint.md).

**Returned values**

- Number of occurrences.

Type: [UInt64](../../sql-reference/data-types/int-uint.md).

**Examples**

Query:

``` sql
SELECT countSubstringsCaseInsensitiveUTF8('абв', 'A');
```

Result:

``` text
┌─countSubstringsCaseInsensitiveUTF8('абв', 'A')─┐
│                                              1 │
└────────────────────────────────────────────────┘
```

Query:

```sql
SELECT countSubstringsCaseInsensitiveUTF8('аБв__АбВ__абв', 'Абв');
```

Result:

``` text
┌─countSubstringsCaseInsensitiveUTF8('аБв__АбВ__абв', 'Абв')─┐
│                                                           3 │
└────────────────────────────────────────────────────────────┘
```

[Original article](https://clickhouse.tech/docs/ru/query_language/functions/string_search_functions/) <!--hide-->
@@ -1,6 +1,6 @@
-## INSERT {#insert}
+## INSERT INTO Statement {#insert}

-The INSERT query is mainly used to add data to the system.
+The INSERT INTO statement is mainly used to add data to the system.

Basic query format:

@@ -8,7 +8,52 @@ The INSERT query is mainly used to add data to the system.
INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
```

-You can specify a list of columns to insert, such as `[(c1, c2, c3)]`. Columns that exist in the table structure but are not in the insert list are filled as follows:
+You can specify a list of columns to insert, such as `[(c1, c2, c3)]`. You can also use an expression with column [matchers](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#apply-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier).

For example, consider the table:

``` sql
SHOW CREATE insert_select_testtable;
```

```text
CREATE TABLE insert_select_testtable
(
    `a` Int8,
    `b` String,
    `c` Int8
)
ENGINE = MergeTree()
ORDER BY a
SETTINGS index_granularity = 8192
```

``` sql
INSERT INTO insert_select_testtable (*) VALUES (1, 'a', 1) ;
```

If you want to insert data into all columns except `b`, you need to pass exactly as many values as the number of columns selected in parentheses:

``` sql
INSERT INTO insert_select_testtable (* EXCEPT(b)) Values (2, 2);
```

``` sql
SELECT * FROM insert_select_testtable;
```

```
┌─a─┬─b─┬─c─┐
│ 2 │   │ 2 │
└───┴───┴───┘
┌─a─┬─b─┬─c─┐
│ 1 │ a │ 1 │
└───┴───┴───┘
```

In this example, the second inserted row has its `a` and `c` columns filled with the passed values, while `b` is filled with the default value.

Columns that exist in the table structure but are not in the insert list are filled as follows:

- If a `DEFAULT` expression is defined, the value is computed from that expression.
- If no `DEFAULT` expression is defined, zeros or empty strings are inserted.
docs/zh/sql-reference/statements/select/all.md (new file, 17 lines)

@@ -0,0 +1,17 @@
# ALL Clause {#select-all}

`SELECT ALL` is identical to `SELECT` without `DISTINCT`.

- If `ALL` is specified, it is ignored.
- If both `ALL` and `DISTINCT` are specified, an exception is thrown.

`ALL` can also be specified inside an aggregate function, with the same effect (a no-op). For example:

```sql
SELECT sum(ALL number) FROM numbers(10);
```

is equivalent to

```sql
SELECT sum(number) FROM numbers(10);
```
@@ -159,6 +159,7 @@ enum class AccessType
    M(REMOTE, "", GLOBAL, SOURCES) \
    M(MONGO, "", GLOBAL, SOURCES) \
    M(MYSQL, "", GLOBAL, SOURCES) \
    M(POSTGRES, "", GLOBAL, SOURCES) \
    M(ODBC, "", GLOBAL, SOURCES) \
    M(JDBC, "", GLOBAL, SOURCES) \
    M(HDFS, "", GLOBAL, SOURCES) \
@@ -79,6 +79,11 @@ if (USE_AMQPCPP)
    add_headers_and_sources(dbms Storages/RabbitMQ)
endif()

if (USE_LIBPQXX)
    add_headers_and_sources(dbms Databases/PostgreSQL)
    add_headers_and_sources(dbms Storages/PostgreSQL)
endif()

if (USE_ROCKSDB)
    add_headers_and_sources(dbms Storages/RocksDB)
endif()
@@ -439,6 +444,11 @@ if (USE_ROCKSDB)
    dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${ROCKSDB_INCLUDE_DIR})
endif()

if (USE_LIBPQXX)
    dbms_target_link_libraries(PUBLIC ${LIBPQXX_LIBRARY})
    dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${LIBPQXX_INCLUDE_DIR})
endif()

dbms_target_link_libraries(PRIVATE _boost_context)

if (ENABLE_TESTS AND USE_GTEST)
@@ -86,6 +86,9 @@ public:
    const ColumnArray & getNestedColumn() const { return assert_cast<const ColumnArray &>(*nested); }
    ColumnArray & getNestedColumn() { return assert_cast<ColumnArray &>(*nested); }

    const ColumnPtr & getNestedColumnPtr() const { return nested; }
    ColumnPtr & getNestedColumnPtr() { return nested; }

    const ColumnTuple & getNestedData() const { return assert_cast<const ColumnTuple &>(getNestedColumn().getData()); }
    ColumnTuple & getNestedData() { return assert_cast<ColumnTuple &>(getNestedColumn().getData()); }
};
@@ -143,9 +143,11 @@ public:
    const IColumn & getNestedColumn() const { return *nested_column; }

    const ColumnPtr & getNestedColumnPtr() const { return nested_column; }
    ColumnPtr & getNestedColumnPtr() { return nested_column; }

    /// Return the column that represents the byte map.
    const ColumnPtr & getNullMapColumnPtr() const { return null_map; }
    ColumnPtr & getNullMapColumnPtr() { return null_map; }

    ColumnUInt8 & getNullMapColumn() { return assert_cast<ColumnUInt8 &>(*null_map); }
    const ColumnUInt8 & getNullMapColumn() const { return assert_cast<const ColumnUInt8 &>(*null_map); }
@@ -99,6 +99,7 @@ public:
    Columns getColumnsCopy() const { return {columns.begin(), columns.end()}; }

    const ColumnPtr & getColumnPtr(size_t idx) const { return columns[idx]; }
    ColumnPtr & getColumnPtr(size_t idx) { return columns[idx]; }

private:
    int compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator=nullptr) const;
@@ -4,6 +4,7 @@
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeUUID.h>
#include <DataTypes/DataTypesDecimal.h>
@@ -35,49 +36,53 @@ void ExternalResultDescription::init(const Block & sample_block_)
    DataTypePtr type_not_nullable = removeNullable(elem.type);
    const IDataType * type = type_not_nullable.get();

-   if (typeid_cast<const DataTypeUInt8 *>(type))
+   WhichDataType which(type);
+
+   if (which.isUInt8())
        types.emplace_back(ValueType::vtUInt8, is_nullable);
-   else if (typeid_cast<const DataTypeUInt16 *>(type))
+   else if (which.isUInt16())
        types.emplace_back(ValueType::vtUInt16, is_nullable);
-   else if (typeid_cast<const DataTypeUInt32 *>(type))
+   else if (which.isUInt32())
        types.emplace_back(ValueType::vtUInt32, is_nullable);
-   else if (typeid_cast<const DataTypeUInt64 *>(type))
+   else if (which.isUInt64())
        types.emplace_back(ValueType::vtUInt64, is_nullable);
-   else if (typeid_cast<const DataTypeInt8 *>(type))
+   else if (which.isInt8())
        types.emplace_back(ValueType::vtInt8, is_nullable);
-   else if (typeid_cast<const DataTypeInt16 *>(type))
+   else if (which.isInt16())
        types.emplace_back(ValueType::vtInt16, is_nullable);
-   else if (typeid_cast<const DataTypeInt32 *>(type))
+   else if (which.isInt32())
        types.emplace_back(ValueType::vtInt32, is_nullable);
-   else if (typeid_cast<const DataTypeInt64 *>(type))
+   else if (which.isInt64())
        types.emplace_back(ValueType::vtInt64, is_nullable);
-   else if (typeid_cast<const DataTypeFloat32 *>(type))
+   else if (which.isFloat32())
        types.emplace_back(ValueType::vtFloat32, is_nullable);
-   else if (typeid_cast<const DataTypeFloat64 *>(type))
+   else if (which.isFloat64())
        types.emplace_back(ValueType::vtFloat64, is_nullable);
-   else if (typeid_cast<const DataTypeString *>(type))
+   else if (which.isString())
        types.emplace_back(ValueType::vtString, is_nullable);
-   else if (typeid_cast<const DataTypeDate *>(type))
+   else if (which.isDate())
        types.emplace_back(ValueType::vtDate, is_nullable);
-   else if (typeid_cast<const DataTypeDateTime *>(type))
+   else if (which.isDateTime())
        types.emplace_back(ValueType::vtDateTime, is_nullable);
-   else if (typeid_cast<const DataTypeUUID *>(type))
+   else if (which.isUUID())
        types.emplace_back(ValueType::vtUUID, is_nullable);
-   else if (typeid_cast<const DataTypeEnum8 *>(type))
+   else if (which.isEnum8())
        types.emplace_back(ValueType::vtString, is_nullable);
-   else if (typeid_cast<const DataTypeEnum16 *>(type))
+   else if (which.isEnum16())
        types.emplace_back(ValueType::vtString, is_nullable);
-   else if (typeid_cast<const DataTypeDateTime64 *>(type))
+   else if (which.isDateTime64())
        types.emplace_back(ValueType::vtDateTime64, is_nullable);
-   else if (typeid_cast<const DataTypeDecimal<Decimal32> *>(type))
+   else if (which.isDecimal32())
        types.emplace_back(ValueType::vtDecimal32, is_nullable);
-   else if (typeid_cast<const DataTypeDecimal<Decimal64> *>(type))
+   else if (which.isDecimal64())
        types.emplace_back(ValueType::vtDecimal64, is_nullable);
-   else if (typeid_cast<const DataTypeDecimal<Decimal128> *>(type))
+   else if (which.isDecimal128())
        types.emplace_back(ValueType::vtDecimal128, is_nullable);
-   else if (typeid_cast<const DataTypeDecimal<Decimal256> *>(type))
+   else if (which.isDecimal256())
        types.emplace_back(ValueType::vtDecimal256, is_nullable);
-   else if (typeid_cast<const DataTypeFixedString *>(type))
+   else if (which.isArray())
+       types.emplace_back(ValueType::vtArray, is_nullable);
+   else if (which.isFixedString())
        types.emplace_back(ValueType::vtFixedString, is_nullable);
    else
        throw Exception{"Unsupported type " + type->getName(), ErrorCodes::UNKNOWN_TYPE};
@@ -31,6 +31,7 @@ struct ExternalResultDescription
        vtDecimal64,
        vtDecimal128,
        vtDecimal256,
        vtArray,
        vtFixedString
    };
@@ -17,6 +17,29 @@ namespace ErrorCodes
    extern const int THERE_IS_NO_COLUMN;
}

NameAndTypePair::NameAndTypePair(
    const String & name_in_storage_, const String & subcolumn_name_,
    const DataTypePtr & type_in_storage_, const DataTypePtr & subcolumn_type_)
    : name(name_in_storage_ + (subcolumn_name_.empty() ? "" : "." + subcolumn_name_))
    , type(subcolumn_type_)
    , type_in_storage(type_in_storage_)
    , subcolumn_delimiter_position(name_in_storage_.size()) {}

String NameAndTypePair::getNameInStorage() const
{
    if (!subcolumn_delimiter_position)
        return name;

    return name.substr(0, *subcolumn_delimiter_position);
}

String NameAndTypePair::getSubcolumnName() const
{
    if (!subcolumn_delimiter_position)
        return "";

    return name.substr(*subcolumn_delimiter_position + 1, name.size() - *subcolumn_delimiter_position);
}

void NamesAndTypesList::readText(ReadBuffer & buf)
{
@@ -137,25 +160,20 @@ NamesAndTypesList NamesAndTypesList::filter(const Names & names) const

NamesAndTypesList NamesAndTypesList::addTypes(const Names & names) const
{
-   /// NOTE: It's better to make a map in `IStorage` than to create it here every time again.
-   #if !defined(ARCADIA_BUILD)
-       google::dense_hash_map<StringRef, const DataTypePtr *, StringRefHash> types;
-   #else
-       google::sparsehash::dense_hash_map<StringRef, const DataTypePtr *, StringRefHash> types;
-   #endif
-   types.set_empty_key(StringRef());
+   std::unordered_map<std::string_view, const NameAndTypePair *> self_columns;

-   for (const NameAndTypePair & column : *this)
-       types[column.name] = &column.type;
+   for (const auto & column : *this)
+       self_columns[column.name] = &column;

    NamesAndTypesList res;
    for (const String & name : names)
    {
-       auto it = types.find(name);
-       if (it == types.end())
+       auto it = self_columns.find(name);
+       if (it == self_columns.end())
            throw Exception("No column " + name, ErrorCodes::THERE_IS_NO_COLUMN);
-       res.emplace_back(name, *it->second);
+       res.emplace_back(*it->second);
    }

    return res;
}
@@ -15,11 +15,19 @@ namespace DB

struct NameAndTypePair
{
-   String name;
-   DataTypePtr type;
+public:
+   NameAndTypePair() = default;
+   NameAndTypePair(const String & name_, const DataTypePtr & type_)
+       : name(name_), type(type_), type_in_storage(type_) {}

-   NameAndTypePair() {}
-   NameAndTypePair(const String & name_, const DataTypePtr & type_) : name(name_), type(type_) {}
+   NameAndTypePair(const String & name_in_storage_, const String & subcolumn_name_,
+       const DataTypePtr & type_in_storage_, const DataTypePtr & subcolumn_type_);
+
+   String getNameInStorage() const;
+   String getSubcolumnName() const;
+
+   bool isSubcolumn() const { return subcolumn_delimiter_position != std::nullopt; }
+   DataTypePtr getTypeInStorage() const { return type_in_storage; }

    bool operator<(const NameAndTypePair & rhs) const
    {
@@ -30,8 +38,26 @@ struct NameAndTypePair
    {
        return name == rhs.name && type->equals(*rhs.type);
    }

+   String name;
+   DataTypePtr type;
+
+private:
+   DataTypePtr type_in_storage;
+   std::optional<size_t> subcolumn_delimiter_position;
};

+/// This is needed to use structured bindings for NameAndTypePair:
+///     const auto & [name, type] = name_and_type;
+template <int I>
+decltype(auto) get(const NameAndTypePair & name_and_type)
+{
+    if constexpr (I == 0)
+        return name_and_type.name;
+    else if constexpr (I == 1)
+        return name_and_type.type;
+}
+
using NamesAndTypes = std::vector<NameAndTypePair>;

class NamesAndTypesList : public std::list<NameAndTypePair>
@@ -81,3 +107,10 @@ public:
};

}

+namespace std
+{
+    template <> struct tuple_size<DB::NameAndTypePair> : std::integral_constant<size_t, 2> {};
+    template <> struct tuple_element<0, DB::NameAndTypePair> { using type = DB::String; };
+    template <> struct tuple_element<1, DB::NameAndTypePair> { using type = DB::DataTypePtr; };
+}
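The `std::tuple_size` / `std::tuple_element` specializations and the `get<I>` overload added above are what make structured bindings work for `NameAndTypePair`. A minimal usage sketch, not part of the commit; the column names and types here are made up, and the includes assume the usual ClickHouse headers:

```cpp
#include <iostream>

#include <Core/NamesAndTypes.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>

using namespace DB;

int main()
{
    NamesAndTypesList columns;
    columns.emplace_back("id", std::make_shared<DataTypeUInt64>());
    columns.emplace_back("name", std::make_shared<DataTypeString>());

    /// Decomposition works because get<I>() is found via ADL and the
    /// std::tuple_size / std::tuple_element specializations describe the pair.
    for (const auto & [name, type] : columns)
        std::cout << name << " : " << type->getName() << '\n';
}
```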
@@ -405,6 +405,7 @@ class IColumn;
    M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \
    M(Bool, enable_global_with_statement, false, "Propagate WITH statements to UNION queries and all subqueries", 0) \
    M(Bool, aggregate_functions_null_for_empty, false, "Rewrite all aggregate functions in a query, adding -OrNull suffix to them", 0) \
    M(Bool, flatten_nested, true, "If true, columns of type Nested will be flatten to separate array columns instead of one array of tuples", 0) \
    M(Bool, asterisk_include_materialized_columns, false, "Include MATERIALIZED columns for wildcard query", 0) \
    M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \
    M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \
@@ -12,4 +12,4 @@
#cmakedefine01 USE_OPENCL
#cmakedefine01 USE_LDAP
#cmakedefine01 USE_ROCKSDB
#cmakedefine01 USE_LIBPQXX
@@ -71,7 +71,7 @@ void NativeBlockInputStream::resetParser()
    is_killed.store(false);
}

-void NativeBlockInputStream::readData(const IDataType & type, IColumn & column, ReadBuffer & istr, size_t rows, double avg_value_size_hint)
+void NativeBlockInputStream::readData(const IDataType & type, ColumnPtr & column, ReadBuffer & istr, size_t rows, double avg_value_size_hint)
{
    IDataType::DeserializeBinaryBulkSettings settings;
    settings.getter = [&](IDataType::SubstreamPath) -> ReadBuffer * { return &istr; };
@@ -82,8 +82,8 @@ void NativeBlockInputStream::readData(const IDataType & type, IColumn & column,
    type.deserializeBinaryBulkStatePrefix(settings, state);
    type.deserializeBinaryBulkWithMultipleStreams(column, rows, settings, state);

-   if (column.size() != rows)
-       throw Exception("Cannot read all data in NativeBlockInputStream. Rows read: " + toString(column.size()) + ". Rows expected: " + toString(rows) + ".",
+   if (column->size() != rows)
+       throw Exception("Cannot read all data in NativeBlockInputStream. Rows read: " + toString(column->size()) + ". Rows expected: " + toString(rows) + ".",
            ErrorCodes::CANNOT_READ_ALL_DATA);
}

@@ -158,11 +158,11 @@ Block NativeBlockInputStream::readImpl()
    }

    /// Data
-   MutableColumnPtr read_column = column.type->createColumn();
+   ColumnPtr read_column = column.type->createColumn();

    double avg_value_size_hint = avg_value_size_hints.empty() ? 0 : avg_value_size_hints[i];
    if (rows) /// If no rows, nothing to read.
-       readData(*column.type, *read_column, istr, rows, avg_value_size_hint);
+       readData(*column.type, read_column, istr, rows, avg_value_size_hint);

    column.column = std::move(read_column);

@@ -74,7 +74,7 @@ public:

    String getName() const override { return "Native"; }

-   static void readData(const IDataType & type, IColumn & column, ReadBuffer & istr, size_t rows, double avg_value_size_hint);
+   static void readData(const IDataType & type, ColumnPtr & column, ReadBuffer & istr, size_t rows, double avg_value_size_hint);

    Block getHeader() const override;
src/DataStreams/PostgreSQLBlockInputStream.cpp (new file, 297 lines)

@@ -0,0 +1,297 @@
#include "PostgreSQLBlockInputStream.h"

#if USE_LIBPQXX
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnDecimal.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Interpreters/convertFieldToType.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <Common/assert_cast.h>
#include <ext/range.h>
#include <common/logger_useful.h>


namespace DB
{

namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
}

PostgreSQLBlockInputStream::PostgreSQLBlockInputStream(
    ConnectionPtr connection_,
    const std::string & query_str_,
    const Block & sample_block,
    const UInt64 max_block_size_)
    : query_str(query_str_)
    , max_block_size(max_block_size_)
    , connection(connection_)
{
    description.init(sample_block);
    for (const auto idx : ext::range(0, description.sample_block.columns()))
        if (description.types[idx].first == ValueType::vtArray)
            prepareArrayInfo(idx, description.sample_block.getByPosition(idx).type);
    /// pqxx::stream_from uses COPY command, will get error if ';' is present
    if (query_str.ends_with(';'))
        query_str.resize(query_str.size() - 1);
}


void PostgreSQLBlockInputStream::readPrefix()
{
    tx = std::make_unique<pqxx::read_transaction>(*connection);
    stream = std::make_unique<pqxx::stream_from>(*tx, pqxx::from_query, std::string_view(query_str));
}


Block PostgreSQLBlockInputStream::readImpl()
{
    /// Check if pqxx::stream_from is finished
    if (!stream || !(*stream))
        return Block();

    MutableColumns columns = description.sample_block.cloneEmptyColumns();
    size_t num_rows = 0;

    while (true)
    {
        const std::vector<pqxx::zview> * row{stream->read_row()};

        /// row is nullptr if pqxx::stream_from is finished
        if (!row)
            break;

        for (const auto idx : ext::range(0, row->size()))
        {
            const auto & sample = description.sample_block.getByPosition(idx);

            /// if got NULL type, then pqxx::zview will return nullptr in c_str()
            if ((*row)[idx].c_str())
            {
                if (description.types[idx].second)
                {
                    ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*columns[idx]);
                    const auto & data_type = assert_cast<const DataTypeNullable &>(*sample.type);
                    insertValue(column_nullable.getNestedColumn(), (*row)[idx], description.types[idx].first, data_type.getNestedType(), idx);
                    column_nullable.getNullMapData().emplace_back(0);
                }
                else
                {
                    insertValue(*columns[idx], (*row)[idx], description.types[idx].first, sample.type, idx);
                }
            }
            else
            {
                insertDefaultValue(*columns[idx], *sample.column);
            }
        }

        if (++num_rows == max_block_size)
            break;
    }

    return description.sample_block.cloneWithColumns(std::move(columns));
}


void PostgreSQLBlockInputStream::readSuffix()
{
    if (stream)
    {
        stream->complete();
        tx->commit();
    }
}


void PostgreSQLBlockInputStream::insertValue(IColumn & column, std::string_view value,
    const ExternalResultDescription::ValueType type, const DataTypePtr data_type, size_t idx)
{
    switch (type)
    {
        case ValueType::vtUInt8:
            assert_cast<ColumnUInt8 &>(column).insertValue(pqxx::from_string<uint16_t>(value));
            break;
        case ValueType::vtUInt16:
            assert_cast<ColumnUInt16 &>(column).insertValue(pqxx::from_string<uint16_t>(value));
            break;
        case ValueType::vtUInt32:
            assert_cast<ColumnUInt32 &>(column).insertValue(pqxx::from_string<uint32_t>(value));
            break;
        case ValueType::vtUInt64:
            assert_cast<ColumnUInt64 &>(column).insertValue(pqxx::from_string<uint64_t>(value));
            break;
        case ValueType::vtInt8:
            assert_cast<ColumnInt8 &>(column).insertValue(pqxx::from_string<int16_t>(value));
            break;
        case ValueType::vtInt16:
            assert_cast<ColumnInt16 &>(column).insertValue(pqxx::from_string<int16_t>(value));
            break;
        case ValueType::vtInt32:
            assert_cast<ColumnInt32 &>(column).insertValue(pqxx::from_string<int32_t>(value));
            break;
        case ValueType::vtInt64:
            assert_cast<ColumnInt64 &>(column).insertValue(pqxx::from_string<int64_t>(value));
            break;
        case ValueType::vtFloat32:
            assert_cast<ColumnFloat32 &>(column).insertValue(pqxx::from_string<float>(value));
            break;
        case ValueType::vtFloat64:
            assert_cast<ColumnFloat64 &>(column).insertValue(pqxx::from_string<double>(value));
            break;
        case ValueType::vtFixedString: [[fallthrough]];
        case ValueType::vtString:
            assert_cast<ColumnString &>(column).insertData(value.data(), value.size());
            break;
        case ValueType::vtUUID:
            assert_cast<ColumnUInt128 &>(column).insert(parse<UUID>(value.data(), value.size()));
            break;
        case ValueType::vtDate:
            assert_cast<ColumnUInt16 &>(column).insertValue(UInt16{LocalDate{std::string(value)}.getDayNum()});
            break;
        case ValueType::vtDateTime:
            assert_cast<ColumnUInt32 &>(column).insertValue(time_t{LocalDateTime{std::string(value)}});
            break;
        case ValueType::vtDateTime64: [[fallthrough]];
        case ValueType::vtDecimal32: [[fallthrough]];
        case ValueType::vtDecimal64: [[fallthrough]];
        case ValueType::vtDecimal128: [[fallthrough]];
        case ValueType::vtDecimal256:
        {
            ReadBufferFromString istr(value);
            data_type->deserializeAsWholeText(column, istr, FormatSettings{});
            break;
        }
        case ValueType::vtArray:
        {
            pqxx::array_parser parser{value};
            std::pair<pqxx::array_parser::juncture, std::string> parsed = parser.get_next();

            size_t dimension = 0, max_dimension = 0, expected_dimensions = array_info[idx].num_dimensions;
            const auto parse_value = array_info[idx].pqxx_parser;
            std::vector<std::vector<Field>> dimensions(expected_dimensions + 1);

            while (parsed.first != pqxx::array_parser::juncture::done)
            {
                if ((parsed.first == pqxx::array_parser::juncture::row_start) && (++dimension > expected_dimensions))
                    throw Exception("Got more dimensions than expected", ErrorCodes::BAD_ARGUMENTS);

                else if (parsed.first == pqxx::array_parser::juncture::string_value)
                    dimensions[dimension].emplace_back(parse_value(parsed.second));

                else if (parsed.first == pqxx::array_parser::juncture::null_value)
                    dimensions[dimension].emplace_back(array_info[idx].default_value);

                else if (parsed.first == pqxx::array_parser::juncture::row_end)
                {
                    max_dimension = std::max(max_dimension, dimension);

                    if (--dimension == 0)
                        break;

                    dimensions[dimension].emplace_back(Array(dimensions[dimension + 1].begin(), dimensions[dimension + 1].end()));
                    dimensions[dimension + 1].clear();
                }

                parsed = parser.get_next();
            }

            if (max_dimension < expected_dimensions)
                throw Exception(ErrorCodes::BAD_ARGUMENTS,
                    "Got less dimensions than expected. ({} instead of {})", max_dimension, expected_dimensions);

            assert_cast<ColumnArray &>(column).insert(Array(dimensions[1].begin(), dimensions[1].end()));
            break;
        }
    }
}


void PostgreSQLBlockInputStream::prepareArrayInfo(size_t column_idx, const DataTypePtr data_type)
{
    const auto * array_type = typeid_cast<const DataTypeArray *>(data_type.get());
    auto nested = array_type->getNestedType();

    size_t count_dimensions = 1;
    while (isArray(nested))
    {
        ++count_dimensions;
        nested = typeid_cast<const DataTypeArray *>(nested.get())->getNestedType();
    }

    Field default_value = nested->getDefault();
    if (nested->isNullable())
        nested = static_cast<const DataTypeNullable *>(nested.get())->getNestedType();

    WhichDataType which(nested);
    std::function<Field(std::string & fields)> parser;

    if (which.isUInt8() || which.isUInt16())
        parser = [](std::string & field) -> Field { return pqxx::from_string<uint16_t>(field); };
    else if (which.isInt8() || which.isInt16())
        parser = [](std::string & field) -> Field { return pqxx::from_string<int16_t>(field); };
    else if (which.isUInt32())
        parser = [](std::string & field) -> Field { return pqxx::from_string<uint32_t>(field); };
    else if (which.isInt32())
        parser = [](std::string & field) -> Field { return pqxx::from_string<int32_t>(field); };
    else if (which.isUInt64())
        parser = [](std::string & field) -> Field { return pqxx::from_string<uint64_t>(field); };
    else if (which.isInt64())
        parser = [](std::string & field) -> Field { return pqxx::from_string<int64_t>(field); };
    else if (which.isFloat32())
        parser = [](std::string & field) -> Field { return pqxx::from_string<float>(field); };
    else if (which.isFloat64())
        parser = [](std::string & field) -> Field { return pqxx::from_string<double>(field); };
    else if (which.isString() || which.isFixedString())
        parser = [](std::string & field) -> Field { return field; };
    else if (which.isDate())
        parser = [](std::string & field) -> Field { return UInt16{LocalDate{field}.getDayNum()}; };
    else if (which.isDateTime())
        parser = [](std::string & field) -> Field { return time_t{LocalDateTime{field}}; };
    else if (which.isDecimal32())
        parser = [nested](std::string & field) -> Field
        {
            const auto & type = typeid_cast<const DataTypeDecimal<Decimal32> *>(nested.get());
            DataTypeDecimal<Decimal32> res(getDecimalPrecision(*type), getDecimalScale(*type));
            return convertFieldToType(field, res);
        };
    else if (which.isDecimal64())
        parser = [nested](std::string & field) -> Field
        {
            const auto & type = typeid_cast<const DataTypeDecimal<Decimal64> *>(nested.get());
            DataTypeDecimal<Decimal64> res(getDecimalPrecision(*type), getDecimalScale(*type));
            return convertFieldToType(field, res);
        };
    else if (which.isDecimal128())
        parser = [nested](std::string & field) -> Field
        {
            const auto & type = typeid_cast<const DataTypeDecimal<Decimal128> *>(nested.get());
            DataTypeDecimal<Decimal128> res(getDecimalPrecision(*type), getDecimalScale(*type));
            return convertFieldToType(field, res);
        };
    else if (which.isDecimal256())
        parser = [nested](std::string & field) -> Field
        {
            const auto & type = typeid_cast<const DataTypeDecimal<Decimal256> *>(nested.get());
            DataTypeDecimal<Decimal256> res(getDecimalPrecision(*type), getDecimalScale(*type));
            return convertFieldToType(field, res);
        };
    else
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Type conversion to {} is not supported", nested->getName());

    array_info[column_idx] = {count_dimensions, default_value, parser};
}

}

#endif
src/DataStreams/PostgreSQLBlockInputStream.h (new file, 65 lines)

@@ -0,0 +1,65 @@
#pragma once

#if !defined(ARCADIA_BUILD)
#include "config_core.h"
#endif

#if USE_LIBPQXX
#include <Core/Block.h>
#include <DataStreams/IBlockInputStream.h>
#include <Core/ExternalResultDescription.h>
#include <Core/Field.h>
#include <pqxx/pqxx>


namespace DB
{
using ConnectionPtr = std::shared_ptr<pqxx::connection>;

class PostgreSQLBlockInputStream : public IBlockInputStream
{
public:
    PostgreSQLBlockInputStream(
        ConnectionPtr connection_,
        const std::string & query_str,
        const Block & sample_block,
        const UInt64 max_block_size_);

    String getName() const override { return "PostgreSQL"; }
    Block getHeader() const override { return description.sample_block.cloneEmpty(); }

private:
    using ValueType = ExternalResultDescription::ValueType;

    void readPrefix() override;
    Block readImpl() override;
    void readSuffix() override;

    void insertValue(IColumn & column, std::string_view value,
        const ExternalResultDescription::ValueType type, const DataTypePtr data_type, size_t idx);
    void insertDefaultValue(IColumn & column, const IColumn & sample_column)
    {
        column.insertFrom(sample_column, 0);
    }
    void prepareArrayInfo(size_t column_idx, const DataTypePtr data_type);

    String query_str;
    const UInt64 max_block_size;
    ExternalResultDescription description;

    ConnectionPtr connection;
    std::unique_ptr<pqxx::read_transaction> tx;
    std::unique_ptr<pqxx::stream_from> stream;

    struct ArrayInfo
    {
        size_t num_dimensions;
        Field default_value;
        std::function<Field(std::string & field)> pqxx_parser;
    };
    std::unordered_map<size_t, ArrayInfo> array_info;
};

}

#endif
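A hedged usage sketch of the new input stream (not part of the commit; the connection string, table and column layout are hypothetical, and error handling is omitted):

```cpp
#include <DataStreams/PostgreSQLBlockInputStream.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>

using namespace DB;

int main()
{
    /// Hypothetical connection string; any reachable PostgreSQL server would do.
    auto connection = std::make_shared<pqxx::connection>(
        "host=localhost port=5432 dbname=test user=postgres");

    /// The sample block describes the expected result columns and drives type conversion.
    Block sample{{std::make_shared<DataTypeUInt64>(), "id"},
                 {std::make_shared<DataTypeString>(), "name"}};

    BlockInputStreamPtr stream = std::make_shared<PostgreSQLBlockInputStream>(
        connection, "SELECT id, name FROM test_table", sample, 8192);

    stream->readPrefix();
    while (Block block = stream->read())
    {
        /// Each block holds up to max_block_size rows converted to ClickHouse columns.
    }
    stream->readSuffix();
}
```

The sample block must match the SELECT list, since `readImpl()` converts each PostgreSQL text value according to the corresponding ClickHouse type.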
@@ -12,7 +12,7 @@ NO_COMPILER_WARNINGS()

SRCS(
-    <? find . -name '*.cpp' | grep -v -F tests | sed 's/^\.\// /' | sort ?>
+    <? find . -name '*.cpp' | grep -v -P 'tests|PostgreSQL' | sed 's/^\.\// /' | sort ?>
)

END()
@ -10,12 +10,15 @@
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/DataTypeOneElementTuple.h>
|
||||
|
||||
#include <Parsers/IAST.h>
|
||||
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
#include <Core/NamesAndTypes.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -145,10 +148,57 @@ namespace
|
||||
|
||||
offset_values.resize(i);
|
||||
}
|
||||
|
||||
ColumnPtr arrayOffsetsToSizes(const IColumn & column)
|
||||
{
|
||||
const auto & column_offsets = assert_cast<const ColumnArray::ColumnOffsets &>(column);
|
||||
MutableColumnPtr column_sizes = column_offsets.cloneEmpty();
|
||||
|
||||
if (column_offsets.empty())
|
||||
return column_sizes;
|
||||
|
||||
const auto & offsets_data = column_offsets.getData();
|
||||
auto & sizes_data = assert_cast<ColumnArray::ColumnOffsets &>(*column_sizes).getData();
|
||||
|
||||
sizes_data.resize(offsets_data.size());
|
||||
|
||||
IColumn::Offset prev_offset = 0;
|
||||
for (size_t i = 0, size = offsets_data.size(); i < size; ++i)
|
||||
{
|
||||
auto current_offset = offsets_data[i];
|
||||
sizes_data[i] = current_offset - prev_offset;
|
||||
prev_offset = current_offset;
|
||||
}
|
||||
|
||||
return column_sizes;
|
||||
}
|
||||
|
||||
ColumnPtr arraySizesToOffsets(const IColumn & column)
|
||||
{
|
||||
const auto & column_sizes = assert_cast<const ColumnArray::ColumnOffsets &>(column);
|
||||
MutableColumnPtr column_offsets = column_sizes.cloneEmpty();
|
||||
|
||||
if (column_sizes.empty())
|
||||
return column_offsets;
|
||||
|
||||
const auto & sizes_data = column_sizes.getData();
|
||||
auto & offsets_data = assert_cast<ColumnArray::ColumnOffsets &>(*column_offsets).getData();
|
||||
|
||||
offsets_data.resize(sizes_data.size());
|
||||
|
||||
IColumn::Offset prev_offset = 0;
|
||||
for (size_t i = 0, size = sizes_data.size(); i < size; ++i)
|
||||
{
|
||||
prev_offset += sizes_data[i];
|
||||
offsets_data[i] = prev_offset;
|
||||
}
|
||||
|
||||
return column_offsets;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void DataTypeArray::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const
|
||||
void DataTypeArray::enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const
|
||||
{
|
||||
path.push_back(Substream::ArraySizes);
|
||||
callback(path, *this);
|
||||
@ -158,7 +208,7 @@ void DataTypeArray::enumerateStreams(const StreamCallback & callback, SubstreamP
|
||||
}
|
||||
|
||||
|
||||
void DataTypeArray::serializeBinaryBulkStatePrefix(
|
||||
void DataTypeArray::serializeBinaryBulkStatePrefixImpl(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
@ -168,7 +218,7 @@ void DataTypeArray::serializeBinaryBulkStatePrefix(
|
||||
}
|
||||
|
||||
|
||||
void DataTypeArray::serializeBinaryBulkStateSuffix(
|
||||
void DataTypeArray::serializeBinaryBulkStateSuffixImpl(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
@ -178,7 +228,7 @@ void DataTypeArray::serializeBinaryBulkStateSuffix(
|
||||
}
|
||||
|
||||
|
||||
void DataTypeArray::deserializeBinaryBulkStatePrefix(
|
||||
void DataTypeArray::deserializeBinaryBulkStatePrefixImpl(
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
@ -188,7 +238,7 @@ void DataTypeArray::deserializeBinaryBulkStatePrefix(
|
||||
}
|
||||
|
||||
|
||||
void DataTypeArray::serializeBinaryBulkWithMultipleStreams(
|
||||
void DataTypeArray::serializeBinaryBulkWithMultipleStreamsImpl(
|
||||
const IColumn & column,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
@ -235,44 +285,52 @@ void DataTypeArray::serializeBinaryBulkWithMultipleStreams(
|
||||
}
|
||||
|
||||
|
||||
void DataTypeArray::deserializeBinaryBulkWithMultipleStreams(
|
||||
void DataTypeArray::deserializeBinaryBulkWithMultipleStreamsImpl(
|
||||
IColumn & column,
|
||||
size_t limit,
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsCache * cache) const
|
||||
{
|
||||
ColumnArray & column_array = typeid_cast<ColumnArray &>(column);
|
||||
|
||||
settings.path.push_back(Substream::ArraySizes);
|
||||
if (auto * stream = settings.getter(settings.path))
|
||||
|
||||
if (auto cached_column = getFromSubstreamsCache(cache, settings.path))
|
||||
{
|
||||
column_array.getOffsetsPtr() = arraySizesToOffsets(*cached_column);
|
||||
}
|
||||
else if (auto * stream = settings.getter(settings.path))
|
||||
{
|
||||
if (settings.position_independent_encoding)
|
||||
deserializeArraySizesPositionIndependent(column, *stream, limit);
|
||||
else
|
||||
DataTypeNumber<ColumnArray::Offset>().deserializeBinaryBulk(column_array.getOffsetsColumn(), *stream, limit, 0);
|
||||
|
||||
addToSubstreamsCache(cache, settings.path, arrayOffsetsToSizes(column_array.getOffsetsColumn()));
|
||||
}
|
||||
|
||||
settings.path.back() = Substream::ArrayElements;
|
||||
|
||||
ColumnArray::Offsets & offset_values = column_array.getOffsets();
|
||||
IColumn & nested_column = column_array.getData();
|
||||
ColumnPtr & nested_column = column_array.getDataPtr();
|
||||
|
||||
/// The number of values corresponding to `offset_values` must be read.
|
||||
size_t last_offset = offset_values.back();
|
||||
if (last_offset < nested_column.size())
|
||||
if (last_offset < nested_column->size())
|
||||
throw Exception("Nested column is longer than last offset", ErrorCodes::LOGICAL_ERROR);
|
||||
size_t nested_limit = last_offset - nested_column.size();
|
||||
size_t nested_limit = last_offset - nested_column->size();
|
||||
|
||||
/// Adjust the value size hint. Divide it by the average array size.
|
||||
settings.avg_value_size_hint = nested_limit ? settings.avg_value_size_hint / nested_limit * offset_values.size() : 0;
|
||||
|
||||
nested->deserializeBinaryBulkWithMultipleStreams(nested_column, nested_limit, settings, state);
|
||||
nested->deserializeBinaryBulkWithMultipleStreams(nested_column, nested_limit, settings, state, cache);
|
||||
|
||||
settings.path.pop_back();
|
||||
|
||||
/// Check consistency between the offsets and elements subcolumns.
/// But if the elements column is empty, that is fine for columns of Nested types that were added by ALTER.
|
||||
if (!nested_column.empty() && nested_column.size() != last_offset)
|
||||
throw ParsingException("Cannot read all array values: read just " + toString(nested_column.size()) + " of " + toString(last_offset),
|
||||
if (!nested_column->empty() && nested_column->size() != last_offset)
|
||||
throw ParsingException("Cannot read all array values: read just " + toString(nested_column->size()) + " of " + toString(last_offset),
|
||||
ErrorCodes::CANNOT_READ_ALL_DATA);
|
||||
}
|
||||
|
||||
@ -530,6 +588,44 @@ bool DataTypeArray::equals(const IDataType & rhs) const
|
||||
return typeid(rhs) == typeid(*this) && nested->equals(*static_cast<const DataTypeArray &>(rhs).nested);
|
||||
}
|
||||
|
||||
DataTypePtr DataTypeArray::tryGetSubcolumnType(const String & subcolumn_name) const
|
||||
{
|
||||
return tryGetSubcolumnTypeImpl(subcolumn_name, 0);
|
||||
}
|
||||
|
||||
DataTypePtr DataTypeArray::tryGetSubcolumnTypeImpl(const String & subcolumn_name, size_t level) const
|
||||
{
|
||||
if (subcolumn_name == "size" + std::to_string(level))
|
||||
return createOneElementTuple(std::make_shared<DataTypeUInt64>(), subcolumn_name, false);
|
||||
|
||||
DataTypePtr subcolumn;
|
||||
if (const auto * nested_array = typeid_cast<const DataTypeArray *>(nested.get()))
|
||||
subcolumn = nested_array->tryGetSubcolumnTypeImpl(subcolumn_name, level + 1);
|
||||
else
|
||||
subcolumn = nested->tryGetSubcolumnType(subcolumn_name);
|
||||
|
||||
return (subcolumn ? std::make_shared<DataTypeArray>(std::move(subcolumn)) : subcolumn);
|
||||
}
|
||||
|
||||
ColumnPtr DataTypeArray::getSubcolumn(const String & subcolumn_name, const IColumn & column) const
|
||||
{
|
||||
return getSubcolumnImpl(subcolumn_name, column, 0);
|
||||
}
|
||||
|
||||
ColumnPtr DataTypeArray::getSubcolumnImpl(const String & subcolumn_name, const IColumn & column, size_t level) const
|
||||
{
|
||||
const auto & column_array = assert_cast<const ColumnArray &>(column);
|
||||
if (subcolumn_name == "size" + std::to_string(level))
|
||||
return arrayOffsetsToSizes(column_array.getOffsetsColumn());
|
||||
|
||||
ColumnPtr subcolumn;
|
||||
if (const auto * nested_array = typeid_cast<const DataTypeArray *>(nested.get()))
|
||||
subcolumn = nested_array->getSubcolumnImpl(subcolumn_name, column_array.getData(), level + 1);
|
||||
else
|
||||
subcolumn = nested->getSubcolumn(subcolumn_name, column_array.getData());
|
||||
|
||||
return ColumnArray::create(subcolumn, column_array.getOffsetsPtr());
|
||||
}
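To make the level-based naming above concrete (a hypothetical column, not part of this patch): for a column arr Array(Array(UInt64)), the size subcolumns are addressed per nesting level, and each traversed level wraps the result back into an Array.

    /// Illustrative only:
    ///   arr Array(Array(UInt64))
    ///   tryGetSubcolumnType("size0") -> UInt64         (sizes of the outer arrays)
    ///   tryGetSubcolumnType("size1") -> Array(UInt64)  (sizes of the inner arrays, one per outer element)
    ///   getSubcolumn("size0", col)   -> a column built by arrayOffsetsToSizes from the outer offsets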
|
||||
|
||||
size_t DataTypeArray::getNumberOfDimensions() const
|
||||
{
|
||||
|
@ -57,32 +57,33 @@ public:
|
||||
* This is necessary, because when implementing nested structures, several arrays can have common sizes.
|
||||
*/
|
||||
|
||||
void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const override;
|
||||
void enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const override;
|
||||
|
||||
void serializeBinaryBulkStatePrefix(
|
||||
void serializeBinaryBulkStatePrefixImpl(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void serializeBinaryBulkStateSuffix(
|
||||
void serializeBinaryBulkStateSuffixImpl(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void deserializeBinaryBulkStatePrefix(
|
||||
void deserializeBinaryBulkStatePrefixImpl(
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void serializeBinaryBulkWithMultipleStreams(
|
||||
void serializeBinaryBulkWithMultipleStreamsImpl(
|
||||
const IColumn & column,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void deserializeBinaryBulkWithMultipleStreams(
|
||||
void deserializeBinaryBulkWithMultipleStreamsImpl(
|
||||
IColumn & column,
|
||||
size_t limit,
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const override;
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsCache * cache) const override;
|
||||
|
||||
void serializeProtobuf(const IColumn & column,
|
||||
size_t row_num,
|
||||
@ -111,10 +112,17 @@ public:
|
||||
return nested->isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion();
|
||||
}
|
||||
|
||||
DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override;
|
||||
ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override;
|
||||
|
||||
const DataTypePtr & getNestedType() const { return nested; }
|
||||
|
||||
/// 1 for plain array, 2 for array of arrays and so on.
|
||||
size_t getNumberOfDimensions() const;
|
||||
|
||||
private:
|
||||
ColumnPtr getSubcolumnImpl(const String & subcolumn_name, const IColumn & column, size_t level) const;
|
||||
DataTypePtr tryGetSubcolumnTypeImpl(const String & subcolumn_name, size_t level) const;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <memory>
|
||||
#include <cstddef>
|
||||
#include <Core/Types.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -62,8 +63,51 @@ public:
|
||||
virtual void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const = 0;
|
||||
};
|
||||
|
||||
/** Allows to customize an existing data type by representing it with custom substreams.
  * The customized data type will be serialized/deserialized to files with different names than the base type,
  * but its binary and text representation will be unchanged.
  * E.g. it can be used for reading single subcolumns of complex types.
  */
|
||||
class IDataTypeCustomStreams
|
||||
{
|
||||
public:
|
||||
virtual ~IDataTypeCustomStreams() = default;
|
||||
|
||||
virtual void enumerateStreams(
|
||||
const IDataType::StreamCallback & callback,
|
||||
IDataType::SubstreamPath & path) const = 0;
|
||||
|
||||
virtual void serializeBinaryBulkStatePrefix(
|
||||
IDataType::SerializeBinaryBulkSettings & settings,
|
||||
IDataType::SerializeBinaryBulkStatePtr & state) const = 0;
|
||||
|
||||
virtual void serializeBinaryBulkStateSuffix(
|
||||
IDataType::SerializeBinaryBulkSettings & settings,
|
||||
IDataType::SerializeBinaryBulkStatePtr & state) const = 0;
|
||||
|
||||
virtual void deserializeBinaryBulkStatePrefix(
|
||||
IDataType::DeserializeBinaryBulkSettings & settings,
|
||||
IDataType::DeserializeBinaryBulkStatePtr & state) const = 0;
|
||||
|
||||
virtual void serializeBinaryBulkWithMultipleStreams(
|
||||
const IColumn & column,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
IDataType::SerializeBinaryBulkSettings & settings,
|
||||
IDataType::SerializeBinaryBulkStatePtr & state) const = 0;
|
||||
|
||||
virtual void deserializeBinaryBulkWithMultipleStreams(
|
||||
ColumnPtr & column,
|
||||
size_t limit,
|
||||
IDataType::DeserializeBinaryBulkSettings & settings,
|
||||
IDataType::DeserializeBinaryBulkStatePtr & state,
|
||||
IDataType::SubstreamsCache * cache) const = 0;
|
||||
};
|
||||
|
||||
using DataTypeCustomNamePtr = std::unique_ptr<const IDataTypeCustomName>;
|
||||
using DataTypeCustomTextSerializationPtr = std::unique_ptr<const IDataTypeCustomTextSerialization>;
|
||||
using DataTypeCustomStreamsPtr = std::unique_ptr<const IDataTypeCustomStreams>;
|
||||
|
||||
|
||||
/** Describe a data type customization
|
||||
*/
|
||||
@ -71,9 +115,15 @@ struct DataTypeCustomDesc
|
||||
{
|
||||
DataTypeCustomNamePtr name;
|
||||
DataTypeCustomTextSerializationPtr text_serialization;
|
||||
DataTypeCustomStreamsPtr streams;
|
||||
|
||||
DataTypeCustomDesc(DataTypeCustomNamePtr name_, DataTypeCustomTextSerializationPtr text_serialization_)
|
||||
: name(std::move(name_)), text_serialization(std::move(text_serialization_)) {}
|
||||
DataTypeCustomDesc(
|
||||
DataTypeCustomNamePtr name_,
|
||||
DataTypeCustomTextSerializationPtr text_serialization_ = nullptr,
|
||||
DataTypeCustomStreamsPtr streams_ = nullptr)
|
||||
: name(std::move(name_))
|
||||
, text_serialization(std::move(text_serialization_))
|
||||
, streams(std::move(streams_)) {}
|
||||
};
|
||||
|
||||
using DataTypeCustomDescPtr = std::unique_ptr<DataTypeCustomDesc>;
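A rough usage sketch of the extended constructor (the alias name here is illustrative; the real call sites are in DataTypeNested.cpp and DataTypeOneElementTuple.cpp below): text serialization and streams are now both optional, so a description can carry only a custom name.

    /// Illustrative only: a description with a custom name and default serialization/streams.
    auto desc = std::make_unique<DataTypeCustomDesc>(
        std::make_unique<DataTypeCustomFixedName>("SomeAlias"),
        /* text_serialization_ = */ nullptr,
        /* streams_ = */ nullptr);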
|
||||
|
18
src/DataTypes/DataTypeCustom_fwd.h
Normal file
@ -0,0 +1,18 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class IDataTypeCustomName;
|
||||
class IDataTypeCustomTextSerialization;
|
||||
class IDataTypeCustomStreams;
|
||||
struct DataTypeCustomDesc;
|
||||
|
||||
using DataTypeCustomNamePtr = std::unique_ptr<const IDataTypeCustomName>;
|
||||
using DataTypeCustomTextSerializationPtr = std::unique_ptr<const IDataTypeCustomTextSerialization>;
|
||||
using DataTypeCustomStreamsPtr = std::unique_ptr<const IDataTypeCustomStreams>;
|
||||
using DataTypeCustomDescPtr = std::unique_ptr<DataTypeCustomDesc>;
|
||||
|
||||
}
|
@ -79,6 +79,16 @@ DataTypePtr DataTypeFactory::get(const String & family_name_param, const ASTPtr
|
||||
return findCreatorByName(family_name)(parameters);
|
||||
}
|
||||
|
||||
DataTypePtr DataTypeFactory::getCustom(DataTypeCustomDescPtr customization) const
|
||||
{
|
||||
if (!customization->name)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create custom type without name");
|
||||
|
||||
auto type = get(customization->name->getName());
|
||||
type->setCustomization(std::move(customization));
|
||||
return type;
|
||||
}
|
||||
|
||||
|
||||
void DataTypeFactory::registerDataType(const String & family_name, Value creator, CaseSensitiveness case_sensitiveness)
|
||||
{
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <Parsers/IAST_fwd.h>
|
||||
#include <Common/IFactoryWithAliases.h>
|
||||
#include <DataTypes/DataTypeCustom_fwd.h>
|
||||
|
||||
|
||||
#include <functional>
|
||||
@ -33,6 +34,7 @@ public:
|
||||
DataTypePtr get(const String & full_name) const;
|
||||
DataTypePtr get(const String & family_name, const ASTPtr & parameters) const;
|
||||
DataTypePtr get(const ASTPtr & ast) const;
|
||||
DataTypePtr getCustom(DataTypeCustomDescPtr customization) const;
|
||||
|
||||
/// Register a type family by its name.
|
||||
void registerDataType(const String & family_name, Value creator, CaseSensitiveness case_sensitiveness = CaseSensitive);
|
||||
@ -84,5 +86,6 @@ void registerDataTypeLowCardinality(DataTypeFactory & factory);
|
||||
void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory);
|
||||
void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory);
|
||||
void registerDataTypeDomainGeo(DataTypeFactory & factory);
|
||||
void registerDataTypeOneElementTuple(DataTypeFactory & factory);
|
||||
|
||||
}
|
||||
|
@ -50,7 +50,7 @@ DataTypeLowCardinality::DataTypeLowCardinality(DataTypePtr dictionary_type_)
|
||||
+ dictionary_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
void DataTypeLowCardinality::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const
|
||||
void DataTypeLowCardinality::enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const
|
||||
{
|
||||
path.push_back(Substream::DictionaryKeys);
|
||||
dictionary_type->enumerateStreams(callback, path);
|
||||
@ -243,7 +243,7 @@ static DeserializeStateLowCardinality * checkAndGetLowCardinalityDeserializeStat
|
||||
return low_cardinality_state;
|
||||
}
|
||||
|
||||
void DataTypeLowCardinality::serializeBinaryBulkStatePrefix(
|
||||
void DataTypeLowCardinality::serializeBinaryBulkStatePrefixImpl(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
@ -263,7 +263,7 @@ void DataTypeLowCardinality::serializeBinaryBulkStatePrefix(
|
||||
state = std::make_shared<SerializeStateLowCardinality>(key_version);
|
||||
}
|
||||
|
||||
void DataTypeLowCardinality::serializeBinaryBulkStateSuffix(
|
||||
void DataTypeLowCardinality::serializeBinaryBulkStateSuffixImpl(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
@ -289,7 +289,7 @@ void DataTypeLowCardinality::serializeBinaryBulkStateSuffix(
|
||||
}
|
||||
}
|
||||
|
||||
void DataTypeLowCardinality::deserializeBinaryBulkStatePrefix(
|
||||
void DataTypeLowCardinality::deserializeBinaryBulkStatePrefixImpl(
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
@ -482,7 +482,7 @@ namespace
|
||||
}
|
||||
}
|
||||
|
||||
void DataTypeLowCardinality::serializeBinaryBulkWithMultipleStreams(
|
||||
void DataTypeLowCardinality::serializeBinaryBulkWithMultipleStreamsImpl(
|
||||
const IColumn & column,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
@ -579,11 +579,12 @@ void DataTypeLowCardinality::serializeBinaryBulkWithMultipleStreams(
|
||||
index_version.getDataType()->serializeBinaryBulk(*positions, *indexes_stream, 0, num_rows);
|
||||
}
|
||||
|
||||
void DataTypeLowCardinality::deserializeBinaryBulkWithMultipleStreams(
|
||||
void DataTypeLowCardinality::deserializeBinaryBulkWithMultipleStreamsImpl(
|
||||
IColumn & column,
|
||||
size_t limit,
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsCache * /* cache */) const
|
||||
{
|
||||
ColumnLowCardinality & low_cardinality_column = typeid_cast<ColumnLowCardinality &>(column);
|
||||
|
||||
|
@ -22,32 +22,33 @@ public:
|
||||
const char * getFamilyName() const override { return "LowCardinality"; }
|
||||
TypeIndex getTypeId() const override { return TypeIndex::LowCardinality; }
|
||||
|
||||
void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const override;
|
||||
void enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const override;
|
||||
|
||||
void serializeBinaryBulkStatePrefix(
|
||||
void serializeBinaryBulkStatePrefixImpl(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void serializeBinaryBulkStateSuffix(
|
||||
void serializeBinaryBulkStateSuffixImpl(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void deserializeBinaryBulkStatePrefix(
|
||||
void deserializeBinaryBulkStatePrefixImpl(
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void serializeBinaryBulkWithMultipleStreams(
|
||||
void serializeBinaryBulkWithMultipleStreamsImpl(
|
||||
const IColumn & column,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void deserializeBinaryBulkWithMultipleStreams(
|
||||
void deserializeBinaryBulkWithMultipleStreamsImpl(
|
||||
IColumn & column,
|
||||
size_t limit,
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const override;
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsCache * cache) const override;
|
||||
|
||||
void serializeBinary(const Field & field, WriteBuffer & ostr) const override;
|
||||
void deserializeBinary(Field & field, ReadBuffer & istr) const override;
|
||||
|
@ -278,34 +278,34 @@ void DataTypeMap::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const
|
||||
}
|
||||
|
||||
|
||||
void DataTypeMap::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const
|
||||
void DataTypeMap::enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const
|
||||
{
|
||||
nested->enumerateStreams(callback, path);
|
||||
}
|
||||
|
||||
void DataTypeMap::serializeBinaryBulkStatePrefix(
|
||||
void DataTypeMap::serializeBinaryBulkStatePrefixImpl(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
nested->serializeBinaryBulkStatePrefix(settings, state);
|
||||
}
|
||||
|
||||
void DataTypeMap::serializeBinaryBulkStateSuffix(
|
||||
void DataTypeMap::serializeBinaryBulkStateSuffixImpl(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
nested->serializeBinaryBulkStateSuffix(settings, state);
|
||||
}
|
||||
|
||||
void DataTypeMap::deserializeBinaryBulkStatePrefix(
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const
|
||||
void DataTypeMap::deserializeBinaryBulkStatePrefixImpl(
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
nested->deserializeBinaryBulkStatePrefix(settings, state);
|
||||
}
|
||||
|
||||
|
||||
void DataTypeMap::serializeBinaryBulkWithMultipleStreams(
|
||||
void DataTypeMap::serializeBinaryBulkWithMultipleStreamsImpl(
|
||||
const IColumn & column,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
@ -315,13 +315,15 @@ void DataTypeMap::serializeBinaryBulkWithMultipleStreams(
|
||||
nested->serializeBinaryBulkWithMultipleStreams(extractNestedColumn(column), offset, limit, settings, state);
|
||||
}
|
||||
|
||||
void DataTypeMap::deserializeBinaryBulkWithMultipleStreams(
|
||||
void DataTypeMap::deserializeBinaryBulkWithMultipleStreamsImpl(
|
||||
IColumn & column,
|
||||
size_t limit,
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsCache * cache) const
|
||||
{
|
||||
nested->deserializeBinaryBulkWithMultipleStreams(extractNestedColumn(column), limit, settings, state);
|
||||
auto & column_map = assert_cast<ColumnMap &>(column);
|
||||
nested->deserializeBinaryBulkWithMultipleStreams(column_map.getNestedColumnPtr(), limit, settings, state, cache);
|
||||
}
|
||||
|
||||
void DataTypeMap::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
|
||||
|
@ -46,34 +46,33 @@ public:
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
|
||||
|
||||
|
||||
/** Each sub-column in a map is serialized in a separate stream.
  */
|
||||
void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const override;
|
||||
void enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const override;
|
||||
|
||||
void serializeBinaryBulkStatePrefix(
|
||||
void serializeBinaryBulkStatePrefixImpl(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void serializeBinaryBulkStateSuffix(
|
||||
void serializeBinaryBulkStateSuffixImpl(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void deserializeBinaryBulkStatePrefix(
|
||||
void deserializeBinaryBulkStatePrefixImpl(
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void serializeBinaryBulkWithMultipleStreams(
|
||||
void serializeBinaryBulkWithMultipleStreamsImpl(
|
||||
const IColumn & column,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void deserializeBinaryBulkWithMultipleStreams(
|
||||
void deserializeBinaryBulkWithMultipleStreamsImpl(
|
||||
IColumn & column,
|
||||
size_t limit,
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const override;
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsCache * cache) const override;
|
||||
|
||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
||||
|
76
src/DataTypes/DataTypeNested.cpp
Normal file
@ -0,0 +1,76 @@
|
||||
#include <DataTypes/DataTypeNested.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <Common/quoteString.h>
|
||||
#include <Parsers/ASTNameTypePair.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int EMPTY_DATA_PASSED;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
String DataTypeNestedCustomName::getName() const
|
||||
{
|
||||
WriteBufferFromOwnString s;
|
||||
s << "Nested(";
|
||||
for (size_t i = 0; i < elems.size(); ++i)
|
||||
{
|
||||
if (i != 0)
|
||||
s << ", ";
|
||||
|
||||
s << backQuoteIfNeed(names[i]) << ' ';
|
||||
s << elems[i]->getName();
|
||||
}
|
||||
s << ")";
|
||||
|
||||
return s.str();
|
||||
}
|
||||
|
||||
static std::pair<DataTypePtr, DataTypeCustomDescPtr> create(const ASTPtr & arguments)
|
||||
{
|
||||
if (!arguments || arguments->children.empty())
|
||||
throw Exception("Nested cannot be empty", ErrorCodes::EMPTY_DATA_PASSED);
|
||||
|
||||
DataTypes nested_types;
|
||||
Strings nested_names;
|
||||
nested_types.reserve(arguments->children.size());
|
||||
nested_names.reserve(arguments->children.size());
|
||||
|
||||
for (const auto & child : arguments->children)
|
||||
{
|
||||
const auto * name_type = child->as<ASTNameTypePair>();
|
||||
if (!name_type)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Data type Nested accepts only pairs with name and type");
|
||||
|
||||
auto nested_type = DataTypeFactory::instance().get(name_type->type);
|
||||
nested_types.push_back(std::move(nested_type));
|
||||
nested_names.push_back(name_type->name);
|
||||
}
|
||||
|
||||
auto data_type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeTuple>(nested_types, nested_names));
|
||||
auto custom_name = std::make_unique<DataTypeNestedCustomName>(nested_types, nested_names);
|
||||
|
||||
return std::make_pair(std::move(data_type), std::make_unique<DataTypeCustomDesc>(std::move(custom_name), nullptr));
|
||||
}
|
||||
|
||||
void registerDataTypeNested(DataTypeFactory & factory)
|
||||
{
|
||||
return factory.registerDataTypeCustom("Nested", create);
|
||||
}
|
||||
|
||||
DataTypePtr createNested(const DataTypes & types, const Names & names)
|
||||
{
|
||||
auto custom_desc = std::make_unique<DataTypeCustomDesc>(
|
||||
std::make_unique<DataTypeNestedCustomName>(types, names));
|
||||
|
||||
return DataTypeFactory::instance().getCustom(std::move(custom_desc));
|
||||
}
|
||||
|
||||
}
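A usage sketch (assuming the declarations from DataTypeNested.h, shown next): createNested builds the same type that parsing Nested(...) in a CREATE query would, i.e. an Array(Tuple(...)) whose custom name renders back as Nested(...).

    /// Illustrative only:
    DataTypes types{std::make_shared<DataTypeUInt32>(), std::make_shared<DataTypeString>()};
    Names names{"x", "s"};
    DataTypePtr nested = createNested(types, names);
    /// nested->getName() is expected to be "Nested(x UInt32, s String)",
    /// isNested(nested) is true, and the storage type is Array(Tuple(x UInt32, s String)).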
|
34
src/DataTypes/DataTypeNested.h
Normal file
@ -0,0 +1,34 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataTypes/DataTypeWithSimpleSerialization.h>
|
||||
#include <DataTypes/DataTypeCustom.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class DataTypeNestedCustomName final : public IDataTypeCustomName
|
||||
{
|
||||
private:
|
||||
DataTypes elems;
|
||||
Strings names;
|
||||
|
||||
public:
|
||||
DataTypeNestedCustomName(const DataTypes & elems_, const Strings & names_)
|
||||
: elems(elems_), names(names_)
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override;
|
||||
};
|
||||
|
||||
DataTypePtr createNested(const DataTypes & types, const Names & names);
|
||||
|
||||
template <typename DataType>
|
||||
inline bool isNested(const DataType & data_type)
|
||||
{
|
||||
return typeid_cast<const DataTypeNestedCustomName *>(data_type->getCustomName()) != nullptr;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include <DataTypes/DataTypeNothing.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/DataTypeOneElementTuple.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Core/Field.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
@ -41,7 +42,7 @@ bool DataTypeNullable::onlyNull() const
|
||||
}
|
||||
|
||||
|
||||
void DataTypeNullable::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const
|
||||
void DataTypeNullable::enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const
|
||||
{
|
||||
path.push_back(Substream::NullMap);
|
||||
callback(path, *this);
|
||||
@ -51,7 +52,7 @@ void DataTypeNullable::enumerateStreams(const StreamCallback & callback, Substre
|
||||
}
|
||||
|
||||
|
||||
void DataTypeNullable::serializeBinaryBulkStatePrefix(
|
||||
void DataTypeNullable::serializeBinaryBulkStatePrefixImpl(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
@ -61,7 +62,7 @@ void DataTypeNullable::serializeBinaryBulkStatePrefix(
|
||||
}
|
||||
|
||||
|
||||
void DataTypeNullable::serializeBinaryBulkStateSuffix(
|
||||
void DataTypeNullable::serializeBinaryBulkStateSuffixImpl(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
@ -71,7 +72,7 @@ void DataTypeNullable::serializeBinaryBulkStateSuffix(
|
||||
}
|
||||
|
||||
|
||||
void DataTypeNullable::deserializeBinaryBulkStatePrefix(
|
||||
void DataTypeNullable::deserializeBinaryBulkStatePrefixImpl(
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
@ -81,7 +82,7 @@ void DataTypeNullable::deserializeBinaryBulkStatePrefix(
|
||||
}
|
||||
|
||||
|
||||
void DataTypeNullable::serializeBinaryBulkWithMultipleStreams(
|
||||
void DataTypeNullable::serializeBinaryBulkWithMultipleStreamsImpl(
|
||||
const IColumn & column,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
@ -103,20 +104,28 @@ void DataTypeNullable::serializeBinaryBulkWithMultipleStreams(
|
||||
}
|
||||
|
||||
|
||||
void DataTypeNullable::deserializeBinaryBulkWithMultipleStreams(
|
||||
void DataTypeNullable::deserializeBinaryBulkWithMultipleStreamsImpl(
|
||||
IColumn & column,
|
||||
size_t limit,
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsCache * cache) const
|
||||
{
|
||||
ColumnNullable & col = assert_cast<ColumnNullable &>(column);
|
||||
|
||||
settings.path.push_back(Substream::NullMap);
|
||||
if (auto * stream = settings.getter(settings.path))
|
||||
if (auto cached_column = getFromSubstreamsCache(cache, settings.path))
|
||||
{
|
||||
col.getNullMapColumnPtr() = cached_column;
|
||||
}
|
||||
else if (auto * stream = settings.getter(settings.path))
|
||||
{
|
||||
DataTypeUInt8().deserializeBinaryBulk(col.getNullMapColumn(), *stream, limit, 0);
|
||||
addToSubstreamsCache(cache, settings.path, col.getNullMapColumnPtr());
|
||||
}
|
||||
|
||||
settings.path.back() = Substream::NullableElements;
|
||||
nested_data_type->deserializeBinaryBulkWithMultipleStreams(col.getNestedColumn(), limit, settings, state);
|
||||
nested_data_type->deserializeBinaryBulkWithMultipleStreams(col.getNestedColumnPtr(), limit, settings, state, cache);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
|
||||
@ -525,6 +534,23 @@ bool DataTypeNullable::equals(const IDataType & rhs) const
|
||||
return rhs.isNullable() && nested_data_type->equals(*static_cast<const DataTypeNullable &>(rhs).nested_data_type);
|
||||
}
|
||||
|
||||
DataTypePtr DataTypeNullable::tryGetSubcolumnType(const String & subcolumn_name) const
|
||||
{
|
||||
if (subcolumn_name == "null")
|
||||
return createOneElementTuple(std::make_shared<DataTypeUInt8>(), subcolumn_name, false);
|
||||
|
||||
return nested_data_type->tryGetSubcolumnType(subcolumn_name);
|
||||
}
|
||||
|
||||
ColumnPtr DataTypeNullable::getSubcolumn(const String & subcolumn_name, const IColumn & column) const
|
||||
{
|
||||
const auto & column_nullable = assert_cast<const ColumnNullable &>(column);
|
||||
if (subcolumn_name == "null")
|
||||
return column_nullable.getNullMapColumnPtr()->assumeMutable();
|
||||
|
||||
return nested_data_type->getSubcolumn(subcolumn_name, column_nullable.getNestedColumn());
|
||||
}
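A worked example of the new subcolumn (hypothetical column name): a column n Nullable(String) exposes its null map as a subcolumn of its own.

    /// Illustrative only:
    ///   n Nullable(String)
    ///   tryGetSubcolumnType("null") -> UInt8 (the null map; 1 means the row is NULL)
    ///   getSubcolumn("null", col)   -> the null map column of the ColumnNullable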
|
||||
|
||||
|
||||
static DataTypePtr create(const ASTPtr & arguments)
|
||||
{
|
||||
|
@ -18,32 +18,33 @@ public:
|
||||
const char * getFamilyName() const override { return "Nullable"; }
|
||||
TypeIndex getTypeId() const override { return TypeIndex::Nullable; }
|
||||
|
||||
void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const override;
|
||||
void enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const override;
|
||||
|
||||
void serializeBinaryBulkStatePrefix(
|
||||
void serializeBinaryBulkStatePrefixImpl(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void serializeBinaryBulkStateSuffix(
|
||||
void serializeBinaryBulkStateSuffixImpl(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void deserializeBinaryBulkStatePrefix(
|
||||
void deserializeBinaryBulkStatePrefixImpl(
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void serializeBinaryBulkWithMultipleStreams(
|
||||
void serializeBinaryBulkWithMultipleStreamsImpl(
|
||||
const IColumn & column,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void deserializeBinaryBulkWithMultipleStreams(
|
||||
void deserializeBinaryBulkWithMultipleStreamsImpl(
|
||||
IColumn & column,
|
||||
size_t limit,
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const override;
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsCache * cache) const override;
|
||||
|
||||
void serializeBinary(const Field & field, WriteBuffer & ostr) const override;
|
||||
void deserializeBinary(Field & field, ReadBuffer & istr) const override;
|
||||
@ -97,6 +98,8 @@ public:
|
||||
size_t getSizeOfValueInMemory() const override;
|
||||
bool onlyNull() const override;
|
||||
bool canBeInsideLowCardinality() const override { return nested_data_type->canBeInsideLowCardinality(); }
|
||||
DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override;
|
||||
ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override;
|
||||
|
||||
const DataTypePtr & getNestedType() const { return nested_data_type; }
|
||||
|
||||
|
112
src/DataTypes/DataTypeOneElementTuple.cpp
Normal file
@ -0,0 +1,112 @@
|
||||
#include <DataTypes/DataTypeOneElementTuple.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/DataTypeCustom.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <Common/quoteString.h>
|
||||
#include <Parsers/ASTNameTypePair.h>
|
||||
#include <Columns/IColumn.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/** Custom substreams representation for a single subcolumn.
  * It serializes/deserializes the column as the nested type, but as if it were
  * a named tuple with one element and the given name.
  */
|
||||
class DataTypeOneElementTupleStreams : public IDataTypeCustomStreams
|
||||
{
|
||||
private:
|
||||
DataTypePtr nested;
|
||||
String name;
|
||||
bool escape_delimiter;
|
||||
|
||||
public:
|
||||
DataTypeOneElementTupleStreams(const DataTypePtr & nested_, const String & name_, bool escape_delimiter_)
|
||||
: nested(nested_), name(name_), escape_delimiter(escape_delimiter_) {}
|
||||
|
||||
void enumerateStreams(
|
||||
const IDataType::StreamCallback & callback,
|
||||
IDataType::SubstreamPath & path) const override
|
||||
{
|
||||
addToPath(path);
|
||||
nested->enumerateStreams(callback, path);
|
||||
path.pop_back();
|
||||
}
|
||||
|
||||
void serializeBinaryBulkStatePrefix(
|
||||
IDataType::SerializeBinaryBulkSettings & settings,
|
||||
IDataType::SerializeBinaryBulkStatePtr & state) const override
|
||||
{
|
||||
addToPath(settings.path);
|
||||
nested->serializeBinaryBulkStatePrefix(settings, state);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
|
||||
void serializeBinaryBulkStateSuffix(
|
||||
IDataType::SerializeBinaryBulkSettings & settings,
|
||||
IDataType::SerializeBinaryBulkStatePtr & state) const override
|
||||
{
|
||||
addToPath(settings.path);
|
||||
nested->serializeBinaryBulkStateSuffix(settings, state);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
|
||||
void deserializeBinaryBulkStatePrefix(
|
||||
IDataType::DeserializeBinaryBulkSettings & settings,
|
||||
IDataType::DeserializeBinaryBulkStatePtr & state) const override
|
||||
{
|
||||
addToPath(settings.path);
|
||||
nested->deserializeBinaryBulkStatePrefix(settings, state);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
|
||||
void serializeBinaryBulkWithMultipleStreams(
|
||||
const IColumn & column,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
IDataType::SerializeBinaryBulkSettings & settings,
|
||||
IDataType::SerializeBinaryBulkStatePtr & state) const override
|
||||
{
|
||||
addToPath(settings.path);
|
||||
nested->serializeBinaryBulkWithMultipleStreams(column, offset, limit, settings, state);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
|
||||
void deserializeBinaryBulkWithMultipleStreams(
|
||||
ColumnPtr & column,
|
||||
size_t limit,
|
||||
IDataType::DeserializeBinaryBulkSettings & settings,
|
||||
IDataType::DeserializeBinaryBulkStatePtr & state,
|
||||
IDataType::SubstreamsCache * cache) const override
|
||||
{
|
||||
addToPath(settings.path);
|
||||
nested->deserializeBinaryBulkWithMultipleStreams(column, limit, settings, state, cache);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
|
||||
private:
|
||||
void addToPath(IDataType::SubstreamPath & path) const
|
||||
{
|
||||
path.push_back(IDataType::Substream::TupleElement);
|
||||
path.back().tuple_element_name = name;
|
||||
path.back().escape_tuple_delimiter = escape_delimiter;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
DataTypePtr createOneElementTuple(const DataTypePtr & type, const String & name, bool escape_delimiter)
|
||||
{
|
||||
auto custom_desc = std::make_unique<DataTypeCustomDesc>(
|
||||
std::make_unique<DataTypeCustomFixedName>(type->getName()), nullptr,
|
||||
std::make_unique<DataTypeOneElementTupleStreams>(type, name, escape_delimiter));
|
||||
|
||||
return DataTypeFactory::instance().getCustom(std::move(custom_desc));
|
||||
}
|
||||
|
||||
}
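For context, this is how the helper is used by DataTypeArray and DataTypeNullable above (the concrete values here are only an illustration): the returned type keeps the nested type's name and wire format, but its substream paths are laid out as if it were a one-element named tuple, so the subcolumn's files line up with the parent column's streams.

    /// Illustrative only: the type used for the "size0" subcolumn of an Array column.
    DataTypePtr size_type = createOneElementTuple(
        std::make_shared<DataTypeUInt64>(), /// value type of the subcolumn
        "size0",                            /// tuple element name used in stream paths
        false);                             /// do not escape the dot when building file names
    /// size_type->getName() is still "UInt64"; only its substream layout differs.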
|
10
src/DataTypes/DataTypeOneElementTuple.h
Normal file
@ -0,0 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataTypes/IDataType.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
DataTypePtr createOneElementTuple(const DataTypePtr & type, const String & name, bool escape_delimiter = true);
|
||||
|
||||
}
|
@ -5,6 +5,7 @@
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/DataTypeOneElementTuple.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Parsers/ASTNameTypePair.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
@ -30,6 +31,7 @@ namespace ErrorCodes
|
||||
extern const int EMPTY_DATA_PASSED;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int NOT_FOUND_COLUMN_IN_BLOCK;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH;
|
||||
}
|
||||
@ -357,7 +359,7 @@ void DataTypeTuple::deserializeTextCSV(IColumn & column, ReadBuffer & istr, cons
|
||||
});
|
||||
}
|
||||
|
||||
void DataTypeTuple::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const
|
||||
void DataTypeTuple::enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const
|
||||
{
|
||||
path.push_back(Substream::TupleElement);
|
||||
for (const auto i : ext::range(0, ext::size(elems)))
|
||||
@ -412,7 +414,7 @@ static DeserializeBinaryBulkStateTuple * checkAndGetTupleDeserializeState(IDataT
|
||||
return tuple_state;
|
||||
}
|
||||
|
||||
void DataTypeTuple::serializeBinaryBulkStatePrefix(
|
||||
void DataTypeTuple::serializeBinaryBulkStatePrefixImpl(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
@ -430,7 +432,7 @@ void DataTypeTuple::serializeBinaryBulkStatePrefix(
|
||||
state = std::move(tuple_state);
|
||||
}
|
||||
|
||||
void DataTypeTuple::serializeBinaryBulkStateSuffix(
|
||||
void DataTypeTuple::serializeBinaryBulkStateSuffixImpl(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
@ -445,7 +447,7 @@ void DataTypeTuple::serializeBinaryBulkStateSuffix(
|
||||
settings.path.pop_back();
|
||||
}
|
||||
|
||||
void DataTypeTuple::deserializeBinaryBulkStatePrefix(
|
||||
void DataTypeTuple::deserializeBinaryBulkStatePrefixImpl(
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
@ -463,7 +465,7 @@ void DataTypeTuple::deserializeBinaryBulkStatePrefix(
|
||||
state = std::move(tuple_state);
|
||||
}
|
||||
|
||||
void DataTypeTuple::serializeBinaryBulkWithMultipleStreams(
|
||||
void DataTypeTuple::serializeBinaryBulkWithMultipleStreamsImpl(
|
||||
const IColumn & column,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
@ -482,21 +484,22 @@ void DataTypeTuple::serializeBinaryBulkWithMultipleStreams(
|
||||
settings.path.pop_back();
|
||||
}
|
||||
|
||||
void DataTypeTuple::deserializeBinaryBulkWithMultipleStreams(
|
||||
void DataTypeTuple::deserializeBinaryBulkWithMultipleStreamsImpl(
|
||||
IColumn & column,
|
||||
size_t limit,
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsCache * cache) const
|
||||
{
|
||||
auto * tuple_state = checkAndGetTupleDeserializeState(state);
|
||||
auto & column_tuple = assert_cast<ColumnTuple &>(column);
|
||||
|
||||
settings.path.push_back(Substream::TupleElement);
|
||||
settings.avg_value_size_hint = 0;
|
||||
for (const auto i : ext::range(0, ext::size(elems)))
|
||||
{
|
||||
settings.path.back().tuple_element_name = names[i];
|
||||
auto & element_col = extractElementColumn(column, i);
|
||||
elems[i]->deserializeBinaryBulkWithMultipleStreams(element_col, limit, settings, tuple_state->states[i]);
|
||||
elems[i]->deserializeBinaryBulkWithMultipleStreams(column_tuple.getColumnPtr(i), limit, settings, tuple_state->states[i], cache);
|
||||
}
|
||||
settings.path.pop_back();
|
||||
}
|
||||
@ -611,6 +614,47 @@ size_t DataTypeTuple::getSizeOfValueInMemory() const
|
||||
return res;
|
||||
}
|
||||
|
||||
DataTypePtr DataTypeTuple::tryGetSubcolumnType(const String & subcolumn_name) const
|
||||
{
|
||||
for (size_t i = 0; i < names.size(); ++i)
|
||||
{
|
||||
if (startsWith(subcolumn_name, names[i]))
|
||||
{
|
||||
size_t name_length = names[i].size();
|
||||
DataTypePtr subcolumn_type;
|
||||
if (subcolumn_name.size() == name_length)
|
||||
subcolumn_type = elems[i];
|
||||
else if (subcolumn_name[name_length] == '.')
|
||||
subcolumn_type = elems[i]->tryGetSubcolumnType(subcolumn_name.substr(name_length + 1));
|
||||
|
||||
if (subcolumn_type)
|
||||
return createOneElementTuple(std::move(subcolumn_type), names[i]);
|
||||
}
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ColumnPtr DataTypeTuple::getSubcolumn(const String & subcolumn_name, const IColumn & column) const
|
||||
{
|
||||
for (size_t i = 0; i < names.size(); ++i)
|
||||
{
|
||||
if (startsWith(subcolumn_name, names[i]))
|
||||
{
|
||||
size_t name_length = names[i].size();
|
||||
const auto & subcolumn = extractElementColumn(column, i);
|
||||
|
||||
if (subcolumn_name.size() == name_length)
|
||||
return subcolumn.assumeMutable();
|
||||
|
||||
if (subcolumn_name[name_length] == '.')
|
||||
return elems[i]->getSubcolumn(subcolumn_name.substr(name_length + 1), subcolumn);
|
||||
}
|
||||
}
|
||||
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName());
|
||||
}
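To make the element-name matching above concrete (hypothetical column): for t Tuple(a UInt8, b Tuple(c String)), subcolumn names are resolved element by element, recursing past a dot. Each type returned through tryGetSubcolumnType is additionally wrapped via createOneElementTuple so that its streams carry the element name.

    /// Illustrative only:
    ///   "a"   -> UInt8            (exact element name)
    ///   "b"   -> Tuple(c String)
    ///   "b.c" -> String           (recurses into element "b" after the dot)
    ///   "z"   -> nullptr from tryGetSubcolumnType; getSubcolumn throws ILLEGAL_COLUMN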
|
||||
|
||||
|
||||
static DataTypePtr create(const ASTPtr & arguments)
|
||||
{
|
||||
@ -648,13 +692,4 @@ void registerDataTypeTuple(DataTypeFactory & factory)
|
||||
factory.registerDataType("Tuple", create);
|
||||
}
|
||||
|
||||
void registerDataTypeNested(DataTypeFactory & factory)
|
||||
{
|
||||
/// Nested(...) data type is just a sugar for Array(Tuple(...))
|
||||
factory.registerDataType("Nested", [&factory](const ASTPtr & arguments)
|
||||
{
|
||||
return std::make_shared<DataTypeArray>(factory.get("Tuple", arguments));
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -53,32 +53,33 @@ public:
|
||||
|
||||
/** Each sub-column in a tuple is serialized in a separate stream.
  */
|
||||
void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const override;
|
||||
void enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const override;
|
||||
|
||||
void serializeBinaryBulkStatePrefix(
|
||||
void serializeBinaryBulkStatePrefixImpl(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void serializeBinaryBulkStateSuffix(
|
||||
void serializeBinaryBulkStateSuffixImpl(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void deserializeBinaryBulkStatePrefix(
|
||||
void deserializeBinaryBulkStatePrefixImpl(
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void serializeBinaryBulkWithMultipleStreams(
|
||||
void serializeBinaryBulkWithMultipleStreamsImpl(
|
||||
const IColumn & column,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void deserializeBinaryBulkWithMultipleStreams(
|
||||
void deserializeBinaryBulkWithMultipleStreamsImpl(
|
||||
IColumn & column,
|
||||
size_t limit,
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const override;
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsCache * cache) const override;
|
||||
|
||||
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
|
||||
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;
|
||||
@ -98,6 +99,9 @@ public:
|
||||
size_t getMaximumSizeOfValueInMemory() const override;
|
||||
size_t getSizeOfValueInMemory() const override;
|
||||
|
||||
DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override;
|
||||
ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override;
|
||||
|
||||
const DataTypes & getElements() const { return elems; }
|
||||
const Strings & getElementNames() const { return names; }
|
||||
|
||||
|
@ -3,8 +3,10 @@
|
||||
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/escapeForFileName.h>
|
||||
#include <Common/SipHash.h>
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/Operators.h>
|
||||
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <DataTypes/DataTypeCustom.h>
|
||||
@ -19,9 +21,48 @@ namespace ErrorCodes
|
||||
extern const int MULTIPLE_STREAMS_REQUIRED;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int DATA_TYPE_CANNOT_BE_PROMOTED;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
IDataType::IDataType() : custom_name(nullptr), custom_text_serialization(nullptr)
|
||||
String IDataType::Substream::toString() const
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case ArrayElements:
|
||||
return "ArrayElements";
|
||||
case ArraySizes:
|
||||
return "ArraySizes";
|
||||
case NullableElements:
|
||||
return "NullableElements";
|
||||
case NullMap:
|
||||
return "NullMap";
|
||||
case TupleElement:
|
||||
return "TupleElement(" + tuple_element_name + ", "
|
||||
+ std::to_string(escape_tuple_delimiter) + ")";
|
||||
case DictionaryKeys:
|
||||
return "DictionaryKeys";
|
||||
case DictionaryIndexes:
|
||||
return "DictionaryIndexes";
|
||||
}
|
||||
|
||||
__builtin_unreachable();
|
||||
}
|
||||
|
||||
String IDataType::SubstreamPath::toString() const
|
||||
{
|
||||
WriteBufferFromOwnString wb;
|
||||
wb << "{";
|
||||
for (size_t i = 0; i < size(); ++i)
|
||||
{
|
||||
if (i != 0)
|
||||
wb << ", ";
|
||||
wb << at(i).toString();
|
||||
}
|
||||
wb << "}";
|
||||
return wb.str();
|
||||
}
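These toString helpers are presumably for diagnostics and logging only; an illustrative rendering, assuming a path with a single NullMap element:

    /// Illustrative only:
    IDataType::SubstreamPath path;
    path.push_back(IDataType::Substream::NullMap);
    /// path.toString() renders as "{NullMap}"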
|
||||
|
||||
IDataType::IDataType() : custom_name(nullptr), custom_text_serialization(nullptr), custom_streams(nullptr)
|
||||
{
|
||||
}
|
||||
|
||||
@ -93,42 +134,89 @@ size_t IDataType::getSizeOfValueInMemory() const
|
||||
throw Exception("Value of type " + getName() + " in memory is not of fixed size.", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
|
||||
String IDataType::getFileNameForStream(const String & column_name, const IDataType::SubstreamPath & path)
|
||||
DataTypePtr IDataType::getSubcolumnType(const String & subcolumn_name) const
|
||||
{
|
||||
/// Sizes of arrays (elements of Nested type) are shared (all reside in single file).
|
||||
String nested_table_name = Nested::extractTableName(column_name);
|
||||
auto subcolumn_type = tryGetSubcolumnType(subcolumn_name);
|
||||
if (subcolumn_type)
|
||||
return subcolumn_type;
|
||||
|
||||
bool is_sizes_of_nested_type =
|
||||
path.size() == 1 /// Nested structure may have arrays as nested elements (so effectively we have multidimensional arrays).
|
||||
/// Sizes of arrays are shared only at first level.
|
||||
&& path[0].type == IDataType::Substream::ArraySizes
|
||||
&& nested_table_name != column_name;
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName());
|
||||
}
|
||||
|
||||
size_t array_level = 0;
|
||||
String stream_name = escapeForFileName(is_sizes_of_nested_type ? nested_table_name : column_name);
|
||||
for (const Substream & elem : path)
|
||||
ColumnPtr IDataType::getSubcolumn(const String & subcolumn_name, const IColumn &) const
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName());
|
||||
}
|
||||
|
||||
Names IDataType::getSubcolumnNames() const
|
||||
{
|
||||
NameSet res;
|
||||
enumerateStreams([&res, this](const SubstreamPath & substream_path, const IDataType & /* substream_type */)
|
||||
{
|
||||
if (elem.type == Substream::NullMap)
|
||||
stream_name += ".null";
|
||||
else if (elem.type == Substream::ArraySizes)
|
||||
stream_name += ".size" + toString(array_level);
|
||||
else if (elem.type == Substream::ArrayElements)
|
||||
++array_level;
|
||||
else if (elem.type == Substream::TupleElement)
|
||||
SubstreamPath new_path;
|
||||
/// Iterate over path to try to get intermediate subcolumns for complex nested types.
|
||||
for (const auto & elem : substream_path)
|
||||
{
|
||||
/// For compatibility reasons, we use %2E instead of dot.
|
||||
new_path.push_back(elem);
|
||||
auto subcolumn_name = getSubcolumnNameForStream(new_path);
|
||||
if (!subcolumn_name.empty() && tryGetSubcolumnType(subcolumn_name))
|
||||
res.insert(subcolumn_name);
|
||||
}
|
||||
});
|
||||
|
||||
return Names(std::make_move_iterator(res.begin()), std::make_move_iterator(res.end()));
|
||||
}
|
||||
|
||||
static String getNameForSubstreamPath(
|
||||
String stream_name,
|
||||
const IDataType::SubstreamPath & path,
|
||||
bool escape_tuple_delimiter)
|
||||
{
|
||||
size_t array_level = 0;
|
||||
for (const auto & elem : path)
|
||||
{
|
||||
if (elem.type == IDataType::Substream::NullMap)
|
||||
stream_name += ".null";
|
||||
else if (elem.type == IDataType::Substream::ArraySizes)
|
||||
stream_name += ".size" + toString(array_level);
|
||||
else if (elem.type == IDataType::Substream::ArrayElements)
|
||||
++array_level;
|
||||
else if (elem.type == IDataType::Substream::DictionaryKeys)
|
||||
stream_name += ".dict";
|
||||
else if (elem.type == IDataType::Substream::TupleElement)
|
||||
{
|
||||
/// For compatibility reasons, we use %2E (an escaped dot) instead of a dot.
/// This is because nested data may be represented not by Array of Tuple,
/// but by separate Array columns with names of the form a.b,
/// and such a name is encoded as a whole.
|
||||
stream_name += "%2E" + escapeForFileName(elem.tuple_element_name);
|
||||
stream_name += (escape_tuple_delimiter && elem.escape_tuple_delimiter ?
|
||||
escapeForFileName(".") : ".") + escapeForFileName(elem.tuple_element_name);
|
||||
}
|
||||
else if (elem.type == Substream::DictionaryKeys)
|
||||
stream_name += ".dict";
|
||||
}
|
||||
|
||||
return stream_name;
|
||||
}
|
||||
|
||||
String IDataType::getFileNameForStream(const NameAndTypePair & column, const SubstreamPath & path)
|
||||
{
|
||||
auto name_in_storage = column.getNameInStorage();
|
||||
auto nested_storage_name = Nested::extractTableName(name_in_storage);
|
||||
|
||||
if (name_in_storage != nested_storage_name && (path.size() == 1 && path[0].type == IDataType::Substream::ArraySizes))
|
||||
name_in_storage = nested_storage_name;
|
||||
|
||||
auto stream_name = escapeForFileName(name_in_storage);
|
||||
return getNameForSubstreamPath(std::move(stream_name), path, true);
|
||||
}
|
||||
|
||||
String IDataType::getSubcolumnNameForStream(const SubstreamPath & path)
|
||||
{
|
||||
auto subcolumn_name = getNameForSubstreamPath("", path, false);
|
||||
if (!subcolumn_name.empty())
|
||||
subcolumn_name = subcolumn_name.substr(1); // It starts with a dot.
|
||||
|
||||
return subcolumn_name;
|
||||
}
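A worked example of the two naming functions (hypothetical Nested column n with a nested field x, so the storage column is "n.x" of some Array type):

    /// Illustrative only:
    ///   path = {ArraySizes}     getFileNameForStream(...)      -> "n.size0"  (shared by all "n.*" arrays)
    ///   path = {ArrayElements}  getFileNameForStream(...)      -> "n%2Ex"    (the dot is escaped in file names)
    ///   path = {ArraySizes}     getSubcolumnNameForStream(...) -> "size0"    (no escaping, leading dot stripped)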
|
||||
|
||||
bool IDataType::isSpecialCompressionAllowed(const SubstreamPath & path)
|
||||
{
|
||||
@ -147,6 +235,102 @@ void IDataType::insertDefaultInto(IColumn & column) const
|
||||
column.insertDefault();
|
||||
}
|
||||
|
||||
void IDataType::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const
|
||||
{
|
||||
if (custom_streams)
|
||||
custom_streams->enumerateStreams(callback, path);
|
||||
else
|
||||
enumerateStreamsImpl(callback, path);
|
||||
}
|
||||
|
||||
void IDataType::serializeBinaryBulkStatePrefix(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
if (custom_streams)
|
||||
custom_streams->serializeBinaryBulkStatePrefix(settings, state);
|
||||
else
|
||||
serializeBinaryBulkStatePrefixImpl(settings, state);
|
||||
}
|
||||
|
||||
void IDataType::serializeBinaryBulkStateSuffix(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
if (custom_streams)
|
||||
custom_streams->serializeBinaryBulkStateSuffix(settings, state);
|
||||
else
|
||||
serializeBinaryBulkStateSuffixImpl(settings, state);
|
||||
}
|
||||
|
||||
void IDataType::deserializeBinaryBulkStatePrefix(
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
if (custom_streams)
|
||||
custom_streams->deserializeBinaryBulkStatePrefix(settings, state);
|
||||
else
|
||||
deserializeBinaryBulkStatePrefixImpl(settings, state);
|
||||
}
|
||||
|
||||
void IDataType::serializeBinaryBulkWithMultipleStreams(
|
||||
const IColumn & column,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
if (custom_streams)
|
||||
custom_streams->serializeBinaryBulkWithMultipleStreams(column, offset, limit, settings, state);
|
||||
else
|
||||
serializeBinaryBulkWithMultipleStreamsImpl(column, offset, limit, settings, state);
|
||||
}
|
||||
|
||||
void IDataType::deserializeBinaryBulkWithMultipleStreamsImpl(
|
||||
IColumn & column,
|
||||
size_t limit,
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & /* state */,
|
||||
SubstreamsCache * /* cache */) const
|
||||
{
|
||||
if (ReadBuffer * stream = settings.getter(settings.path))
|
||||
deserializeBinaryBulk(column, *stream, limit, settings.avg_value_size_hint);
|
||||
}
|
||||
|
||||
|
||||
void IDataType::deserializeBinaryBulkWithMultipleStreams(
|
||||
ColumnPtr & column,
|
||||
size_t limit,
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsCache * cache) const
|
||||
{
|
||||
if (custom_streams)
|
||||
{
|
||||
custom_streams->deserializeBinaryBulkWithMultipleStreams(column, limit, settings, state, cache);
|
||||
return;
|
||||
}
|
||||
|
||||
/// Do not cache complex types, because they can be constructed
/// from their subcolumns, which are in the cache.
|
||||
if (!haveSubtypes())
|
||||
{
|
||||
auto cached_column = getFromSubstreamsCache(cache, settings.path);
|
||||
if (cached_column)
|
||||
{
|
||||
column = cached_column;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
auto mutable_column = column->assumeMutable();
|
||||
deserializeBinaryBulkWithMultipleStreamsImpl(*mutable_column, limit, settings, state, cache);
|
||||
column = std::move(mutable_column);
|
||||
|
||||
if (!haveSubtypes())
|
||||
addToSubstreamsCache(cache, settings.path, column);
|
||||
}
|
||||
|
||||
void IDataType::serializeAsTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
if (custom_text_serialization)
|
||||
@ -243,6 +427,27 @@ void IDataType::setCustomization(DataTypeCustomDescPtr custom_desc_) const
|
||||
|
||||
if (custom_desc_->text_serialization)
|
||||
custom_text_serialization = std::move(custom_desc_->text_serialization);
|
||||
|
||||
if (custom_desc_->streams)
|
||||
custom_streams = std::move(custom_desc_->streams);
|
||||
}
|
||||
|
||||
void IDataType::addToSubstreamsCache(SubstreamsCache * cache, const SubstreamPath & path, ColumnPtr column)
|
||||
{
|
||||
if (cache && !path.empty())
|
||||
cache->emplace(getSubcolumnNameForStream(path), column);
|
||||
}
|
||||
|
||||
ColumnPtr IDataType::getFromSubstreamsCache(SubstreamsCache * cache, const SubstreamPath & path)
|
||||
{
|
||||
if (!cache || path.empty())
|
||||
return nullptr;
|
||||
|
||||
auto it = cache->find(getSubcolumnNameForStream(path));
|
||||
if (it == cache->end())
|
||||
return nullptr;
|
||||
|
||||
return it->second;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -3,7 +3,9 @@
|
||||
#include <memory>
|
||||
#include <Common/COW.h>
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <DataTypes/DataTypeCustom.h>
|
||||
#include <Core/Names.h>
|
||||
#include <Core/Types.h>
|
||||
#include <DataTypes/DataTypeCustom_fwd.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -27,6 +29,8 @@ using DataTypes = std::vector<DataTypePtr>;
|
||||
class ProtobufReader;
|
||||
class ProtobufWriter;
|
||||
|
||||
struct NameAndTypePair;
|
||||
|
||||
|
||||
/** Properties of data type.
|
||||
* Contains methods for serialization/deserialization.
|
||||
@ -91,30 +95,42 @@ public:
|
||||
|
||||
TupleElement,
|
||||
|
||||
MapElement,
|
||||
|
||||
DictionaryKeys,
|
||||
DictionaryIndexes,
|
||||
};
|
||||
Type type;
|
||||
|
||||
/// Index of tuple element, starting at 1.
|
||||
/// Index of tuple element (starting at 1), or name of the element.
|
||||
String tuple_element_name;
|
||||
|
||||
/// Do we need to escape a dot in filenames for tuple elements.
|
||||
bool escape_tuple_delimiter = true;
|
||||
|
||||
Substream(Type type_) : type(type_) {}
|
||||
|
||||
String toString() const;
|
||||
};
|
||||
|
||||
using SubstreamPath = std::vector<Substream>;
|
||||
struct SubstreamPath : public std::vector<Substream>
|
||||
{
|
||||
String toString() const;
|
||||
};
|
||||
|
||||
/// Cache for common substreams of one type, but possibly for different subcolumns of it.
|
||||
/// E.g. sizes of arrays of Nested data type.
|
||||
using SubstreamsCache = std::unordered_map<String, ColumnPtr>;
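/// Minimal usage sketch (an assumption, not part of this change): pass the same cache object
/// when deserializing a column and its subcolumns, so that shared substreams (e.g. the array
/// sizes of a Nested column) are read from disk only once; each call still keeps its own
/// deserialization state.
///
///     IDataType::SubstreamsCache cache;
///     type->deserializeBinaryBulkWithMultipleStreams(column, limit, settings, state, &cache);
///     subcolumn_type->deserializeBinaryBulkWithMultipleStreams(subcolumn, limit, settings, subcolumn_state, &cache);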
|
||||
|
||||
using StreamCallback = std::function<void(const SubstreamPath &, const IDataType &)>;
|
||||
|
||||
virtual void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const
|
||||
{
|
||||
callback(path, *this);
|
||||
}
|
||||
void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const;
|
||||
void enumerateStreams(const StreamCallback & callback, SubstreamPath && path) const { enumerateStreams(callback, path); }
|
||||
void enumerateStreams(const StreamCallback & callback) const { enumerateStreams(callback, {}); }
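/// Example usage sketch (an assumption): collect the on-disk stream names of a column by
/// combining enumerateStreams() with getFileNameForStream().
///
///     Names stream_names;
///     column.type->enumerateStreams([&](const SubstreamPath & substream_path, const IDataType &)
///     {
///         stream_names.push_back(IDataType::getFileNameForStream(column, substream_path));
///     });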
|
||||
|
||||
virtual DataTypePtr tryGetSubcolumnType(const String & /* subcolumn_name */) const { return nullptr; }
|
||||
DataTypePtr getSubcolumnType(const String & subcolumn_name) const;
|
||||
virtual ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const;
|
||||
Names getSubcolumnNames() const;
|
||||
|
||||
using OutputStreamGetter = std::function<WriteBuffer*(const SubstreamPath &)>;
|
||||
using InputStreamGetter = std::function<ReadBuffer*(const SubstreamPath &)>;
|
||||
|
||||
@ -155,19 +171,19 @@ public:
|
||||
};
|
||||
|
||||
/// Call before serializeBinaryBulkWithMultipleStreams chain to write something before first mark.
|
||||
virtual void serializeBinaryBulkStatePrefix(
|
||||
SerializeBinaryBulkSettings & /*settings*/,
|
||||
SerializeBinaryBulkStatePtr & /*state*/) const {}
|
||||
void serializeBinaryBulkStatePrefix(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const;
|
||||
|
||||
/// Call after serializeBinaryBulkWithMultipleStreams chain to finish serialization.
|
||||
virtual void serializeBinaryBulkStateSuffix(
|
||||
SerializeBinaryBulkSettings & /*settings*/,
|
||||
SerializeBinaryBulkStatePtr & /*state*/) const {}
|
||||
void serializeBinaryBulkStateSuffix(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const;
|
||||
|
||||
/// Call before deserializeBinaryBulkWithMultipleStreams chain to get DeserializeBinaryBulkStatePtr.
|
||||
virtual void deserializeBinaryBulkStatePrefix(
|
||||
DeserializeBinaryBulkSettings & /*settings*/,
|
||||
DeserializeBinaryBulkStatePtr & /*state*/) const {}
|
||||
void deserializeBinaryBulkStatePrefix(
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state) const;
|
||||
|
||||
/** 'offset' and 'limit' are used to specify range.
|
||||
* limit = 0 - means no limit.
|
||||
@ -175,27 +191,20 @@ public:
|
||||
* offset + limit could be greater than size of column
|
||||
* - in that case, column is serialized till the end.
|
||||
*/
|
||||
virtual void serializeBinaryBulkWithMultipleStreams(
|
||||
void serializeBinaryBulkWithMultipleStreams(
|
||||
const IColumn & column,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & /*state*/) const
|
||||
{
|
||||
if (WriteBuffer * stream = settings.getter(settings.path))
|
||||
serializeBinaryBulk(column, *stream, offset, limit);
|
||||
}
|
||||
SerializeBinaryBulkStatePtr & state) const;
|
||||
|
||||
/// Read no more than limit values and append them into column.
|
||||
virtual void deserializeBinaryBulkWithMultipleStreams(
|
||||
IColumn & column,
|
||||
void deserializeBinaryBulkWithMultipleStreams(
|
||||
ColumnPtr & column,
|
||||
size_t limit,
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & /*state*/) const
|
||||
{
|
||||
if (ReadBuffer * stream = settings.getter(settings.path))
|
||||
deserializeBinaryBulk(column, *stream, limit, settings.avg_value_size_hint);
|
||||
}
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsCache * cache = nullptr) const;
|
||||
|
||||
/** Override these methods for data types that require just single stream (most of data types).
|
||||
*/
|
||||
@ -268,6 +277,41 @@ public:
|
||||
protected:
|
||||
virtual String doGetName() const;
|
||||
|
||||
virtual void enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const
|
||||
{
|
||||
callback(path, *this);
|
||||
}
|
||||
|
||||
virtual void serializeBinaryBulkStatePrefixImpl(
|
||||
SerializeBinaryBulkSettings & /*settings*/,
|
||||
SerializeBinaryBulkStatePtr & /*state*/) const {}
|
||||
|
||||
virtual void serializeBinaryBulkStateSuffixImpl(
|
||||
SerializeBinaryBulkSettings & /*settings*/,
|
||||
SerializeBinaryBulkStatePtr & /*state*/) const {}
|
||||
|
||||
virtual void deserializeBinaryBulkStatePrefixImpl(
|
||||
DeserializeBinaryBulkSettings & /*settings*/,
|
||||
DeserializeBinaryBulkStatePtr & /*state*/) const {}
|
||||
|
||||
virtual void serializeBinaryBulkWithMultipleStreamsImpl(
|
||||
const IColumn & column,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & /*state*/) const
|
||||
{
|
||||
if (WriteBuffer * stream = settings.getter(settings.path))
|
||||
serializeBinaryBulk(column, *stream, offset, limit);
|
||||
}
|
||||
|
||||
virtual void deserializeBinaryBulkWithMultipleStreamsImpl(
|
||||
IColumn & column,
|
||||
size_t limit,
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsCache * cache) const;
|
||||
|
||||
/// Default implementations of text serialization in case of 'custom_text_serialization' is not set.
|
||||
|
||||
virtual void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
|
||||
@ -286,6 +330,9 @@ protected:
|
||||
}
|
||||
|
||||
public:
|
||||
static void addToSubstreamsCache(SubstreamsCache * cache, const SubstreamPath & path, ColumnPtr column);
|
||||
static ColumnPtr getFromSubstreamsCache(SubstreamsCache * cache, const SubstreamPath & path);
|
||||
|
||||
/** Create empty column for corresponding type.
|
||||
*/
|
||||
virtual MutableColumnPtr createColumn() const = 0;
|
||||
@ -443,7 +490,8 @@ public:
|
||||
/// Updates avg_value_size_hint for newly read column. Uses to optimize deserialization. Zero expected for first column.
|
||||
static void updateAvgValueSizeHint(const IColumn & column, double & avg_value_size_hint);
|
||||
|
||||
static String getFileNameForStream(const String & column_name, const SubstreamPath & path);
|
||||
static String getFileNameForStream(const NameAndTypePair & column, const SubstreamPath & path);
|
||||
static String getSubcolumnNameForStream(const SubstreamPath & path);
|
||||
|
||||
/// Substream path supports special compression methods like codec Delta.
|
||||
/// For all other substreams (like ArraySizes, NullMasks, etc.) we use only
|
||||
@ -458,9 +506,11 @@ private:
|
||||
/// This is mutable to allow setting custom name and serialization on `const IDataType` post construction.
|
||||
mutable DataTypeCustomNamePtr custom_name;
|
||||
mutable DataTypeCustomTextSerializationPtr custom_text_serialization;
|
||||
mutable DataTypeCustomStreamsPtr custom_streams;
|
||||
|
||||
public:
|
||||
const IDataTypeCustomName * getCustomName() const { return custom_name.get(); }
|
||||
const IDataTypeCustomStreams * getCustomStreams() const { return custom_streams.get(); }
|
||||
};
|
||||
|
||||
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
#include <DataTypes/DataTypeNested.h>
|
||||
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
@ -84,7 +85,8 @@ Block flatten(const Block & block)
|
||||
|
||||
for (const auto & elem : block)
|
||||
{
|
||||
if (const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(elem.type.get()))
|
||||
const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(elem.type.get());
|
||||
if (type_arr)
|
||||
{
|
||||
const DataTypeTuple * type_tuple = typeid_cast<const DataTypeTuple *>(type_arr->getNestedType().get());
|
||||
if (type_tuple && type_tuple->haveExplicitNames())
|
||||
@ -128,32 +130,67 @@ Block flatten(const Block & block)
|
||||
return res;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
using NameToDataType = std::map<String, DataTypePtr>;
|
||||
|
||||
NameToDataType getSubcolumnsOfNested(const NamesAndTypesList & names_and_types)
|
||||
{
|
||||
std::unordered_map<String, NamesAndTypesList> nested;
|
||||
for (const auto & name_type : names_and_types)
|
||||
{
|
||||
const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(name_type.type.get());
|
||||
|
||||
/// Ignore the true Nested type, but try to unite flattened arrays into a Nested type.
|
||||
if (!isNested(name_type.type) && type_arr)
|
||||
{
|
||||
auto split = splitName(name_type.name);
|
||||
if (!split.second.empty())
|
||||
nested[split.first].emplace_back(split.second, type_arr->getNestedType());
|
||||
}
|
||||
}
|
||||
|
||||
std::map<String, DataTypePtr> nested_types;
|
||||
|
||||
for (const auto & [name, elems] : nested)
|
||||
nested_types.emplace(name, createNested(elems.getTypes(), elems.getNames()));
|
||||
|
||||
return nested_types;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
NamesAndTypesList collect(const NamesAndTypesList & names_and_types)
|
||||
{
|
||||
NamesAndTypesList res;
|
||||
auto nested_types = getSubcolumnsOfNested(names_and_types);
|
||||
|
||||
std::map<std::string, NamesAndTypesList> nested;
|
||||
for (const auto & name_type : names_and_types)
|
||||
{
|
||||
bool collected = false;
|
||||
if (const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(name_type.type.get()))
|
||||
{
|
||||
auto split = splitName(name_type.name);
|
||||
if (!split.second.empty())
|
||||
{
|
||||
nested[split.first].emplace_back(split.second, type_arr->getNestedType());
|
||||
collected = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!collected)
|
||||
if (!nested_types.count(splitName(name_type.name).first))
|
||||
res.push_back(name_type);
|
||||
}
|
||||
|
||||
for (const auto & name_elems : nested)
|
||||
res.emplace_back(name_elems.first, std::make_shared<DataTypeArray>(
|
||||
std::make_shared<DataTypeTuple>(name_elems.second.getTypes(), name_elems.second.getNames())));
|
||||
for (const auto & name_type : nested_types)
|
||||
res.emplace_back(name_type.first, name_type.second);
|
||||
|
||||
return res;
|
||||
}
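/// Illustrative example of collect() (hypothetical column names): given
///     "id" UInt64, "n.a" Array(UInt32), "n.b" Array(String)
/// the result keeps "id" as is and unites the flattened arrays into a single
///     "n" Nested(a UInt32, b String)
/// column (an Array(Tuple(...)) under the hood, produced by createNested() above).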
|
||||
|
||||
NamesAndTypesList convertToSubcolumns(const NamesAndTypesList & names_and_types)
|
||||
{
|
||||
auto nested_types = getSubcolumnsOfNested(names_and_types);
|
||||
auto res = names_and_types;
|
||||
|
||||
for (auto & name_type : res)
|
||||
{
|
||||
auto split = splitName(name_type.name);
|
||||
if (name_type.isSubcolumn() || split.second.empty())
|
||||
continue;
|
||||
|
||||
auto it = nested_types.find(split.first);
|
||||
if (it != nested_types.end())
|
||||
name_type = NameAndTypePair{split.first, split.second, it->second, it->second->getSubcolumnType(split.second)};
|
||||
}
|
||||
|
||||
return res;
|
||||
}
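/// Illustrative example of convertToSubcolumns() (hypothetical column names): an old-style
/// column "n.a" Array(UInt32) is rewritten as subcolumn "a" of the Nested column "n",
/// i.e. NameAndTypePair{"n", "a", <Nested type>, <subcolumn type>}; columns that are not
/// part of any Nested group are left untouched.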
|
||||
|
@ -23,6 +23,9 @@ namespace Nested
|
||||
/// Collect Array columns in a form of `column_name.element_name` to single Array(Tuple(...)) column.
|
||||
NamesAndTypesList collect(const NamesAndTypesList & names_and_types);
|
||||
|
||||
/// Convert old-style nested (single arrays with same prefix, `n.a`, `n.b`...) to subcolumns of data type Nested.
|
||||
NamesAndTypesList convertToSubcolumns(const NamesAndTypesList & names_and_types);
|
||||
|
||||
/// Check that sizes of arrays - elements of nested data structures - are equal.
|
||||
void validateArraySizes(const Block & block);
|
||||
}
|
||||
|
@ -28,9 +28,11 @@ SRCS(
|
||||
DataTypeLowCardinality.cpp
|
||||
DataTypeLowCardinalityHelpers.cpp
|
||||
DataTypeMap.cpp
|
||||
DataTypeNested.cpp
|
||||
DataTypeNothing.cpp
|
||||
DataTypeNullable.cpp
|
||||
DataTypeNumberBase.cpp
|
||||
DataTypeOneElementTuple.cpp
|
||||
DataTypeString.cpp
|
||||
DataTypeTuple.cpp
|
||||
DataTypeUUID.cpp
|
||||
|
@ -23,11 +23,19 @@
|
||||
# include <Databases/MySQL/DatabaseConnectionMySQL.h>
|
||||
# include <Databases/MySQL/MaterializeMySQLSettings.h>
|
||||
# include <Databases/MySQL/DatabaseMaterializeMySQL.h>
|
||||
# include <Interpreters/evaluateConstantExpression.h>
|
||||
# include <Common/parseAddress.h>
|
||||
# include <mysqlxx/Pool.h>
|
||||
#endif
|
||||
|
||||
#if USE_MYSQL || USE_LIBPQXX
|
||||
#include <Interpreters/evaluateConstantExpression.h>
|
||||
#include <Common/parseAddress.h>
|
||||
#endif
|
||||
|
||||
#if USE_LIBPQXX
|
||||
#include <Databases/PostgreSQL/DatabasePostgreSQL.h> // Y_IGNORE
|
||||
#include <Storages/PostgreSQL/PostgreSQLConnection.h>
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -80,7 +88,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
|
||||
const String & engine_name = engine_define->engine->name;
|
||||
const UUID & uuid = create.uuid;
|
||||
|
||||
if (engine_name != "MySQL" && engine_name != "MaterializeMySQL" && engine_name != "Lazy" && engine_define->engine->arguments)
|
||||
if (engine_name != "MySQL" && engine_name != "MaterializeMySQL" && engine_name != "Lazy" && engine_name != "PostgreSQL" && engine_define->engine->arguments)
|
||||
throw Exception("Database engine " + engine_name + " cannot have arguments", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
if (engine_define->engine->parameters || engine_define->partition_by || engine_define->primary_key || engine_define->order_by ||
|
||||
@ -168,6 +176,44 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
|
||||
return std::make_shared<DatabaseLazy>(database_name, metadata_path, cache_expiration_time_seconds, context);
|
||||
}
|
||||
|
||||
#if USE_LIBPQXX
|
||||
|
||||
else if (engine_name == "PostgreSQL")
|
||||
{
|
||||
const ASTFunction * engine = engine_define->engine;
|
||||
|
||||
if (!engine->arguments || engine->arguments->children.size() < 4 || engine->arguments->children.size() > 5)
|
||||
throw Exception(fmt::format(
|
||||
"{} Database require host:port, database_name, username, password arguments "
|
||||
"[, use_table_cache = 0].", engine_name),
|
||||
ErrorCodes::BAD_ARGUMENTS);
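/// Illustrative engine definition matching the argument parsing below (host, database and
/// credentials are example values):
///     CREATE DATABASE postgres_db
///     ENGINE = PostgreSQL('postgres-host:5432', 'postgres_db', 'user', 'password', 1);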
|
||||
|
||||
ASTs & engine_args = engine->arguments->children;
|
||||
|
||||
for (auto & engine_arg : engine_args)
|
||||
engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, context);
|
||||
|
||||
const auto & host_port = safeGetLiteralValue<String>(engine_args[0], engine_name);
|
||||
const auto & postgres_database_name = safeGetLiteralValue<String>(engine_args[1], engine_name);
|
||||
const auto & username = safeGetLiteralValue<String>(engine_args[2], engine_name);
|
||||
const auto & password = safeGetLiteralValue<String>(engine_args[3], engine_name);
|
||||
|
||||
auto use_table_cache = 0;
|
||||
if (engine->arguments->children.size() == 5)
|
||||
use_table_cache = safeGetLiteralValue<UInt64>(engine_args[4], engine_name);
|
||||
|
||||
auto parsed_host_port = parseAddress(host_port, 5432);
|
||||
|
||||
/// no connection is made here
|
||||
auto connection = std::make_shared<PostgreSQLConnection>(
|
||||
postgres_database_name, parsed_host_port.first, parsed_host_port.second, username, password);
|
||||
|
||||
return std::make_shared<DatabasePostgreSQL>(
|
||||
context, metadata_path, engine_define, database_name, postgres_database_name, connection, use_table_cache);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
throw Exception("Unknown database engine: " + engine_name, ErrorCodes::UNKNOWN_DATABASE_ENGINE);
|
||||
}
|
||||
|
||||
|
415
src/Databases/PostgreSQL/DatabasePostgreSQL.cpp
Normal file
415
src/Databases/PostgreSQL/DatabasePostgreSQL.cpp
Normal file
@ -0,0 +1,415 @@
|
||||
#include <Databases/PostgreSQL/DatabasePostgreSQL.h>
|
||||
|
||||
#if USE_LIBPQXX
|
||||
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <Storages/StoragePostgreSQL.h>
|
||||
#include <Storages/PostgreSQL/PostgreSQLConnection.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ParserCreateQuery.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
#include <Common/escapeForFileName.h>
|
||||
#include <Poco/DirectoryIterator.h>
|
||||
#include <Poco/File.h>
|
||||
#include <Databases/PostgreSQL/fetchPostgreSQLTableStructure.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int UNKNOWN_TABLE;
|
||||
extern const int TABLE_IS_DROPPED;
|
||||
extern const int TABLE_ALREADY_EXISTS;
|
||||
}
|
||||
|
||||
static const auto suffix = ".removed";
|
||||
static const auto cleaner_reschedule_ms = 60000;
|
||||
|
||||
DatabasePostgreSQL::DatabasePostgreSQL(
|
||||
const Context & context,
|
||||
const String & metadata_path_,
|
||||
const ASTStorage * database_engine_define_,
|
||||
const String & dbname_,
|
||||
const String & postgres_dbname,
|
||||
PostgreSQLConnectionPtr connection_,
|
||||
const bool cache_tables_)
|
||||
: IDatabase(dbname_)
|
||||
, global_context(context.getGlobalContext())
|
||||
, metadata_path(metadata_path_)
|
||||
, database_engine_define(database_engine_define_->clone())
|
||||
, dbname(postgres_dbname)
|
||||
, connection(std::move(connection_))
|
||||
, cache_tables(cache_tables_)
|
||||
{
|
||||
cleaner_task = context.getSchedulePool().createTask("PostgreSQLCleanerTask", [this]{ removeOutdatedTables(); });
|
||||
cleaner_task->deactivate();
|
||||
}
|
||||
|
||||
|
||||
bool DatabasePostgreSQL::empty() const
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
|
||||
auto tables_list = fetchTablesList();
|
||||
|
||||
for (const auto & table_name : tables_list)
|
||||
if (!detached_or_dropped.count(table_name))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
DatabaseTablesIteratorPtr DatabasePostgreSQL::getTablesIterator(
|
||||
const Context & context, const FilterByNameFunction & /* filter_by_table_name */)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
|
||||
Tables tables;
|
||||
auto table_names = fetchTablesList();
|
||||
|
||||
for (const auto & table_name : table_names)
|
||||
if (!detached_or_dropped.count(table_name))
|
||||
tables[table_name] = fetchTable(table_name, context, true);
|
||||
|
||||
return std::make_unique<DatabaseTablesSnapshotIterator>(tables, database_name);
|
||||
}
|
||||
|
||||
|
||||
std::unordered_set<std::string> DatabasePostgreSQL::fetchTablesList() const
|
||||
{
|
||||
std::unordered_set<std::string> tables;
|
||||
std::string query = "SELECT tablename FROM pg_catalog.pg_tables "
|
||||
"WHERE schemaname != 'pg_catalog' AND schemaname != 'information_schema'";
|
||||
pqxx::read_transaction tx(*connection->conn());
|
||||
|
||||
for (auto table_name : tx.stream<std::string>(query))
|
||||
tables.insert(std::get<0>(table_name));
|
||||
|
||||
return tables;
|
||||
}
|
||||
|
||||
|
||||
bool DatabasePostgreSQL::checkPostgresTable(const String & table_name) const
|
||||
{
|
||||
if (table_name.find('\'') != std::string::npos
|
||||
|| table_name.find('\\') != std::string::npos)
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"PostgreSQL table name cannot contain single quote or backslash characters, passed {}", table_name);
|
||||
}
|
||||
|
||||
pqxx::nontransaction tx(*connection->conn());
|
||||
|
||||
try
|
||||
{
|
||||
/// Casting table_name::regclass throws pqxx::undefined_table exception if table_name is incorrect.
|
||||
pqxx::result result = tx.exec(fmt::format(
|
||||
"SELECT '{}'::regclass, tablename "
|
||||
"FROM pg_catalog.pg_tables "
|
||||
"WHERE schemaname != 'pg_catalog' AND schemaname != 'information_schema' "
|
||||
"AND tablename = '{}'", table_name, table_name));
|
||||
}
|
||||
catch (pqxx::undefined_table const &)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage("while checking postgresql table existence");
|
||||
throw;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool DatabasePostgreSQL::isTableExist(const String & table_name, const Context & /* context */) const
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
|
||||
if (detached_or_dropped.count(table_name))
|
||||
return false;
|
||||
|
||||
return checkPostgresTable(table_name);
|
||||
}
|
||||
|
||||
|
||||
StoragePtr DatabasePostgreSQL::tryGetTable(const String & table_name, const Context & context) const
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
|
||||
if (!detached_or_dropped.count(table_name))
|
||||
return fetchTable(table_name, context, false);
|
||||
|
||||
return StoragePtr{};
|
||||
}
|
||||
|
||||
|
||||
StoragePtr DatabasePostgreSQL::fetchTable(const String & table_name, const Context & context, const bool table_checked) const
|
||||
{
|
||||
if (!cache_tables || !cached_tables.count(table_name))
|
||||
{
|
||||
if (!table_checked && !checkPostgresTable(table_name))
|
||||
return StoragePtr{};
|
||||
|
||||
auto use_nulls = context.getSettingsRef().external_table_functions_use_nulls;
|
||||
auto columns = fetchPostgreSQLTableStructure(connection->conn(), table_name, use_nulls);
|
||||
|
||||
if (!columns)
|
||||
return StoragePtr{};
|
||||
|
||||
auto storage = StoragePostgreSQL::create(
|
||||
StorageID(database_name, table_name), table_name, std::make_shared<PostgreSQLConnection>(connection->conn_str()),
|
||||
ColumnsDescription{*columns}, ConstraintsDescription{}, context);
|
||||
|
||||
if (cache_tables)
|
||||
cached_tables[table_name] = storage;
|
||||
|
||||
return storage;
|
||||
}
|
||||
|
||||
if (table_checked || checkPostgresTable(table_name))
|
||||
{
|
||||
return cached_tables[table_name];
|
||||
}
|
||||
|
||||
/// Table does not exist anymore
|
||||
cached_tables.erase(table_name);
|
||||
return StoragePtr{};
|
||||
}
|
||||
|
||||
|
||||
void DatabasePostgreSQL::attachTable(const String & table_name, const StoragePtr & storage, const String &)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock{mutex};
|
||||
|
||||
if (!checkPostgresTable(table_name))
|
||||
throw Exception(fmt::format("Cannot attach table {}.{} because it does not exist", database_name, table_name), ErrorCodes::UNKNOWN_TABLE);
|
||||
|
||||
if (!detached_or_dropped.count(table_name))
|
||||
throw Exception(fmt::format("Cannot attach table {}.{}. It already exists", database_name, table_name), ErrorCodes::TABLE_ALREADY_EXISTS);
|
||||
|
||||
if (cache_tables)
|
||||
cached_tables[table_name] = storage;
|
||||
|
||||
detached_or_dropped.erase(table_name);
|
||||
|
||||
Poco::File table_marked_as_removed(getMetadataPath() + '/' + escapeForFileName(table_name) + suffix);
|
||||
if (table_marked_as_removed.exists())
|
||||
table_marked_as_removed.remove();
|
||||
}
|
||||
|
||||
|
||||
StoragePtr DatabasePostgreSQL::detachTable(const String & table_name)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock{mutex};
|
||||
|
||||
if (detached_or_dropped.count(table_name))
|
||||
throw Exception(fmt::format("Cannot detach table {}.{}. It is already dropped/detached", database_name, table_name), ErrorCodes::TABLE_IS_DROPPED);
|
||||
|
||||
if (!checkPostgresTable(table_name))
|
||||
throw Exception(fmt::format("Cannot detach table {}.{} because it does not exist", database_name, table_name), ErrorCodes::UNKNOWN_TABLE);
|
||||
|
||||
if (cache_tables)
|
||||
cached_tables.erase(table_name);
|
||||
|
||||
detached_or_dropped.emplace(table_name);
|
||||
|
||||
/// not used anywhere (for postgres database)
|
||||
return StoragePtr{};
|
||||
}
|
||||
|
||||
|
||||
void DatabasePostgreSQL::createTable(const Context &, const String & table_name, const StoragePtr & storage, const ASTPtr & create_query)
|
||||
{
|
||||
const auto & create = create_query->as<ASTCreateQuery>();
|
||||
|
||||
if (!create->attach)
|
||||
throw Exception("PostgreSQL database engine does not support create table", ErrorCodes::NOT_IMPLEMENTED);
|
||||
|
||||
attachTable(table_name, storage, {});
|
||||
}
|
||||
|
||||
|
||||
void DatabasePostgreSQL::dropTable(const Context &, const String & table_name, bool /* no_delay */)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock{mutex};
|
||||
|
||||
if (!checkPostgresTable(table_name))
|
||||
throw Exception(fmt::format("Cannot drop table {}.{} because it does not exist", database_name, table_name), ErrorCodes::UNKNOWN_TABLE);
|
||||
|
||||
if (detached_or_dropped.count(table_name))
|
||||
throw Exception(fmt::format("Table {}.{} is already dropped/detached", database_name, table_name), ErrorCodes::TABLE_IS_DROPPED);
|
||||
|
||||
Poco::File mark_table_removed(getMetadataPath() + '/' + escapeForFileName(table_name) + suffix);
|
||||
|
||||
try
|
||||
{
|
||||
mark_table_removed.createFile();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
throw;
|
||||
}
|
||||
|
||||
if (cache_tables)
|
||||
cached_tables.erase(table_name);
|
||||
|
||||
detached_or_dropped.emplace(table_name);
|
||||
}
|
||||
|
||||
|
||||
void DatabasePostgreSQL::drop(const Context & /*context*/)
|
||||
{
|
||||
Poco::File(getMetadataPath()).remove(true);
|
||||
}
|
||||
|
||||
|
||||
void DatabasePostgreSQL::loadStoredObjects(Context & /* context */, bool, bool /*force_attach*/)
|
||||
{
|
||||
{
|
||||
std::lock_guard<std::mutex> lock{mutex};
|
||||
Poco::DirectoryIterator iterator(getMetadataPath());
|
||||
|
||||
/// Check for previously dropped tables
|
||||
for (Poco::DirectoryIterator end; iterator != end; ++iterator)
|
||||
{
|
||||
if (iterator->isFile() && endsWith(iterator.name(), suffix))
|
||||
{
|
||||
const auto & file_name = iterator.name();
|
||||
const auto & table_name = unescapeForFileName(file_name.substr(0, file_name.size() - strlen(suffix)));
|
||||
detached_or_dropped.emplace(table_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cleaner_task->activateAndSchedule();
|
||||
}
|
||||
|
||||
|
||||
void DatabasePostgreSQL::removeOutdatedTables()
|
||||
{
|
||||
std::lock_guard<std::mutex> lock{mutex};
|
||||
auto actual_tables = fetchTablesList();
|
||||
|
||||
if (cache_tables)
|
||||
{
|
||||
/// (Tables are cached only after being accessed at least once)
|
||||
for (auto iter = cached_tables.begin(); iter != cached_tables.end();)
|
||||
{
|
||||
if (!actual_tables.count(iter->first))
|
||||
iter = cached_tables.erase(iter);
|
||||
else
|
||||
++iter;
|
||||
}
|
||||
}
|
||||
|
||||
for (auto iter = detached_or_dropped.begin(); iter != detached_or_dropped.end();)
|
||||
{
|
||||
if (!actual_tables.count(*iter))
|
||||
{
|
||||
auto table_name = *iter;
|
||||
iter = detached_or_dropped.erase(iter);
|
||||
Poco::File table_marked_as_removed(getMetadataPath() + '/' + escapeForFileName(table_name) + suffix);
|
||||
if (table_marked_as_removed.exists())
|
||||
table_marked_as_removed.remove();
|
||||
}
|
||||
else
|
||||
++iter;
|
||||
}
|
||||
|
||||
cleaner_task->scheduleAfter(cleaner_reschedule_ms);
|
||||
}
|
||||
|
||||
|
||||
void DatabasePostgreSQL::shutdown()
|
||||
{
|
||||
cleaner_task->deactivate();
|
||||
}
|
||||
|
||||
|
||||
ASTPtr DatabasePostgreSQL::getCreateDatabaseQuery() const
|
||||
{
|
||||
const auto & create_query = std::make_shared<ASTCreateQuery>();
|
||||
create_query->database = getDatabaseName();
|
||||
create_query->set(create_query->storage, database_engine_define);
|
||||
return create_query;
|
||||
}
|
||||
|
||||
|
||||
ASTPtr DatabasePostgreSQL::getCreateTableQueryImpl(const String & table_name, const Context & context, bool throw_on_error) const
|
||||
{
|
||||
auto storage = fetchTable(table_name, context, false);
|
||||
if (!storage)
|
||||
{
|
||||
if (throw_on_error)
|
||||
throw Exception(fmt::format("PostgreSQL table {}.{} does not exist", database_name, table_name), ErrorCodes::UNKNOWN_TABLE);
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto create_table_query = std::make_shared<ASTCreateQuery>();
|
||||
auto table_storage_define = database_engine_define->clone();
|
||||
create_table_query->set(create_table_query->storage, table_storage_define);
|
||||
|
||||
auto columns_declare_list = std::make_shared<ASTColumns>();
|
||||
auto columns_expression_list = std::make_shared<ASTExpressionList>();
|
||||
|
||||
columns_declare_list->set(columns_declare_list->columns, columns_expression_list);
|
||||
create_table_query->set(create_table_query->columns_list, columns_declare_list);
|
||||
|
||||
/// init create query.
|
||||
auto table_id = storage->getStorageID();
|
||||
create_table_query->table = table_id.table_name;
|
||||
create_table_query->database = table_id.database_name;
|
||||
|
||||
auto metadata_snapshot = storage->getInMemoryMetadataPtr();
|
||||
for (const auto & column_type_and_name : metadata_snapshot->getColumns().getOrdinary())
|
||||
{
|
||||
const auto & column_declaration = std::make_shared<ASTColumnDeclaration>();
|
||||
column_declaration->name = column_type_and_name.name;
|
||||
column_declaration->type = getColumnDeclaration(column_type_and_name.type);
|
||||
columns_expression_list->children.emplace_back(column_declaration);
|
||||
}
|
||||
|
||||
ASTStorage * ast_storage = table_storage_define->as<ASTStorage>();
|
||||
ASTs storage_children = ast_storage->children;
|
||||
auto storage_engine_arguments = ast_storage->engine->arguments;
|
||||
|
||||
/// Remove extra engine argument (`use_table_cache`)
|
||||
if (storage_engine_arguments->children.size() > 4)
|
||||
storage_engine_arguments->children.resize(storage_engine_arguments->children.size() - 1);
|
||||
|
||||
/// Add table_name to engine arguments
|
||||
assert(storage_engine_arguments->children.size() >= 2);
|
||||
storage_engine_arguments->children.insert(storage_engine_arguments->children.begin() + 2, std::make_shared<ASTLiteral>(table_id.table_name));
|
||||
|
||||
return create_table_query;
|
||||
}
|
||||
|
||||
|
||||
ASTPtr DatabasePostgreSQL::getColumnDeclaration(const DataTypePtr & data_type) const
|
||||
{
|
||||
WhichDataType which(data_type);
|
||||
|
||||
if (which.isNullable())
|
||||
return makeASTFunction("Nullable", getColumnDeclaration(typeid_cast<const DataTypeNullable *>(data_type.get())->getNestedType()));
|
||||
|
||||
if (which.isArray())
|
||||
return makeASTFunction("Array", getColumnDeclaration(typeid_cast<const DataTypeArray *>(data_type.get())->getNestedType()));
|
||||
|
||||
return std::make_shared<ASTIdentifier>(data_type->getName());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
91
src/Databases/PostgreSQL/DatabasePostgreSQL.h
Normal file
91
src/Databases/PostgreSQL/DatabasePostgreSQL.h
Normal file
@ -0,0 +1,91 @@
|
||||
#pragma once
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
#include "config_core.h"
|
||||
#endif
|
||||
|
||||
#if USE_LIBPQXX
|
||||
|
||||
#include <Databases/DatabasesCommon.h>
|
||||
#include <Core/BackgroundSchedulePool.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class Context;
|
||||
class PostgreSQLConnection;
|
||||
using PostgreSQLConnectionPtr = std::shared_ptr<PostgreSQLConnection>;
|
||||
|
||||
|
||||
/** Real-time access to the table list and table structure of a remote PostgreSQL database.
* All tables are created after pulling out their structure from the remote PostgreSQL server.
* If `cache_tables` == 1 (default: 0), the table structure is cached and not re-checked for being modified,
* but it will be updated during detach->attach.
*/
|
||||
class DatabasePostgreSQL final : public IDatabase
|
||||
{
|
||||
|
||||
public:
|
||||
DatabasePostgreSQL(
|
||||
const Context & context,
|
||||
const String & metadata_path_,
|
||||
const ASTStorage * database_engine_define,
|
||||
const String & dbname_,
|
||||
const String & postgres_dbname,
|
||||
PostgreSQLConnectionPtr connection_,
|
||||
const bool cache_tables_);
|
||||
|
||||
String getEngineName() const override { return "PostgreSQL"; }
|
||||
String getMetadataPath() const override { return metadata_path; }
|
||||
|
||||
bool canContainMergeTreeTables() const override { return false; }
|
||||
bool canContainDistributedTables() const override { return false; }
|
||||
bool shouldBeEmptyOnDetach() const override { return false; }
|
||||
|
||||
ASTPtr getCreateDatabaseQuery() const override;
|
||||
|
||||
bool empty() const override;
|
||||
|
||||
void loadStoredObjects(Context &, bool, bool force_attach) override;
|
||||
|
||||
DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name) override;
|
||||
|
||||
bool isTableExist(const String & name, const Context & context) const override;
|
||||
StoragePtr tryGetTable(const String & name, const Context & context) const override;
|
||||
|
||||
void createTable(const Context &, const String & table_name, const StoragePtr & storage, const ASTPtr & create_query) override;
|
||||
void dropTable(const Context &, const String & table_name, bool no_delay) override;
|
||||
|
||||
void attachTable(const String & table_name, const StoragePtr & storage, const String & relative_table_path) override;
|
||||
StoragePtr detachTable(const String & table_name) override;
|
||||
|
||||
void drop(const Context & /*context*/) override;
|
||||
void shutdown() override;
|
||||
|
||||
protected:
|
||||
ASTPtr getCreateTableQueryImpl(const String & table_name, const Context & context, bool throw_on_error) const override;
|
||||
|
||||
private:
|
||||
const Context & global_context;
|
||||
String metadata_path;
|
||||
ASTPtr database_engine_define;
|
||||
String dbname;
|
||||
PostgreSQLConnectionPtr connection;
|
||||
const bool cache_tables;
|
||||
|
||||
mutable Tables cached_tables;
|
||||
std::unordered_set<std::string> detached_or_dropped;
|
||||
BackgroundSchedulePool::TaskHolder cleaner_task;
|
||||
|
||||
bool checkPostgresTable(const String & table_name) const;
|
||||
std::unordered_set<std::string> fetchTablesList() const;
|
||||
StoragePtr fetchTable(const String & table_name, const Context & context, const bool table_checked) const;
|
||||
void removeOutdatedTables();
|
||||
ASTPtr getColumnDeclaration(const DataTypePtr & data_type) const;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
139
src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
Normal file
139
src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
Normal file
@ -0,0 +1,139 @@
|
||||
#include <Databases/PostgreSQL/fetchPostgreSQLTableStructure.h>
|
||||
|
||||
#if USE_LIBPQXX
|
||||
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <boost/algorithm/string/split.hpp>
|
||||
#include <boost/algorithm/string/trim.hpp>
|
||||
#include <pqxx/pqxx>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int UNKNOWN_TABLE;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
|
||||
static DataTypePtr convertPostgreSQLDataType(std::string & type, bool is_nullable, uint16_t dimensions)
|
||||
{
|
||||
DataTypePtr res;
|
||||
|
||||
/// Get rid of trailing '[]' for arrays
|
||||
if (dimensions)
|
||||
while (type.ends_with("[]"))
|
||||
type.resize(type.size() - 2);
|
||||
|
||||
if (type == "smallint")
|
||||
res = std::make_shared<DataTypeInt16>();
|
||||
else if (type == "integer")
|
||||
res = std::make_shared<DataTypeInt32>();
|
||||
else if (type == "bigint")
|
||||
res = std::make_shared<DataTypeInt64>();
|
||||
else if (type == "real")
|
||||
res = std::make_shared<DataTypeFloat32>();
|
||||
else if (type == "double precision")
|
||||
res = std::make_shared<DataTypeFloat64>();
|
||||
else if (type == "serial")
|
||||
res = std::make_shared<DataTypeUInt32>();
|
||||
else if (type == "bigserial")
|
||||
res = std::make_shared<DataTypeUInt64>();
|
||||
else if (type.starts_with("timestamp"))
|
||||
res = std::make_shared<DataTypeDateTime>();
|
||||
else if (type == "date")
|
||||
res = std::make_shared<DataTypeDate>();
|
||||
else if (type.starts_with("numeric"))
|
||||
{
|
||||
/// Numeric and decimal will both end up here as numeric.
|
||||
res = DataTypeFactory::instance().get(type);
|
||||
uint32_t precision = getDecimalPrecision(*res);
|
||||
uint32_t scale = getDecimalScale(*res);
|
||||
|
||||
if (precision <= DecimalUtils::maxPrecision<Decimal32>())
|
||||
res = std::make_shared<DataTypeDecimal<Decimal32>>(precision, scale);
|
||||
else if (precision <= DecimalUtils::maxPrecision<Decimal64>())
|
||||
res = std::make_shared<DataTypeDecimal<Decimal64>>(precision, scale);
|
||||
else if (precision <= DecimalUtils::maxPrecision<Decimal128>())
|
||||
res = std::make_shared<DataTypeDecimal<Decimal128>>(precision, scale);
|
||||
else if (precision <= DecimalUtils::maxPrecision<Decimal256>())
|
||||
res = std::make_shared<DataTypeDecimal<Decimal256>>(precision, scale);
|
||||
}
|
||||
|
||||
if (!res)
|
||||
res = std::make_shared<DataTypeString>();
|
||||
if (is_nullable)
|
||||
res = std::make_shared<DataTypeNullable>(res);
|
||||
while (dimensions--)
|
||||
res = std::make_shared<DataTypeArray>(res);
|
||||
|
||||
return res;
|
||||
}
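/// A few illustrative mappings produced by the conversion above (examples, not exhaustive):
///     "integer",            not null, dims = 0  -> Int32
///     "character varying",  nullable, dims = 0  -> Nullable(String)
///     "numeric(10,2)",      not null, dims = 0  -> Decimal(10, 2)
///     "bigint[]",           not null, dims = 1  -> Array(Int64)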
|
||||
|
||||
|
||||
std::shared_ptr<NamesAndTypesList> fetchPostgreSQLTableStructure(
|
||||
std::shared_ptr<pqxx::connection> connection, const String & postgres_table_name, bool use_nulls)
|
||||
{
|
||||
auto columns = NamesAndTypesList();
|
||||
|
||||
if (postgres_table_name.find('\'') != std::string::npos
|
||||
|| postgres_table_name.find('\\') != std::string::npos)
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "PostgreSQL table name cannot contain single quote or backslash characters, passed {}",
|
||||
postgres_table_name);
|
||||
}
|
||||
|
||||
std::string query = fmt::format(
|
||||
"SELECT attname AS name, format_type(atttypid, atttypmod) AS type, "
|
||||
"attnotnull AS not_null, attndims AS dims "
|
||||
"FROM pg_attribute "
|
||||
"WHERE attrelid = '{}'::regclass "
|
||||
"AND NOT attisdropped AND attnum > 0", postgres_table_name);
|
||||
try
|
||||
{
|
||||
pqxx::read_transaction tx(*connection);
|
||||
pqxx::stream_from stream(tx, pqxx::from_query, std::string_view(query));
|
||||
|
||||
std::tuple<std::string, std::string, std::string, uint16_t> row;
|
||||
while (stream >> row)
|
||||
{
|
||||
columns.push_back(NameAndTypePair(
|
||||
std::get<0>(row),
|
||||
convertPostgreSQLDataType(
|
||||
std::get<1>(row),
|
||||
use_nulls && (std::get<2>(row) == "f"), /// 'f' means that postgres `not_null` is false, i.e. value is nullable
|
||||
std::get<3>(row))));
|
||||
}
|
||||
stream.complete();
|
||||
tx.commit();
|
||||
}
|
||||
catch (const pqxx::undefined_table &)
|
||||
{
|
||||
throw Exception(fmt::format(
|
||||
"PostgreSQL table {}.{} does not exist",
|
||||
connection->dbname(), postgres_table_name), ErrorCodes::UNKNOWN_TABLE);
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage("while fetching postgresql table structure");
|
||||
throw;
|
||||
}
|
||||
|
||||
if (columns.empty())
|
||||
return nullptr;
|
||||
|
||||
return std::make_shared<NamesAndTypesList>(columns);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
19
src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h
Normal file
19
src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h
Normal file
@ -0,0 +1,19 @@
|
||||
#pragma once
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
#include "config_core.h"
|
||||
#endif
|
||||
|
||||
#if USE_LIBPQXX
|
||||
#include <Storages/StoragePostgreSQL.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
std::shared_ptr<NamesAndTypesList> fetchPostgreSQLTableStructure(
|
||||
std::shared_ptr<pqxx::connection> connection, const String & postgres_table_name, bool use_nulls);
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -8,7 +8,7 @@ PEERDIR(
|
||||
|
||||
|
||||
SRCS(
|
||||
<? find . -name '*.cpp' | sed 's/^\.\// /' | sort ?>
|
||||
<? find . -name '*.cpp' | grep -v -F 'PostgreSQL' | sed 's/^\.\// /' | sort ?>
|
||||
)
|
||||
|
||||
END()
|
||||
|
196
src/Dictionaries/PostgreSQLDictionarySource.cpp
Normal file
196
src/Dictionaries/PostgreSQLDictionarySource.cpp
Normal file
@ -0,0 +1,196 @@
|
||||
#include "PostgreSQLDictionarySource.h"
|
||||
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
#include "DictionarySourceFactory.h"
|
||||
#include "registerDictionaries.h"
|
||||
|
||||
#if USE_LIBPQXX
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataStreams/PostgreSQLBlockInputStream.h>
|
||||
#include <Storages/PostgreSQL/PostgreSQLConnection.h>
|
||||
#include "readInvalidateQuery.h"
|
||||
#endif
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int SUPPORT_IS_DISABLED;
|
||||
}
|
||||
|
||||
#if USE_LIBPQXX
|
||||
|
||||
static const UInt64 max_block_size = 8192;
|
||||
|
||||
PostgreSQLDictionarySource::PostgreSQLDictionarySource(
|
||||
const DictionaryStructure & dict_struct_,
|
||||
const Poco::Util::AbstractConfiguration & config_,
|
||||
const std::string & config_prefix,
|
||||
PostgreSQLConnectionPtr connection_,
|
||||
const Block & sample_block_)
|
||||
: dict_struct{dict_struct_}
|
||||
, sample_block(sample_block_)
|
||||
, connection(std::move(connection_))
|
||||
, log(&Poco::Logger::get("PostgreSQLDictionarySource"))
|
||||
, db(config_.getString(fmt::format("{}.db", config_prefix), ""))
|
||||
, table(config_.getString(fmt::format("{}.table", config_prefix), ""))
|
||||
, where(config_.getString(fmt::format("{}.where", config_prefix), ""))
|
||||
, query_builder(dict_struct, "", "", table, where, IdentifierQuotingStyle::DoubleQuotes)
|
||||
, load_all_query(query_builder.composeLoadAllQuery())
|
||||
, invalidate_query(config_.getString(fmt::format("{}.invalidate_query", config_prefix), ""))
|
||||
, update_field(config_.getString(fmt::format("{}.update_field", config_prefix), ""))
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
/// copy-constructor is provided in order to support cloneability
|
||||
PostgreSQLDictionarySource::PostgreSQLDictionarySource(const PostgreSQLDictionarySource & other)
|
||||
: dict_struct(other.dict_struct)
|
||||
, sample_block(other.sample_block)
|
||||
, connection(std::make_shared<PostgreSQLConnection>(other.connection->conn_str()))
|
||||
, log(&Poco::Logger::get("PostgreSQLDictionarySource"))
|
||||
, db(other.db)
|
||||
, table(other.table)
|
||||
, where(other.where)
|
||||
, query_builder(dict_struct, "", "", table, where, IdentifierQuotingStyle::DoubleQuotes)
|
||||
, load_all_query(query_builder.composeLoadAllQuery())
|
||||
, invalidate_query(other.invalidate_query)
|
||||
, update_time(other.update_time)
|
||||
, update_field(other.update_field)
|
||||
, invalidate_query_response(other.invalidate_query_response)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
BlockInputStreamPtr PostgreSQLDictionarySource::loadAll()
|
||||
{
|
||||
LOG_TRACE(log, load_all_query);
|
||||
return std::make_shared<PostgreSQLBlockInputStream>(
|
||||
connection->conn(), load_all_query, sample_block, max_block_size);
|
||||
}
|
||||
|
||||
|
||||
BlockInputStreamPtr PostgreSQLDictionarySource::loadUpdatedAll()
|
||||
{
|
||||
auto load_update_query = getUpdateFieldAndDate();
|
||||
LOG_TRACE(log, load_update_query);
|
||||
return std::make_shared<PostgreSQLBlockInputStream>(connection->conn(), load_update_query, sample_block, max_block_size);
|
||||
}
|
||||
|
||||
BlockInputStreamPtr PostgreSQLDictionarySource::loadIds(const std::vector<UInt64> & ids)
|
||||
{
|
||||
const auto query = query_builder.composeLoadIdsQuery(ids);
|
||||
return std::make_shared<PostgreSQLBlockInputStream>(connection->conn(), query, sample_block, max_block_size);
|
||||
}
|
||||
|
||||
|
||||
BlockInputStreamPtr PostgreSQLDictionarySource::loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows)
|
||||
{
|
||||
const auto query = query_builder.composeLoadKeysQuery(key_columns, requested_rows, ExternalQueryBuilder::AND_OR_CHAIN);
|
||||
return std::make_shared<PostgreSQLBlockInputStream>(connection->conn(), query, sample_block, max_block_size);
|
||||
}
|
||||
|
||||
|
||||
bool PostgreSQLDictionarySource::isModified() const
|
||||
{
|
||||
if (!invalidate_query.empty())
|
||||
{
|
||||
auto response = doInvalidateQuery(invalidate_query);
|
||||
if (response == invalidate_query_response)
|
||||
return false;
|
||||
invalidate_query_response = response;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
std::string PostgreSQLDictionarySource::doInvalidateQuery(const std::string & request) const
|
||||
{
|
||||
Block invalidate_sample_block;
|
||||
ColumnPtr column(ColumnString::create());
|
||||
invalidate_sample_block.insert(ColumnWithTypeAndName(column, std::make_shared<DataTypeString>(), "Sample Block"));
|
||||
PostgreSQLBlockInputStream block_input_stream(connection->conn(), request, invalidate_sample_block, 1);
|
||||
return readInvalidateQuery(block_input_stream);
|
||||
}
|
||||
|
||||
|
||||
bool PostgreSQLDictionarySource::hasUpdateField() const
|
||||
{
|
||||
return !update_field.empty();
|
||||
}
|
||||
|
||||
|
||||
std::string PostgreSQLDictionarySource::getUpdateFieldAndDate()
|
||||
{
|
||||
if (update_time != std::chrono::system_clock::from_time_t(0))
|
||||
{
|
||||
auto tmp_time = update_time;
|
||||
update_time = std::chrono::system_clock::now();
|
||||
time_t hr_time = std::chrono::system_clock::to_time_t(tmp_time) - 1;
|
||||
std::string str_time = std::to_string(LocalDateTime(hr_time));
|
||||
return query_builder.composeUpdateQuery(update_field, str_time);
|
||||
}
|
||||
else
|
||||
{
|
||||
update_time = std::chrono::system_clock::now();
|
||||
return query_builder.composeLoadAllQuery();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool PostgreSQLDictionarySource::supportsSelectiveLoad() const
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
DictionarySourcePtr PostgreSQLDictionarySource::clone() const
|
||||
{
|
||||
return std::make_unique<PostgreSQLDictionarySource>(*this);
|
||||
}
|
||||
|
||||
|
||||
std::string PostgreSQLDictionarySource::toString() const
|
||||
{
|
||||
return "PostgreSQL: " + db + '.' + table + (where.empty() ? "" : ", where: " + where);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void registerDictionarySourcePostgreSQL(DictionarySourceFactory & factory)
|
||||
{
|
||||
auto create_table_source = [=](const DictionaryStructure & dict_struct,
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const std::string & root_config_prefix,
|
||||
Block & sample_block,
|
||||
const Context & /* context */,
|
||||
const std::string & /* default_database */,
|
||||
bool /* check_config */) -> DictionarySourcePtr
|
||||
{
|
||||
#if USE_LIBPQXX
|
||||
const auto config_prefix = root_config_prefix + ".postgresql";
|
||||
auto connection = std::make_shared<PostgreSQLConnection>(
|
||||
config.getString(fmt::format("{}.db", config_prefix), ""),
|
||||
config.getString(fmt::format("{}.host", config_prefix), ""),
|
||||
config.getUInt(fmt::format("{}.port", config_prefix), 0),
|
||||
config.getString(fmt::format("{}.user", config_prefix), ""),
|
||||
config.getString(fmt::format("{}.password", config_prefix), ""));
|
||||
|
||||
return std::make_unique<PostgreSQLDictionarySource>(
|
||||
dict_struct, config, config_prefix, connection, sample_block);
|
||||
#else
|
||||
(void)dict_struct;
|
||||
(void)config;
|
||||
(void)root_config_prefix;
|
||||
(void)sample_block;
|
||||
throw Exception{"Dictionary source of type `postgresql` is disabled because ClickHouse was built without postgresql support.",
|
||||
ErrorCodes::SUPPORT_IS_DISABLED};
|
||||
#endif
|
||||
};
|
||||
factory.registerSource("postgresql", create_table_source);
|
||||
}
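/// Illustrative dictionary source configuration consumed by the factory above (the keys follow
/// the config lookups in PostgreSQLDictionarySource and create_table_source; values are examples):
///
///     <source>
///         <postgresql>
///             <host>postgres-host</host>
///             <port>5432</port>
///             <user>user</user>
///             <password>password</password>
///             <db>postgres_db</db>
///             <table>dict_table</table>
///             <where>id > 0</where>                                        <!-- optional -->
///             <invalidate_query>SELECT max(updated_at) FROM dict_table</invalidate_query>  <!-- optional -->
///             <update_field>updated_at</update_field>                      <!-- optional -->
///         </postgresql>
///     </source>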
|
||||
|
||||
}
|
70
src/Dictionaries/PostgreSQLDictionarySource.h
Normal file
70
src/Dictionaries/PostgreSQLDictionarySource.h
Normal file
@ -0,0 +1,70 @@
|
||||
#pragma once
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
#include "config_core.h"
|
||||
#endif
|
||||
#include "DictionaryStructure.h"
|
||||
#include "IDictionarySource.h"
|
||||
|
||||
#if USE_LIBPQXX
|
||||
#include "ExternalQueryBuilder.h"
|
||||
#include <Core/Block.h>
|
||||
#include <common/LocalDateTime.h>
|
||||
#include <common/logger_useful.h>
|
||||
#include <Storages/StoragePostgreSQL.h>
|
||||
#include <pqxx/pqxx>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Allows loading dictionaries from a PostgreSQL database
|
||||
class PostgreSQLDictionarySource final : public IDictionarySource
|
||||
{
|
||||
public:
|
||||
PostgreSQLDictionarySource(
|
||||
const DictionaryStructure & dict_struct_,
|
||||
const Poco::Util::AbstractConfiguration & config_,
|
||||
const std::string & config_prefix,
|
||||
PostgreSQLConnectionPtr connection_,
|
||||
const Block & sample_block_);
|
||||
|
||||
/// copy-constructor is provided in order to support cloneability
|
||||
PostgreSQLDictionarySource(const PostgreSQLDictionarySource & other);
|
||||
PostgreSQLDictionarySource & operator=(const PostgreSQLDictionarySource &) = delete;
|
||||
|
||||
BlockInputStreamPtr loadAll() override;
|
||||
BlockInputStreamPtr loadUpdatedAll() override;
|
||||
BlockInputStreamPtr loadIds(const std::vector<UInt64> & ids) override;
|
||||
BlockInputStreamPtr loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows) override;
|
||||
|
||||
bool isModified() const override;
|
||||
bool supportsSelectiveLoad() const override;
|
||||
bool hasUpdateField() const override;
|
||||
|
||||
DictionarySourcePtr clone() const override;
|
||||
std::string toString() const override;
|
||||
|
||||
private:
|
||||
std::string getUpdateFieldAndDate();
|
||||
std::string doInvalidateQuery(const std::string & request) const;
|
||||
|
||||
const DictionaryStructure dict_struct;
|
||||
Block sample_block;
|
||||
PostgreSQLConnectionPtr connection;
|
||||
Poco::Logger * log;
|
||||
|
||||
const std::string db;
|
||||
const std::string table;
|
||||
const std::string where;
|
||||
ExternalQueryBuilder query_builder;
|
||||
const std::string load_all_query;
|
||||
std::string invalidate_query;
|
||||
std::chrono::time_point<std::chrono::system_clock> update_time;
|
||||
const std::string update_field;
|
||||
mutable std::string invalidate_query_response;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
@ -1644,6 +1644,8 @@ void SSDComplexKeyCacheDictionary::has(
|
||||
const DataTypes & key_types,
|
||||
PaddedPODArray<UInt8> & out) const
|
||||
{
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
std::unordered_map<KeyRef, std::vector<size_t>> not_found_keys;
|
||||
|
@ -4,6 +4,40 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class DictionarySourceFactory;
|
||||
|
||||
void registerDictionarySourceFile(DictionarySourceFactory & source_factory);
|
||||
void registerDictionarySourceMysql(DictionarySourceFactory & source_factory);
|
||||
void registerDictionarySourceClickHouse(DictionarySourceFactory & source_factory);
|
||||
void registerDictionarySourceMongoDB(DictionarySourceFactory & source_factory);
|
||||
void registerDictionarySourceCassandra(DictionarySourceFactory & source_factory);
|
||||
void registerDictionarySourceRedis(DictionarySourceFactory & source_factory);
|
||||
void registerDictionarySourceXDBC(DictionarySourceFactory & source_factory);
|
||||
void registerDictionarySourceJDBC(DictionarySourceFactory & source_factory);
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
void registerDictionarySourcePostgreSQL(DictionarySourceFactory & source_factory);
|
||||
#endif
|
||||
void registerDictionarySourceExecutable(DictionarySourceFactory & source_factory);
|
||||
void registerDictionarySourceHTTP(DictionarySourceFactory & source_factory);
|
||||
void registerDictionarySourceLibrary(DictionarySourceFactory & source_factory);
|
||||
|
||||
class DictionaryFactory;
|
||||
void registerDictionaryRangeHashed(DictionaryFactory & factory);
|
||||
void registerDictionaryComplexKeyHashed(DictionaryFactory & factory);
|
||||
void registerDictionaryComplexKeyCache(DictionaryFactory & factory);
|
||||
void registerDictionaryComplexKeyDirect(DictionaryFactory & factory);
|
||||
void registerDictionaryTrie(DictionaryFactory & factory);
|
||||
void registerDictionaryFlat(DictionaryFactory & factory);
|
||||
void registerDictionaryHashed(DictionaryFactory & factory);
|
||||
void registerDictionaryCache(DictionaryFactory & factory);
|
||||
#if defined(__linux__) || defined(__FreeBSD__)
|
||||
void registerDictionarySSDCache(DictionaryFactory & factory);
|
||||
void registerDictionarySSDComplexKeyCache(DictionaryFactory & factory);
|
||||
#endif
|
||||
void registerDictionaryPolygon(DictionaryFactory & factory);
|
||||
void registerDictionaryDirect(DictionaryFactory & factory);
|
||||
|
||||
|
||||
void registerDictionaries()
|
||||
{
|
||||
{
|
||||
@ -16,6 +50,9 @@ void registerDictionaries()
|
||||
registerDictionarySourceCassandra(source_factory);
|
||||
registerDictionarySourceXDBC(source_factory);
|
||||
registerDictionarySourceJDBC(source_factory);
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
registerDictionarySourcePostgreSQL(source_factory);
|
||||
#endif
|
||||
registerDictionarySourceExecutable(source_factory);
|
||||
registerDictionarySourceHTTP(source_factory);
|
||||
registerDictionarySourceLibrary(source_factory);
|
||||
|
@ -2,36 +2,5 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class DictionarySourceFactory;
|
||||
|
||||
void registerDictionarySourceFile(DictionarySourceFactory & source_factory);
|
||||
void registerDictionarySourceMysql(DictionarySourceFactory & source_factory);
|
||||
void registerDictionarySourceClickHouse(DictionarySourceFactory & source_factory);
|
||||
void registerDictionarySourceMongoDB(DictionarySourceFactory & source_factory);
|
||||
void registerDictionarySourceCassandra(DictionarySourceFactory & source_factory);
|
||||
void registerDictionarySourceRedis(DictionarySourceFactory & source_factory);
|
||||
void registerDictionarySourceXDBC(DictionarySourceFactory & source_factory);
|
||||
void registerDictionarySourceJDBC(DictionarySourceFactory & source_factory);
|
||||
void registerDictionarySourceExecutable(DictionarySourceFactory & source_factory);
|
||||
void registerDictionarySourceHTTP(DictionarySourceFactory & source_factory);
|
||||
void registerDictionarySourceLibrary(DictionarySourceFactory & source_factory);
|
||||
|
||||
class DictionaryFactory;
|
||||
void registerDictionaryRangeHashed(DictionaryFactory & factory);
|
||||
void registerDictionaryComplexKeyHashed(DictionaryFactory & factory);
|
||||
void registerDictionaryComplexKeyCache(DictionaryFactory & factory);
|
||||
void registerDictionaryComplexKeyDirect(DictionaryFactory & factory);
|
||||
void registerDictionaryTrie(DictionaryFactory & factory);
|
||||
void registerDictionaryFlat(DictionaryFactory & factory);
|
||||
void registerDictionaryHashed(DictionaryFactory & factory);
|
||||
void registerDictionaryCache(DictionaryFactory & factory);
|
||||
#if defined(__linux__) || defined(__FreeBSD__)
|
||||
void registerDictionarySSDCache(DictionaryFactory & factory);
|
||||
void registerDictionarySSDComplexKeyCache(DictionaryFactory & factory);
|
||||
#endif
|
||||
void registerDictionaryPolygon(DictionaryFactory & factory);
|
||||
void registerDictionaryDirect(DictionaryFactory & factory);
|
||||
|
||||
void registerDictionaries();
|
||||
}
|
||||
|
@ -15,7 +15,7 @@ NO_COMPILER_WARNINGS()


SRCS(
    <? find . -name '*.cpp' | grep -v -F tests | grep -v -F Trie | sed 's/^\.\// /' | sort ?>
    <? find . -name '*.cpp' | grep -v -P 'tests|PostgreSQL' | sed 's/^\.\// /' | sort ?>
)

END()
@ -23,6 +23,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
MySQLBlockInputStream::Connection::Connection(
|
||||
@ -114,6 +115,8 @@ namespace
|
||||
case ValueType::vtFixedString:
|
||||
assert_cast<ColumnFixedString &>(column).insertData(value.data(), value.size());
|
||||
break;
|
||||
default:
|
||||
throw Exception("Unsupported value type", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -465,7 +465,8 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(
|
||||
res.add(std::move(column));
|
||||
}
|
||||
|
||||
res.flattenNested();
|
||||
if (context.getSettingsRef().flatten_nested)
|
||||
res.flattenNested();
|
||||
|
||||
if (res.getAllPhysical().empty())
|
||||
throw Exception{"Cannot CREATE table without physical columns", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED};
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include <Parsers/parseQuery.h>
|
||||
|
||||
#include <Access/AccessFlags.h>
|
||||
#include <Access/ContextAccess.h>
|
||||
|
||||
#include <AggregateFunctions/AggregateFunctionCount.h>
|
||||
|
||||
@ -100,6 +101,7 @@ namespace ErrorCodes
|
||||
extern const int PARAMETER_OUT_OF_BOUND;
|
||||
extern const int INVALID_LIMIT_EXPRESSION;
|
||||
extern const int INVALID_WITH_FILL_EXPRESSION;
|
||||
extern const int ACCESS_DENIED;
|
||||
}
|
||||
|
||||
/// Assumes `storage` is set and the table filter (row-level security) is not empty.
|
||||
@ -212,6 +214,36 @@ static void rewriteMultipleJoins(ASTPtr & query, const TablesWithColumns & table
    JoinToSubqueryTransformVisitor(join_to_subs_data).visit(query);
}

/// Checks that the current user has the SELECT privilege.
static void checkAccessRightsForSelect(
    const Context & context,
    const StorageID & table_id,
    const StorageMetadataPtr & table_metadata,
    const Strings & required_columns,
    const TreeRewriterResult & syntax_analyzer_result)
{
    if (!syntax_analyzer_result.has_explicit_columns && table_metadata && !table_metadata->getColumns().empty())
    {
        /// For a trivial query like "SELECT count() FROM table" access is granted if at least
        /// one column is accessible.
        /// In this case just checking access for `required_columns` doesn't work correctly
        /// because `required_columns` will contain the name of a column of minimum size (see TreeRewriterResult::collectUsedColumns())
        /// which is probably not the same column as the column the current user has access to.
        auto access = context.getAccess();
        for (const auto & column : table_metadata->getColumns())
        {
            if (access->isGranted(AccessType::SELECT, table_id.database_name, table_id.table_name, column.name))
                return;
        }
        throw Exception(context.getUserName() + ": Not enough privileges. "
            "To execute this query it's necessary to have grant SELECT for at least one column on " + table_id.getFullTableName(),
            ErrorCodes::ACCESS_DENIED);
    }

    /// General check.
    context.checkAccess(AccessType::SELECT, table_id, required_columns);
}

/// Returns true if we should ignore quotas and limits for a specified table in the system database.
static bool shouldIgnoreQuotaAndLimits(const StorageID & table_id)
{
@ -467,7 +499,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
    {
        /// The current user should have the SELECT privilege.
        /// If this table_id is for a table function we don't check access rights here because in this case they have been already checked in ITableFunction::execute().
        context->checkAccess(AccessType::SELECT, table_id, required_columns);
        checkAccessRightsForSelect(*context, table_id, metadata_snapshot, required_columns, *syntax_analyzer_result);

        /// Remove limits for some tables in the `system` database.
        if (shouldIgnoreQuotaAndLimits(table_id) && (joined_tables.tablesCount() <= 1))
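The rule added above boils down to: for a query with no explicit columns (for example `SELECT count() FROM table`), access is sufficient if the user can read at least one column of the table. The following is a minimal, self-contained sketch of that rule only; the `GrantSet` type and the sample grants are hypothetical stand-ins for ClickHouse's real access classes, not its API.
```
#include <iostream>
#include <set>
#include <string>
#include <tuple>
#include <vector>

// Hypothetical stand-in for the access-control state: a set of
// (database, table, column) triples the user may SELECT from.
using GrantSet = std::set<std::tuple<std::string, std::string, std::string>>;

// For a query without explicit columns, any readable column of the table is enough.
bool canRunTrivialSelect(const GrantSet & grants,
                         const std::string & database,
                         const std::string & table,
                         const std::vector<std::string> & table_columns)
{
    for (const auto & column : table_columns)
        if (grants.count({database, table, column}))
            return true;
    return false;
}

int main()
{
    GrantSet grants = {{"db", "hits", "UserID"}};   // SELECT granted on one column only

    std::cout << std::boolalpha
              << canRunTrivialSelect(grants, "db", "hits", {"UserID", "URL", "Title"}) << '\n'  // true
              << canRunTrivialSelect(grants, "db", "visits", {"VisitID"}) << '\n';              // false
}
```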
@ -526,7 +526,12 @@ void TreeRewriterResult::collectSourceColumns(bool add_special)
|
||||
{
|
||||
const ColumnsDescription & columns = metadata_snapshot->getColumns();
|
||||
|
||||
auto columns_from_storage = add_special ? columns.getAll() : columns.getAllPhysical();
|
||||
NamesAndTypesList columns_from_storage;
|
||||
if (storage->supportsSubcolumns())
|
||||
columns_from_storage = add_special ? columns.getAllWithSubcolumns() : columns.getAllPhysicalWithSubcolumns();
|
||||
else
|
||||
columns_from_storage = add_special ? columns.getAll() : columns.getAllPhysical();
|
||||
|
||||
if (source_columns.empty())
|
||||
source_columns.swap(columns_from_storage);
|
||||
else
|
||||
@ -590,11 +595,13 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select
            required.insert(column_name_type.name);
    }

    /// You need to read at least one column to find the number of rows.
    if (is_select && required.empty())
    /// Figure out if we're able to use the trivial count optimization.
    has_explicit_columns = !required.empty();
    if (is_select && !has_explicit_columns)
    {
        optimize_trivial_count = true;

        /// You need to read at least one column to find the number of rows.
        /// We will find a column with minimum <compressed_size, type_size, uncompressed_size>.
        /// Because it is the column that is cheapest to read.
        struct ColumnSizeTuple
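The comment above describes the selection criterion: when any column would do, read the one with the smallest <compressed_size, type_size, uncompressed_size> tuple. A small self-contained sketch of that ordering follows; the struct and the sizes are made up for illustration and are not the real ColumnSizeTuple.
```
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <tuple>
#include <vector>

// Illustrative only: compare candidate columns lexicographically by
// <compressed size, type size, uncompressed size> and pick the cheapest one to read.
struct ColumnCost
{
    std::string name;
    uint64_t compressed = 0;
    uint64_t type_size = 0;
    uint64_t uncompressed = 0;

    auto key() const { return std::tie(compressed, type_size, uncompressed); }
};

int main()
{
    std::vector<ColumnCost> columns = {
        {"URL",    1000000, 0, 9000000},
        {"UserID",   40000, 8,  800000},
        {"Flag",     40000, 1,  100000},   // same compressed size, smaller type wins
    };

    auto cheapest = std::min_element(columns.begin(), columns.end(),
        [](const auto & a, const auto & b) { return a.key() < b.key(); });

    std::cout << "read column: " << cheapest->name << '\n';   // "Flag"
}
```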
@ -53,6 +53,13 @@ struct TreeRewriterResult
    /// Predicate optimizer overrides the sub queries
    bool rewrite_subqueries = false;

    /// Whether the query contains explicit columns like "SELECT column1 + column2 FROM table1".
    /// Queries like "SELECT count() FROM table1", "SELECT 1" don't contain explicit columns.
    bool has_explicit_columns = false;

    /// Whether it's possible to use the trivial count optimization,
    /// i.e. use a fast call of IStorage::totalRows() (or IStorage::totalRowsByPartitionPredicate())
    /// instead of actually retrieving columns and counting rows.
    bool optimize_trivial_count = false;

    /// Cache isRemote() call for storage, because it may be too heavy.
@ -158,8 +158,8 @@ SRCS(
|
||||
interpretSubquery.cpp
|
||||
join_common.cpp
|
||||
loadMetadata.cpp
|
||||
replaceAliasColumnsInQuery.cpp
|
||||
processColumnTransformers.cpp
|
||||
replaceAliasColumnsInQuery.cpp
|
||||
sortBlock.cpp
|
||||
|
||||
)
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include <Common/Exception.h>
|
||||
#include <common/logger_useful.h>
|
||||
|
||||
#include <Parsers/New/LexerErrorListener.h>
|
||||
|
||||
@ -17,7 +18,7 @@ extern int SYNTAX_ERROR;
|
||||
|
||||
void LexerErrorListener::syntaxError(Recognizer *, Token *, size_t, size_t, const std::string & message, std::exception_ptr)
|
||||
{
|
||||
std::cerr << "Lexer error: " << message << std::endl;
|
||||
LOG_ERROR(&Poco::Logger::get("ClickHouseLexer"), "Lexer error: {}", message);
|
||||
|
||||
throw DB::Exception("Can't recognize input: " + message, ErrorCodes::SYNTAX_ERROR);
|
||||
}
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include <Common/Exception.h>
|
||||
#include <common/logger_useful.h>
|
||||
|
||||
#include <Parsers/New/ParserErrorListener.h>
|
||||
|
||||
@ -24,9 +25,10 @@ void ParserErrorListener::syntaxError(
|
||||
{
|
||||
auto * parser = dynamic_cast<ClickHouseParser*>(recognizer);
|
||||
|
||||
std::cerr << "Last element parsed so far:" << std::endl
|
||||
<< parser->getRuleContext()->toStringTree(parser, true) << std::endl
|
||||
<< "Parser error: (pos " << token->getStartIndex() << ") " << message << std::endl;
|
||||
LOG_ERROR(&Poco::Logger::get("ClickHouseParser"),
|
||||
"Last element parsed so far:\n"
|
||||
"{}\n"
|
||||
"Parser error: (pos {}) {}", parser->getRuleContext()->toStringTree(parser, true), token->getStartIndex(), message);
|
||||
|
||||
throw DB::Exception("Can't parse input: " + message, ErrorCodes::SYNTAX_ERROR);
|
||||
}
|
||||
|
12
src/Parsers/New/README.md
Normal file
@ -0,0 +1,12 @@
## How to generate source code files from grammar

The grammar is located in the `ClickHouseLexer.g4` and `ClickHouseParser.g4` files.

To generate the source code you need the `antlr4` binary installed locally:
```
cd src/Parsers/New
antlr4 -no-listener -visitor -package DB -Dlanguage=Cpp ClickHouseLexer.g4  # only needed if the lexer part of the grammar changed
antlr4 -no-listener -visitor -package DB -Dlanguage=Cpp ClickHouseParser.g4
```

Commit only the git-tracked generated files; not all of the generated content is required.
@ -320,7 +320,8 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, const Context & con
|
||||
metadata.columns.add(column, after_column, first);
|
||||
|
||||
/// Slow, because each time a list is copied
|
||||
metadata.columns.flattenNested();
|
||||
if (context.getSettingsRef().flatten_nested)
|
||||
metadata.columns.flattenNested();
|
||||
}
|
||||
else if (type == DROP_COLUMN)
|
||||
{
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypeNested.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Storages/IStorage.h>
|
||||
@ -184,6 +185,7 @@ void ColumnsDescription::add(ColumnDescription column, const String & after_colu
|
||||
insert_it = range.second;
|
||||
}
|
||||
|
||||
addSubcolumns(column.name, column.type);
|
||||
columns.get<0>().insert(insert_it, std::move(column));
|
||||
}
|
||||
|
||||
@ -195,7 +197,10 @@ void ColumnsDescription::remove(const String & column_name)
|
||||
ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
|
||||
|
||||
for (auto list_it = range.first; list_it != range.second;)
|
||||
{
|
||||
removeSubcolumns(list_it->name, list_it->type);
|
||||
list_it = columns.get<0>().erase(list_it);
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnsDescription::rename(const String & column_from, const String & column_to)
|
||||
@ -268,6 +273,7 @@ void ColumnsDescription::flattenNested()
|
||||
}
|
||||
|
||||
ColumnDescription column = std::move(*it);
|
||||
removeSubcolumns(column.name, column.type);
|
||||
it = columns.get<0>().erase(it);
|
||||
|
||||
const DataTypes & elements = type_tuple->getElements();
|
||||
@ -281,6 +287,7 @@ void ColumnsDescription::flattenNested()
|
||||
nested_column.name = Nested::concatenateName(column.name, names[i]);
|
||||
nested_column.type = std::make_shared<DataTypeArray>(elements[i]);
|
||||
|
||||
addSubcolumns(nested_column.name, nested_column.type);
|
||||
columns.get<0>().insert(it, std::move(nested_column));
|
||||
}
|
||||
}
|
||||
@ -322,10 +329,10 @@ NamesAndTypesList ColumnsDescription::getAll() const
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
bool ColumnsDescription::has(const String & column_name) const
|
||||
{
|
||||
return columns.get<1>().find(column_name) != columns.get<1>().end();
|
||||
return columns.get<1>().find(column_name) != columns.get<1>().end()
|
||||
|| subcolumns.find(column_name) != subcolumns.end();
|
||||
}
|
||||
|
||||
bool ColumnsDescription::hasNested(const String & column_name) const
|
||||
@ -371,12 +378,56 @@ NameAndTypePair ColumnsDescription::getPhysical(const String & column_name) cons
|
||||
return NameAndTypePair(it->name, it->type);
|
||||
}
|
||||
|
||||
NameAndTypePair ColumnsDescription::getPhysicalOrSubcolumn(const String & column_name) const
|
||||
{
|
||||
if (auto it = columns.get<1>().find(column_name); it != columns.get<1>().end()
|
||||
&& it->default_desc.kind != ColumnDefaultKind::Alias)
|
||||
{
|
||||
return NameAndTypePair(it->name, it->type);
|
||||
}
|
||||
|
||||
if (auto it = subcolumns.find(column_name); it != subcolumns.end())
|
||||
{
|
||||
return it->second;
|
||||
}
|
||||
|
||||
throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE,
|
||||
"There is no physical column or subcolumn {} in table.", column_name);
|
||||
}
|
||||
|
||||
bool ColumnsDescription::hasPhysical(const String & column_name) const
|
||||
{
|
||||
auto it = columns.get<1>().find(column_name);
|
||||
return it != columns.get<1>().end() && it->default_desc.kind != ColumnDefaultKind::Alias;
|
||||
}
|
||||
|
||||
bool ColumnsDescription::hasPhysicalOrSubcolumn(const String & column_name) const
|
||||
{
|
||||
return hasPhysical(column_name) || subcolumns.find(column_name) != subcolumns.end();
|
||||
}
|
||||
|
||||
static NamesAndTypesList getWithSubcolumns(NamesAndTypesList && source_list)
{
    NamesAndTypesList ret;
    for (const auto & col : source_list)
    {
        ret.emplace_back(col.name, col.type);
        for (const auto & subcolumn : col.type->getSubcolumnNames())
            ret.emplace_back(col.name, subcolumn, col.type, col.type->getSubcolumnType(subcolumn));
    }

    return ret;
}

NamesAndTypesList ColumnsDescription::getAllWithSubcolumns() const
{
    return getWithSubcolumns(getAll());
}

NamesAndTypesList ColumnsDescription::getAllPhysicalWithSubcolumns() const
{
    return getWithSubcolumns(getAllPhysical());
}

bool ColumnsDescription::hasDefaults() const
{
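getWithSubcolumns() above lists every column together with its subcolumns under dotted names. A rough self-contained sketch of that expansion follows; the type-to-suffix table is a hard-coded placeholder for illustration, not the set of subcolumn names a given ClickHouse type actually exposes.
```
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Illustrative suffix table: which subcolumn names a type contributes.
static const std::map<std::string, std::vector<std::string>> subcolumns_of_type = {
    {"Nullable(String)", {"null"}},
    {"Array(UInt64)",    {"size0"}},
};

// Expand a {name, type} list into names plus dotted subcolumn names.
std::vector<std::string> expandWithSubcolumns(
    const std::vector<std::pair<std::string, std::string>> & columns)
{
    std::vector<std::string> result;
    for (const auto & [name, type] : columns)
    {
        result.push_back(name);
        auto it = subcolumns_of_type.find(type);
        if (it != subcolumns_of_type.end())
            for (const auto & suffix : it->second)
                result.push_back(name + "." + suffix);   // e.g. "arr.size0"
    }
    return result;
}

int main()
{
    for (const auto & name : expandWithSubcolumns({{"s", "Nullable(String)"}, {"arr", "Array(UInt64)"}}))
        std::cout << name << '\n';
}
```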
@ -483,13 +534,33 @@ ColumnsDescription ColumnsDescription::parse(const String & str)
|
||||
ColumnDescription column;
|
||||
column.readText(buf);
|
||||
buf.ignore(1); /// ignore new line
|
||||
result.add(std::move(column));
|
||||
result.add(column);
|
||||
}
|
||||
|
||||
assertEOF(buf);
|
||||
return result;
|
||||
}
|
||||
|
||||
void ColumnsDescription::addSubcolumns(const String & name_in_storage, const DataTypePtr & type_in_storage)
|
||||
{
|
||||
for (const auto & subcolumn_name : type_in_storage->getSubcolumnNames())
|
||||
{
|
||||
auto subcolumn = NameAndTypePair(name_in_storage, subcolumn_name,
|
||||
type_in_storage, type_in_storage->getSubcolumnType(subcolumn_name));
|
||||
|
||||
if (has(subcolumn.name))
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
|
||||
"Cannot add subcolumn {}: column with this name already exists", subcolumn.name);
|
||||
|
||||
subcolumns[subcolumn.name] = subcolumn;
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnsDescription::removeSubcolumns(const String & name_in_storage, const DataTypePtr & type_in_storage)
|
||||
{
|
||||
for (const auto & subcolumn_name : type_in_storage->getSubcolumnNames())
|
||||
subcolumns.erase(name_in_storage + "." + subcolumn_name);
|
||||
}
|
||||
|
||||
Block validateColumnsDefaultsAndGetSampleBlock(ASTPtr default_expr_list, const NamesAndTypesList & all_columns, const Context & context)
|
||||
{
|
||||
|
@ -77,6 +77,8 @@ public:
|
||||
NamesAndTypesList getAliases() const;
|
||||
NamesAndTypesList getAllPhysical() const; /// ordinary + materialized.
|
||||
NamesAndTypesList getAll() const; /// ordinary + materialized + aliases
|
||||
NamesAndTypesList getAllWithSubcolumns() const;
|
||||
NamesAndTypesList getAllPhysicalWithSubcolumns() const;
|
||||
|
||||
using ColumnTTLs = std::unordered_map<String, ASTPtr>;
|
||||
ColumnTTLs getColumnTTLs() const;
|
||||
@ -105,7 +107,9 @@ public:
|
||||
|
||||
Names getNamesOfPhysical() const;
|
||||
bool hasPhysical(const String & column_name) const;
|
||||
bool hasPhysicalOrSubcolumn(const String & column_name) const;
|
||||
NameAndTypePair getPhysical(const String & column_name) const;
|
||||
NameAndTypePair getPhysicalOrSubcolumn(const String & column_name) const;
|
||||
|
||||
ColumnDefaults getDefaults() const; /// TODO: remove
|
||||
bool hasDefault(const String & column_name) const;
|
||||
@ -141,7 +145,12 @@ public:
|
||||
private:
|
||||
Container columns;
|
||||
|
||||
using SubcolumnsContainer = std::unordered_map<String, NameAndTypePair>;
|
||||
SubcolumnsContainer subcolumns;
|
||||
|
||||
void modifyColumnOrder(const String & column_name, const String & after_column, bool first);
|
||||
void addSubcolumns(const String & name_in_storage, const DataTypePtr & type_in_storage);
|
||||
void removeSubcolumns(const String & name_in_storage, const DataTypePtr & type_in_storage);
|
||||
};
|
||||
|
||||
/// Validate default expressions and corresponding types compatibility, i.e.
|
||||
|
@ -128,6 +128,9 @@ public:
|
||||
/// Example is StorageSystemNumbers.
|
||||
virtual bool hasEvenlyDistributedRead() const { return false; }
|
||||
|
||||
/// Returns true if the storage supports reading of subcolumns of complex types.
|
||||
virtual bool supportsSubcolumns() const { return false; }
|
||||
|
||||
|
||||
/// Optional size information of each physical column.
|
||||
/// Currently it's only used by the MergeTree family for query optimizations.
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include <common/logger_useful.h>
|
||||
#include <Compression/getCompressionCodecForFile.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
|
||||
|
||||
namespace CurrentMetrics
|
||||
@ -321,7 +322,12 @@ void IMergeTreeDataPart::setColumns(const NamesAndTypesList & new_columns)
    column_name_to_position.reserve(new_columns.size());
    size_t pos = 0;
    for (const auto & column : columns)
        column_name_to_position.emplace(column.name, pos++);
    {
        column_name_to_position.emplace(column.name, pos);
        for (const auto & subcolumn : column.type->getSubcolumnNames())
            column_name_to_position.emplace(Nested::concatenateName(column.name, subcolumn), pos);
        ++pos;
    }
}

void IMergeTreeDataPart::removeIfNeeded()
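The change above makes subcolumn names resolve to the same ordinal position as the column they belong to. A minimal sketch of that mapping, with hard-coded example columns and subcolumn lists:
```
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

int main()
{
    struct Column { std::string name; std::vector<std::string> subcolumns; };
    // Hypothetical part layout: one plain column and one column with two subcolumns.
    std::vector<Column> columns = {
        {"id", {}},
        {"n",  {"a", "b"}},
    };

    // Both "n" and its subcolumns "n.a", "n.b" share the same position.
    std::unordered_map<std::string, size_t> name_to_position;
    size_t pos = 0;
    for (const auto & column : columns)
    {
        name_to_position.emplace(column.name, pos);
        for (const auto & subcolumn : column.subcolumns)
            name_to_position.emplace(column.name + "." + subcolumn, pos);
        ++pos;
    }

    std::cout << name_to_position.at("n.b") << '\n';   // 1, same position as "n"
}
```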
@ -454,7 +460,7 @@ String IMergeTreeDataPart::getColumnNameWithMinimumCompressedSize(const StorageM
|
||||
if (alter_conversions.isColumnRenamed(column.name))
|
||||
column_name = alter_conversions.getColumnOldName(column.name);
|
||||
|
||||
if (!hasColumnFiles(column_name, *column_type))
|
||||
if (!hasColumnFiles(column))
|
||||
continue;
|
||||
|
||||
const auto size = getColumnSize(column_name, *column_type).data_compressed;
|
||||
@ -640,7 +646,7 @@ CompressionCodecPtr IMergeTreeDataPart::detectDefaultCompressionCodec() const
|
||||
{
|
||||
if (path_to_data_file.empty())
|
||||
{
|
||||
String candidate_path = getFullRelativePath() + IDataType::getFileNameForStream(part_column.name, substream_path) + ".bin";
|
||||
String candidate_path = getFullRelativePath() + IDataType::getFileNameForStream(part_column, substream_path) + ".bin";
|
||||
|
||||
/// We can have existing, but empty .bin files. Example: LowCardinality(Nullable(...)) columns and column_name.dict.null.bin file.
|
||||
if (volume->getDisk()->exists(candidate_path) && volume->getDisk()->getFileSize(candidate_path) != 0)
|
||||
|
@ -330,7 +330,7 @@ public:
|
||||
/// NOTE: Doesn't take column renames into account, if some column renames
|
||||
/// take place, you must take original name of column for this part from
|
||||
/// storage and pass it to this method.
|
||||
virtual bool hasColumnFiles(const String & /* column */, const IDataType & /* type */) const { return false; }
|
||||
virtual bool hasColumnFiles(const NameAndTypePair & /* column */) const { return false; }
|
||||
|
||||
/// Returns true if this part shall participate in merges according to
|
||||
/// settings of given storage policy.
|
||||
|
@ -42,7 +42,14 @@ IMergeTreeReader::IMergeTreeReader(
|
||||
, all_mark_ranges(all_mark_ranges_)
|
||||
, alter_conversions(storage.getAlterConversionsForPart(data_part))
|
||||
{
|
||||
for (const NameAndTypePair & column_from_part : data_part->getColumns())
|
||||
auto part_columns = data_part->getColumns();
|
||||
if (settings.convert_nested_to_subcolumns)
|
||||
{
|
||||
columns = Nested::convertToSubcolumns(columns);
|
||||
part_columns = Nested::collect(part_columns);
|
||||
}
|
||||
|
||||
for (const NameAndTypePair & column_from_part : part_columns)
|
||||
columns_from_part[column_from_part.name] = column_from_part.type;
|
||||
}
|
||||
|
||||
@ -74,7 +81,6 @@ static bool arrayHasNoElementsRead(const IColumn & column)
|
||||
return last_offset != 0;
|
||||
}
|
||||
|
||||
|
||||
void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows)
|
||||
{
|
||||
try
|
||||
@ -197,19 +203,33 @@ void IMergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns
|
||||
|
||||
NameAndTypePair IMergeTreeReader::getColumnFromPart(const NameAndTypePair & required_column) const
|
||||
{
|
||||
if (alter_conversions.isColumnRenamed(required_column.name))
|
||||
auto name_in_storage = required_column.getNameInStorage();
|
||||
|
||||
decltype(columns_from_part.begin()) it;
|
||||
if (alter_conversions.isColumnRenamed(name_in_storage))
|
||||
{
|
||||
String old_name = alter_conversions.getColumnOldName(required_column.name);
|
||||
auto it = columns_from_part.find(old_name);
|
||||
if (it != columns_from_part.end())
|
||||
return {it->first, it->second};
|
||||
String old_name = alter_conversions.getColumnOldName(name_in_storage);
|
||||
it = columns_from_part.find(old_name);
|
||||
}
|
||||
else if (auto it = columns_from_part.find(required_column.name); it != columns_from_part.end())
|
||||
else
|
||||
{
|
||||
return {it->first, it->second};
|
||||
it = columns_from_part.find(name_in_storage);
|
||||
}
|
||||
|
||||
return required_column;
|
||||
if (it == columns_from_part.end())
|
||||
return required_column;
|
||||
|
||||
if (required_column.isSubcolumn())
|
||||
{
|
||||
auto subcolumn_name = required_column.getSubcolumnName();
|
||||
auto subcolumn_type = it->second->tryGetSubcolumnType(subcolumn_name);
|
||||
if (!subcolumn_type)
|
||||
subcolumn_type = required_column.type;
|
||||
|
||||
return {it->first, subcolumn_name, it->second, subcolumn_type};
|
||||
}
|
||||
|
||||
return {it->first, it->second};
|
||||
}
|
||||
|
||||
void IMergeTreeReader::performRequiredConversions(Columns & res_columns)
|
||||
|
@ -33,7 +33,7 @@ NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart(
|
||||
column.type->enumerateStreams(
|
||||
[&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_path */)
|
||||
{
|
||||
++stream_counts[IDataType::getFileNameForStream(column.name, substream_path)];
|
||||
++stream_counts[IDataType::getFileNameForStream(column, substream_path)];
|
||||
},
|
||||
{});
|
||||
}
|
||||
@ -42,9 +42,13 @@ NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart(
    const String mrk_extension = data_part->getMarksFileExtension();
    for (const auto & column_name : empty_columns)
    {
        auto column_with_type = columns.tryGetByName(column_name);
        if (!column_with_type)
            continue;

        IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_path */)
        {
            String stream_name = IDataType::getFileNameForStream(column_name, substream_path);
            String stream_name = IDataType::getFileNameForStream(*column_with_type, substream_path);
            /// Delete files if they are no longer shared with another column.
            if (--stream_counts[stream_name] == 0)
            {
@ -52,10 +56,9 @@ NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart(
                remove_files.emplace(stream_name + mrk_extension);
            }
        };

        IDataType::SubstreamPath stream_path;
        auto column_with_type = columns.tryGetByName(column_name);
        if (column_with_type)
            column_with_type->type->enumerateStreams(callback, stream_path);
        column_with_type->type->enumerateStreams(callback, stream_path);
    }

    /// Remove files on disk and checksums
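The refcounting above exists because several columns can share a stream file (for example the offsets of a Nested group), so a file is deleted only when no remaining column still references it. A minimal model of that bookkeeping, with stream names and extensions invented for the example:
```
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

int main()
{
    // stream name -> how many columns of the part still use it
    std::map<std::string, int> stream_counts = {
        {"n.size0", 2},   // offsets shared by n.a and n.b
        {"n.a", 1},
        {"n.b", 1},
    };

    // Streams belonging to the column being dropped.
    std::vector<std::string> dropped_column_streams = {"n.size0", "n.a"};

    // Remove a file only when its reference count drops to zero.
    std::set<std::string> files_to_remove;
    for (const auto & stream_name : dropped_column_streams)
        if (--stream_counts[stream_name] == 0)
        {
            files_to_remove.insert(stream_name + ".bin");
            files_to_remove.insert(stream_name + ".mrk2");
        }

    for (const auto & file : files_to_remove)
        std::cout << "remove " << file << '\n';   // only n.a.*; n.size0 is still shared
}
```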
@ -1,5 +1,6 @@
|
||||
#include <Storages/MergeTree/MergeTreeBlockReadUtils.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
#include <Core/NamesAndTypes.h>
|
||||
#include <Common/checkStackSize.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
@ -33,21 +34,30 @@ bool injectRequiredColumnsRecursively(
|
||||
/// huge AST which for some reason was not validated on parsing/interpreter
|
||||
/// stages.
|
||||
checkStackSize();
|
||||
String column_name_in_part = column_name;
|
||||
if (alter_conversions.isColumnRenamed(column_name_in_part))
|
||||
column_name_in_part = alter_conversions.getColumnOldName(column_name_in_part);
|
||||
|
||||
/// column has files and hence does not require evaluation
|
||||
if (storage_columns.hasPhysical(column_name) && part->hasColumnFiles(column_name_in_part, *storage_columns.getPhysical(column_name).type))
|
||||
if (storage_columns.hasPhysicalOrSubcolumn(column_name))
|
||||
{
|
||||
/// ensure each column is added only once
|
||||
if (required_columns.count(column_name) == 0)
|
||||
auto column_in_storage = storage_columns.getPhysicalOrSubcolumn(column_name);
|
||||
auto column_name_in_part = column_in_storage.getNameInStorage();
|
||||
if (alter_conversions.isColumnRenamed(column_name_in_part))
|
||||
column_name_in_part = alter_conversions.getColumnOldName(column_name_in_part);
|
||||
|
||||
auto column_in_part = NameAndTypePair(
|
||||
column_name_in_part, column_in_storage.getSubcolumnName(),
|
||||
column_in_storage.getTypeInStorage(), column_in_storage.type);
|
||||
|
||||
/// column has files and hence does not require evaluation
|
||||
if (part->hasColumnFiles(column_in_part))
|
||||
{
|
||||
columns.emplace_back(column_name);
|
||||
required_columns.emplace(column_name);
|
||||
injected_columns.emplace(column_name);
|
||||
/// ensure each column is added only once
|
||||
if (required_columns.count(column_name) == 0)
|
||||
{
|
||||
columns.emplace_back(column_name);
|
||||
required_columns.emplace(column_name);
|
||||
injected_columns.emplace(column_name);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Column doesn't have default value and don't exist in part
|
||||
@ -81,8 +91,8 @@ NameSet injectRequiredColumns(const MergeTreeData & storage, const StorageMetada
|
||||
for (size_t i = 0; i < columns.size(); ++i)
|
||||
{
|
||||
/// We are going to fetch only physical columns
|
||||
if (!storage_columns.hasPhysical(columns[i]))
|
||||
throw Exception("There is no physical column " + columns[i] + " in table.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
|
||||
if (!storage_columns.hasPhysicalOrSubcolumn(columns[i]))
|
||||
throw Exception("There is no physical column or subcolumn " + columns[i] + " in table.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
|
||||
|
||||
have_at_least_one_physical_column |= injectRequiredColumnsRecursively(
|
||||
columns[i], storage_columns, alter_conversions,
|
||||
@ -285,7 +295,7 @@ MergeTreeReadTaskColumns getReadTaskColumns(
|
||||
|
||||
if (check_columns)
|
||||
{
|
||||
const NamesAndTypesList & physical_columns = metadata_snapshot->getColumns().getAllPhysical();
|
||||
const NamesAndTypesList & physical_columns = metadata_snapshot->getColumns().getAllWithSubcolumns();
|
||||
result.pre_columns = physical_columns.addTypes(pre_column_names);
|
||||
result.columns = physical_columns.addTypes(column_names);
|
||||
}
|
||||
|
@ -357,6 +357,8 @@ public:
|
||||
|| merging_params.mode == MergingParams::VersionedCollapsing;
|
||||
}
|
||||
|
||||
bool supportsSubcolumns() const override { return true; }
|
||||
|
||||
NamesAndTypesList getVirtuals() const override;
|
||||
|
||||
bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context &, const StorageMetadataPtr & metadata_snapshot) const override;
|
||||
|
@ -1493,7 +1493,7 @@ NameToNameVector MergeTreeDataMergerMutator::collectFilesForRenames(
|
||||
column.type->enumerateStreams(
|
||||
[&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
|
||||
{
|
||||
++stream_counts[IDataType::getFileNameForStream(column.name, substream_path)];
|
||||
++stream_counts[IDataType::getFileNameForStream(column, substream_path)];
|
||||
},
|
||||
{});
|
||||
}
|
||||
@ -1511,7 +1511,7 @@ NameToNameVector MergeTreeDataMergerMutator::collectFilesForRenames(
|
||||
{
|
||||
IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
|
||||
{
|
||||
String stream_name = IDataType::getFileNameForStream(command.column_name, substream_path);
|
||||
String stream_name = IDataType::getFileNameForStream({command.column_name, command.data_type}, substream_path);
|
||||
/// Delete files if they are no longer shared with another column.
|
||||
if (--stream_counts[stream_name] == 0)
|
||||
{
|
||||
@ -1532,7 +1532,7 @@ NameToNameVector MergeTreeDataMergerMutator::collectFilesForRenames(
|
||||
|
||||
IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
|
||||
{
|
||||
String stream_from = IDataType::getFileNameForStream(command.column_name, substream_path);
|
||||
String stream_from = IDataType::getFileNameForStream({command.column_name, command.data_type}, substream_path);
|
||||
|
||||
String stream_to = boost::replace_first_copy(stream_from, escaped_name_from, escaped_name_to);
|
||||
|
||||
@ -1565,7 +1565,7 @@ NameSet MergeTreeDataMergerMutator::collectFilesToSkip(
|
||||
{
|
||||
IDataType::StreamCallback callback = [&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
|
||||
{
|
||||
String stream_name = IDataType::getFileNameForStream(entry.name, substream_path);
|
||||
String stream_name = IDataType::getFileNameForStream({entry.name, entry.type}, substream_path);
|
||||
files_to_skip.insert(stream_name + ".bin");
|
||||
files_to_skip.insert(stream_name + mrk_extension);
|
||||
};
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include "MergeTreeDataPartCompact.h"
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
#include <Storages/MergeTree/MergeTreeReaderCompact.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataPartWriterCompact.h>
|
||||
#include <Poco/File.h>
|
||||
@ -121,9 +122,9 @@ void MergeTreeDataPartCompact::loadIndexGranularity()
|
||||
index_granularity.setInitialized();
|
||||
}
|
||||
|
||||
bool MergeTreeDataPartCompact::hasColumnFiles(const String & column_name, const IDataType &) const
|
||||
bool MergeTreeDataPartCompact::hasColumnFiles(const NameAndTypePair & column) const
|
||||
{
|
||||
if (!getColumnPosition(column_name))
|
||||
if (!getColumnPosition(column.name))
|
||||
return false;
|
||||
|
||||
auto bin_checksum = checksums.files.find(DATA_FILE_NAME_WITH_EXTENSION);
|
||||
|
@ -55,7 +55,7 @@ public:
|
||||
|
||||
bool isStoredOnDisk() const override { return true; }
|
||||
|
||||
bool hasColumnFiles(const String & column_name, const IDataType & type) const override;
|
||||
bool hasColumnFiles(const NameAndTypePair & column) const override;
|
||||
|
||||
String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return DATA_FILE_NAME; }
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <Storages/MergeTree/MergedBlockOutputStream.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataPartWriterInMemory.h>
|
||||
#include <Storages/MergeTree/IMergeTreeReader.h>
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Poco/File.h>
|
||||
#include <Poco/Logger.h>
|
||||
|
@ -32,6 +32,7 @@ public:
|
||||
const MergeTreeReaderSettings & reader_settings_,
|
||||
const ValueSizeMap & avg_value_size_hints,
|
||||
const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override;
|
||||
|
||||
MergeTreeWriterPtr getWriter(
|
||||
const NamesAndTypesList & columns_list,
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
@ -41,7 +42,7 @@ public:
|
||||
const MergeTreeIndexGranularity & computed_index_granularity) const override;
|
||||
|
||||
bool isStoredOnDisk() const override { return false; }
|
||||
bool hasColumnFiles(const String & column_name, const IDataType & /* type */) const override { return !!getColumnPosition(column_name); }
|
||||
bool hasColumnFiles(const NameAndTypePair & column) const override { return !!getColumnPosition(column.name); }
|
||||
String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; }
|
||||
void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) const override;
|
||||
void makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const override;
|
||||
|
@ -3,6 +3,8 @@
|
||||
#include <Storages/MergeTree/MergeTreeReaderWide.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataPartWriterWide.h>
|
||||
#include <Storages/MergeTree/IMergeTreeDataPartWriter.h>
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
#include <Core/NamesAndTypes.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -46,10 +48,13 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader(
|
||||
const ValueSizeMap & avg_value_size_hints,
|
||||
const ReadBufferFromFileBase::ProfileCallback & profile_callback) const
|
||||
{
|
||||
auto new_settings = reader_settings;
|
||||
new_settings.convert_nested_to_subcolumns = true;
|
||||
|
||||
auto ptr = std::static_pointer_cast<const MergeTreeDataPartWide>(shared_from_this());
|
||||
return std::make_unique<MergeTreeReaderWide>(
|
||||
ptr, columns_to_read, metadata_snapshot, uncompressed_cache,
|
||||
mark_cache, mark_ranges, reader_settings,
|
||||
mark_cache, mark_ranges, new_settings,
|
||||
avg_value_size_hints, profile_callback);
|
||||
}
|
||||
|
||||
@ -71,15 +76,15 @@ IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartWide::getWriter(
|
||||
/// Takes into account the fact that several columns can e.g. share their .size substreams.
|
||||
/// When calculating totals these should be counted only once.
|
||||
ColumnSize MergeTreeDataPartWide::getColumnSizeImpl(
|
||||
const String & column_name, const IDataType & type, std::unordered_set<String> * processed_substreams) const
|
||||
const NameAndTypePair & column, std::unordered_set<String> * processed_substreams) const
|
||||
{
|
||||
ColumnSize size;
|
||||
if (checksums.empty())
|
||||
return size;
|
||||
|
||||
type.enumerateStreams([&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
|
||||
column.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
|
||||
{
|
||||
String file_name = IDataType::getFileNameForStream(column_name, substream_path);
|
||||
String file_name = IDataType::getFileNameForStream(column, substream_path);
|
||||
|
||||
if (processed_substreams && !processed_substreams->insert(file_name).second)
|
||||
return;
|
||||
@ -157,7 +162,7 @@ void MergeTreeDataPartWide::checkConsistency(bool require_part_metadata) const
|
||||
IDataType::SubstreamPath stream_path;
|
||||
name_type.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
|
||||
{
|
||||
String file_name = IDataType::getFileNameForStream(name_type.name, substream_path);
|
||||
String file_name = IDataType::getFileNameForStream(name_type, substream_path);
|
||||
String mrk_file_name = file_name + index_granularity_info.marks_file_extension;
|
||||
String bin_file_name = file_name + ".bin";
|
||||
if (!checksums.files.count(mrk_file_name))
|
||||
@ -179,7 +184,7 @@ void MergeTreeDataPartWide::checkConsistency(bool require_part_metadata) const
|
||||
{
|
||||
name_type.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
|
||||
{
|
||||
auto file_path = path + IDataType::getFileNameForStream(name_type.name, substream_path) + index_granularity_info.marks_file_extension;
|
||||
auto file_path = path + IDataType::getFileNameForStream(name_type, substream_path) + index_granularity_info.marks_file_extension;
|
||||
|
||||
/// Missing file is Ok for case when new column was added.
|
||||
if (volume->getDisk()->exists(file_path))
|
||||
@ -201,13 +206,13 @@ void MergeTreeDataPartWide::checkConsistency(bool require_part_metadata) const
|
||||
}
|
||||
}
|
||||
|
||||
bool MergeTreeDataPartWide::hasColumnFiles(const String & column_name, const IDataType & type) const
|
||||
bool MergeTreeDataPartWide::hasColumnFiles(const NameAndTypePair & column) const
|
||||
{
|
||||
bool res = true;
|
||||
|
||||
type.enumerateStreams([&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
|
||||
column.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
|
||||
{
|
||||
String file_name = IDataType::getFileNameForStream(column_name, substream_path);
|
||||
String file_name = IDataType::getFileNameForStream(column, substream_path);
|
||||
|
||||
auto bin_checksum = checksums.files.find(file_name + ".bin");
|
||||
auto mrk_checksum = checksums.files.find(file_name + index_granularity_info.marks_file_extension);
|
||||
@ -225,7 +230,7 @@ String MergeTreeDataPartWide::getFileNameForColumn(const NameAndTypePair & colum
|
||||
column.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
|
||||
{
|
||||
if (filename.empty())
|
||||
filename = IDataType::getFileNameForStream(column.name, substream_path);
|
||||
filename = IDataType::getFileNameForStream(column, substream_path);
|
||||
});
|
||||
return filename;
|
||||
}
|
||||
@ -235,7 +240,7 @@ void MergeTreeDataPartWide::calculateEachColumnSizes(ColumnSizeByName & each_col
|
||||
std::unordered_set<String> processed_substreams;
|
||||
for (const NameAndTypePair & column : columns)
|
||||
{
|
||||
ColumnSize size = getColumnSizeImpl(column.name, *column.type, &processed_substreams);
|
||||
ColumnSize size = getColumnSizeImpl(column, &processed_substreams);
|
||||
each_columns_size[column.name] = size;
|
||||
total_size.add(size);
|
||||
|
||||
|
@ -54,7 +54,7 @@ public:
|
||||
|
||||
~MergeTreeDataPartWide() override;
|
||||
|
||||
bool hasColumnFiles(const String & column, const IDataType & type) const override;
|
||||
bool hasColumnFiles(const NameAndTypePair & column) const override;
|
||||
|
||||
private:
|
||||
void checkConsistency(bool require_part_metadata) const override;
|
||||
@ -62,7 +62,7 @@ private:
|
||||
/// Loads marks index granularity into memory
|
||||
void loadIndexGranularity() override;
|
||||
|
||||
ColumnSize getColumnSizeImpl(const String & name, const IDataType & type, std::unordered_set<String> * processed_substreams) const;
|
||||
ColumnSize getColumnSizeImpl(const NameAndTypePair & column, std::unordered_set<String> * processed_substreams) const;
|
||||
|
||||
void calculateEachColumnSizes(ColumnSizeByName & each_columns_size, ColumnSize & total_size) const override;
|
||||
};
|
||||
|
@ -34,14 +34,14 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact(
|
||||
{
|
||||
const auto & storage_columns = metadata_snapshot->getColumns();
|
||||
for (const auto & column : columns_list)
|
||||
addStreams(column.name, *column.type, storage_columns.getCodecDescOrDefault(column.name, default_codec));
|
||||
addStreams(column, storage_columns.getCodecDescOrDefault(column.name, default_codec));
|
||||
}
|
||||
|
||||
void MergeTreeDataPartWriterCompact::addStreams(const String & name, const IDataType & type, const ASTPtr & effective_codec_desc)
|
||||
void MergeTreeDataPartWriterCompact::addStreams(const NameAndTypePair & column, const ASTPtr & effective_codec_desc)
|
||||
{
|
||||
IDataType::StreamCallback callback = [&] (const IDataType::SubstreamPath & substream_path, const IDataType & substream_type)
|
||||
{
|
||||
String stream_name = IDataType::getFileNameForStream(name, substream_path);
|
||||
String stream_name = IDataType::getFileNameForStream(column, substream_path);
|
||||
|
||||
/// Shared offsets for Nested type.
|
||||
if (compressed_streams.count(stream_name))
|
||||
@ -64,7 +64,7 @@ void MergeTreeDataPartWriterCompact::addStreams(const String & name, const IData
|
||||
};
|
||||
|
||||
IDataType::SubstreamPath stream_path;
|
||||
type.enumerateStreams(callback, stream_path);
|
||||
column.type->enumerateStreams(callback, stream_path);
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -183,7 +183,7 @@ void MergeTreeDataPartWriterCompact::writeDataBlock(const Block & block, const G
|
||||
CompressedStreamPtr prev_stream;
|
||||
auto stream_getter = [&, this](const IDataType::SubstreamPath & substream_path) -> WriteBuffer *
|
||||
{
|
||||
String stream_name = IDataType::getFileNameForStream(name_and_type->name, substream_path);
|
||||
String stream_name = IDataType::getFileNameForStream(*name_and_type, substream_path);
|
||||
|
||||
auto & result_stream = compressed_streams[stream_name];
|
||||
/// Write one compressed block per column in granule for more optimal reading.
|
||||
|
@ -37,7 +37,7 @@ private:
|
||||
|
||||
void addToChecksums(MergeTreeDataPartChecksums & checksums);
|
||||
|
||||
void addStreams(const String & name, const IDataType & type, const ASTPtr & effective_codec_desc);
|
||||
void addStreams(const NameAndTypePair & column, const ASTPtr & effective_codec_desc);
|
||||
|
||||
Block header;
|
||||
|
||||
|
@ -80,17 +80,17 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide(
|
||||
{
|
||||
const auto & columns = metadata_snapshot->getColumns();
|
||||
for (const auto & it : columns_list)
|
||||
addStreams(it.name, *it.type, columns.getCodecDescOrDefault(it.name, default_codec));
|
||||
addStreams(it, columns.getCodecDescOrDefault(it.name, default_codec));
|
||||
}
|
||||
|
||||
|
||||
void MergeTreeDataPartWriterWide::addStreams(
|
||||
const String & name,
|
||||
const IDataType & type,
|
||||
const NameAndTypePair & column,
|
||||
const ASTPtr & effective_codec_desc)
|
||||
{
|
||||
IDataType::StreamCallback callback = [&] (const IDataType::SubstreamPath & substream_path, const IDataType & substream_type)
|
||||
{
|
||||
String stream_name = IDataType::getFileNameForStream(name, substream_path);
|
||||
String stream_name = IDataType::getFileNameForStream(column, substream_path);
|
||||
/// Shared offsets for Nested type.
|
||||
if (column_streams.count(stream_name))
|
||||
return;
|
||||
@ -112,18 +112,18 @@ void MergeTreeDataPartWriterWide::addStreams(
|
||||
};
|
||||
|
||||
IDataType::SubstreamPath stream_path;
|
||||
type.enumerateStreams(callback, stream_path);
|
||||
column.type->enumerateStreams(callback, stream_path);
|
||||
}
|
||||
|
||||
|
||||
IDataType::OutputStreamGetter MergeTreeDataPartWriterWide::createStreamGetter(
|
||||
const String & name, WrittenOffsetColumns & offset_columns) const
|
||||
const NameAndTypePair & column, WrittenOffsetColumns & offset_columns) const
|
||||
{
|
||||
return [&, this] (const IDataType::SubstreamPath & substream_path) -> WriteBuffer *
|
||||
{
|
||||
bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes;
|
||||
|
||||
String stream_name = IDataType::getFileNameForStream(name, substream_path);
|
||||
String stream_name = IDataType::getFileNameForStream(column, substream_path);
|
||||
|
||||
/// Don't write offsets more than one time for Nested type.
|
||||
if (is_offsets && offset_columns.count(stream_name))
|
||||
@ -210,23 +210,23 @@ void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Perm
|
||||
if (primary_key_block.has(it->name))
|
||||
{
|
||||
const auto & primary_column = *primary_key_block.getByName(it->name).column;
|
||||
writeColumn(column.name, *column.type, primary_column, offset_columns, granules_to_write);
|
||||
writeColumn(*it, primary_column, offset_columns, granules_to_write);
|
||||
}
|
||||
else if (skip_indexes_block.has(it->name))
|
||||
{
|
||||
const auto & index_column = *skip_indexes_block.getByName(it->name).column;
|
||||
writeColumn(column.name, *column.type, index_column, offset_columns, granules_to_write);
|
||||
writeColumn(*it, index_column, offset_columns, granules_to_write);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// We rearrange the columns that are not included in the primary key here; Then the result is released - to save RAM.
|
||||
ColumnPtr permuted_column = column.column->permute(*permutation, 0);
|
||||
writeColumn(column.name, *column.type, *permuted_column, offset_columns, granules_to_write);
|
||||
writeColumn(*it, *permuted_column, offset_columns, granules_to_write);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
writeColumn(column.name, *column.type, *column.column, offset_columns, granules_to_write);
|
||||
writeColumn(*it, *column.column, offset_columns, granules_to_write);
|
||||
}
|
||||
}
|
||||
|
||||
@ -239,13 +239,12 @@ void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Perm
|
||||
}
|
||||
|
||||
void MergeTreeDataPartWriterWide::writeSingleMark(
|
||||
const String & name,
|
||||
const IDataType & type,
|
||||
const NameAndTypePair & column,
|
||||
WrittenOffsetColumns & offset_columns,
|
||||
size_t number_of_rows,
|
||||
DB::IDataType::SubstreamPath & path)
|
||||
{
|
||||
StreamsWithMarks marks = getCurrentMarksForColumn(name, type, offset_columns, path);
|
||||
StreamsWithMarks marks = getCurrentMarksForColumn(column, offset_columns, path);
|
||||
for (const auto & mark : marks)
|
||||
flushMarkToFile(mark, number_of_rows);
|
||||
}
|
||||
@ -260,17 +259,16 @@ void MergeTreeDataPartWriterWide::flushMarkToFile(const StreamNameAndMark & stre
|
||||
}
|
||||
|
||||
StreamsWithMarks MergeTreeDataPartWriterWide::getCurrentMarksForColumn(
|
||||
const String & name,
|
||||
const IDataType & type,
|
||||
const NameAndTypePair & column,
|
||||
WrittenOffsetColumns & offset_columns,
|
||||
DB::IDataType::SubstreamPath & path)
|
||||
{
|
||||
StreamsWithMarks result;
|
||||
type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
|
||||
column.type->enumerateStreams([&] (const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
|
||||
{
|
||||
bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes;
|
||||
|
||||
String stream_name = IDataType::getFileNameForStream(name, substream_path);
|
||||
String stream_name = IDataType::getFileNameForStream(column, substream_path);
|
||||
|
||||
/// Don't write offsets more than one time for Nested type.
|
||||
if (is_offsets && offset_columns.count(stream_name))
|
||||
@ -294,22 +292,21 @@ StreamsWithMarks MergeTreeDataPartWriterWide::getCurrentMarksForColumn(
|
||||
}
|
||||
|
||||
void MergeTreeDataPartWriterWide::writeSingleGranule(
|
||||
const String & name,
|
||||
const IDataType & type,
|
||||
const NameAndTypePair & name_and_type,
|
||||
const IColumn & column,
|
||||
WrittenOffsetColumns & offset_columns,
|
||||
IDataType::SerializeBinaryBulkStatePtr & serialization_state,
|
||||
IDataType::SerializeBinaryBulkSettings & serialize_settings,
|
||||
const Granule & granule)
|
||||
{
|
||||
type.serializeBinaryBulkWithMultipleStreams(column, granule.start_row, granule.rows_to_write, serialize_settings, serialization_state);
|
||||
name_and_type.type->serializeBinaryBulkWithMultipleStreams(column, granule.start_row, granule.rows_to_write, serialize_settings, serialization_state);
|
||||
|
||||
/// So that instead of the marks pointing to the end of the compressed block, there were marks pointing to the beginning of the next one.
|
||||
type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
|
||||
name_and_type.type->enumerateStreams([&] (const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
|
||||
{
|
||||
bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes;
|
||||
|
||||
String stream_name = IDataType::getFileNameForStream(name, substream_path);
|
||||
String stream_name = IDataType::getFileNameForStream(name_and_type, substream_path);
|
||||
|
||||
/// Don't write offsets more than one time for Nested type.
|
||||
if (is_offsets && offset_columns.count(stream_name))
|
||||
@ -321,27 +318,27 @@ void MergeTreeDataPartWriterWide::writeSingleGranule(
|
||||
|
||||
/// Column must not be empty. (column.size() !== 0)
|
||||
void MergeTreeDataPartWriterWide::writeColumn(
|
||||
const String & name,
|
||||
const IDataType & type,
|
||||
const NameAndTypePair & name_and_type,
|
||||
const IColumn & column,
|
||||
WrittenOffsetColumns & offset_columns,
|
||||
const Granules & granules)
|
||||
{
|
||||
if (granules.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty granules for column {}, current mark {}", backQuoteIfNeed(name), getCurrentMark());
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty granules for column {}, current mark {}", backQuoteIfNeed(name_and_type.name), getCurrentMark());
|
||||
|
||||
const auto & [name, type] = name_and_type;
|
||||
auto [it, inserted] = serialization_states.emplace(name, nullptr);
|
||||
|
||||
if (inserted)
|
||||
{
|
||||
IDataType::SerializeBinaryBulkSettings serialize_settings;
|
||||
serialize_settings.getter = createStreamGetter(name, offset_columns);
|
||||
type.serializeBinaryBulkStatePrefix(serialize_settings, it->second);
|
||||
serialize_settings.getter = createStreamGetter(name_and_type, offset_columns);
|
||||
type->serializeBinaryBulkStatePrefix(serialize_settings, it->second);
|
||||
}
|
||||
|
||||
const auto & global_settings = storage.global_context.getSettingsRef();
|
||||
IDataType::SerializeBinaryBulkSettings serialize_settings;
|
||||
serialize_settings.getter = createStreamGetter(name, offset_columns);
|
||||
serialize_settings.getter = createStreamGetter(name_and_type, offset_columns);
|
||||
serialize_settings.low_cardinality_max_dictionary_size = global_settings.low_cardinality_max_dictionary_size;
|
||||
serialize_settings.low_cardinality_use_single_dictionary_for_part = global_settings.low_cardinality_use_single_dictionary_for_part != 0;
|
||||
|
||||
@ -353,12 +350,11 @@ void MergeTreeDataPartWriterWide::writeColumn(
|
||||
{
|
||||
if (last_non_written_marks.count(name))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "We have to add new mark for column, but already have non written mark. Current mark {}, total marks {}, offset {}", getCurrentMark(), index_granularity.getMarksCount(), rows_written_in_last_mark);
|
||||
last_non_written_marks[name] = getCurrentMarksForColumn(name, type, offset_columns, serialize_settings.path);
|
||||
last_non_written_marks[name] = getCurrentMarksForColumn(name_and_type, offset_columns, serialize_settings.path);
|
||||
}
|
||||
|
||||
writeSingleGranule(
|
||||
name,
|
||||
type,
|
||||
name_and_type,
|
||||
column,
|
||||
offset_columns,
|
||||
it->second,
|
||||
@ -378,12 +374,12 @@ void MergeTreeDataPartWriterWide::writeColumn(
|
||||
}
|
||||
}
|
||||
|
||||
type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
|
||||
name_and_type.type->enumerateStreams([&] (const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
|
||||
{
|
||||
bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes;
|
||||
if (is_offsets)
|
||||
{
|
||||
String stream_name = IDataType::getFileNameForStream(name, substream_path);
|
||||
String stream_name = IDataType::getFileNameForStream(name_and_type, substream_path);
|
||||
offset_columns.insert(stream_name);
|
||||
}
|
||||
}, serialize_settings.path);
|
||||
@ -526,12 +522,12 @@ void MergeTreeDataPartWriterWide::finishDataSerialization(IMergeTreeDataPart::Ch
|
||||
{
|
||||
if (!serialization_states.empty())
|
||||
{
|
||||
serialize_settings.getter = createStreamGetter(it->name, written_offset_columns ? *written_offset_columns : offset_columns);
|
||||
serialize_settings.getter = createStreamGetter(*it, written_offset_columns ? *written_offset_columns : offset_columns);
|
||||
it->type->serializeBinaryBulkStateSuffix(serialize_settings, serialization_states[it->name]);
|
||||
}
|
||||
|
||||
if (write_final_mark)
|
||||
writeFinalMark(it->name, it->type, offset_columns, serialize_settings.path);
|
||||
writeFinalMark(*it, offset_columns, serialize_settings.path);
|
||||
}
|
||||
}
|
||||
for (auto & stream : column_streams)
|
||||
@ -567,19 +563,18 @@ void MergeTreeDataPartWriterWide::finish(IMergeTreeDataPart::Checksums & checksu
|
||||
}
|
||||
|
||||
void MergeTreeDataPartWriterWide::writeFinalMark(
|
||||
const std::string & column_name,
|
||||
const DataTypePtr column_type,
|
||||
const NameAndTypePair & column,
|
||||
WrittenOffsetColumns & offset_columns,
|
||||
DB::IDataType::SubstreamPath & path)
|
||||
{
|
||||
writeSingleMark(column_name, *column_type, offset_columns, 0, path);
|
||||
writeSingleMark(column, offset_columns, 0, path);
|
||||
/// Memoize information about offsets
|
||||
column_type->enumerateStreams([&] (const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
|
||||
column.type->enumerateStreams([&] (const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
|
||||
{
|
||||
bool is_offsets = !substream_path.empty() && substream_path.back().type == IDataType::Substream::ArraySizes;
|
||||
if (is_offsets)
|
||||
{
|
||||
String stream_name = IDataType::getFileNameForStream(column_name, substream_path);
|
||||
String stream_name = IDataType::getFileNameForStream(column, substream_path);
|
||||
offset_columns.insert(stream_name);
|
||||
}
|
||||
}, path);
|
||||
|
@ -40,16 +40,14 @@ private:
/// Return how many marks were written and
/// how many rows were written for last mark
void writeColumn(
const String & name,
const IDataType & type,
const NameAndTypePair & name_and_type,
const IColumn & column,
WrittenOffsetColumns & offset_columns,
const Granules & granules);

/// Write single granule of one column.
void writeSingleGranule(
const String & name,
const IDataType & type,
const NameAndTypePair & name_and_type,
const IColumn & column,
WrittenOffsetColumns & offset_columns,
IDataType::SerializeBinaryBulkStatePtr & serialization_state,
@ -58,8 +56,7 @@ private:

/// Take offsets from column and return as MarkInCompressed file with stream name
StreamsWithMarks getCurrentMarksForColumn(
const String & name,
const IDataType & type,
const NameAndTypePair & column,
WrittenOffsetColumns & offset_columns,
DB::IDataType::SubstreamPath & path);

@ -70,21 +67,18 @@ private:

/// Write mark for column taking offsets from column stream
void writeSingleMark(
const String & name,
const IDataType & type,
const NameAndTypePair & column,
WrittenOffsetColumns & offset_columns,
size_t number_of_rows,
DB::IDataType::SubstreamPath & path);

void writeFinalMark(
const std::string & column_name,
const DataTypePtr column_type,
const NameAndTypePair & column,
WrittenOffsetColumns & offset_columns,
DB::IDataType::SubstreamPath & path);

void addStreams(
const String & name,
const IDataType & type,
const NameAndTypePair & column,
const ASTPtr & effective_codec_desc);

/// Method for self check (used in debug-build only). Checks that written
@ -106,7 +100,7 @@ private:
/// Also useful to have exact amount of rows in last (non-final) mark.
void adjustLastMarkIfNeedAndFlushToDisk(size_t new_rows_in_last_mark);

IDataType::OutputStreamGetter createStreamGetter(const String & name, WrittenOffsetColumns & offset_columns) const;
IDataType::OutputStreamGetter createStreamGetter(const NameAndTypePair & column, WrittenOffsetColumns & offset_columns) const;

using SerializationState = IDataType::SerializeBinaryBulkStatePtr;
using SerializationStates = std::unordered_map<String, SerializationState>;

@ -14,6 +14,8 @@ struct MergeTreeReaderSettings
/// If save_marks_in_cache is false, then, if marks are not in cache,
/// we will load them but won't save in the cache, to avoid evicting other data.
bool save_marks_in_cache = false;
/// Convert old-style nested (single arrays with same prefix, `n.a`, `n.b`...) to subcolumns of data type Nested.
bool convert_nested_to_subcolumns = false;
};

struct MergeTreeWriterSettings

@ -93,7 +93,7 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr)
{
const auto & column = index_sample_block.getByPosition(i);
const auto & type = column.type;
auto new_column = type->createColumn();
ColumnPtr new_column = type->createColumn();

IDataType::DeserializeBinaryBulkSettings settings;
settings.getter = [&](IDataType::SubstreamPath) -> ReadBuffer * { return &istr; };
@ -101,9 +101,9 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr)

IDataType::DeserializeBinaryBulkStatePtr state;
type->deserializeBinaryBulkStatePrefix(settings, state);
type->deserializeBinaryBulkWithMultipleStreams(*new_column, rows_to_read, settings, state);
type->deserializeBinaryBulkWithMultipleStreams(new_column, rows_to_read, settings, state);

block.insert(ColumnWithTypeAndName(new_column->getPtr(), type, column.name));
block.insert(ColumnWithTypeAndName(new_column, type, column.name));
}
}

@ -53,14 +53,14 @@ MergeTreeReaderCompact::MergeTreeReaderCompact(
auto name_and_type = columns.begin();
for (size_t i = 0; i < columns_num; ++i, ++name_and_type)
{
const auto & [name, type] = getColumnFromPart(*name_and_type);
auto position = data_part->getColumnPosition(name);
auto column_from_part = getColumnFromPart(*name_and_type);

if (!position && typeid_cast<const DataTypeArray *>(type.get()))
auto position = data_part->getColumnPosition(column_from_part.name);
if (!position && typeid_cast<const DataTypeArray *>(column_from_part.type.get()))
{
/// If array of Nested column is missing in part,
/// we have to read its offsets if they exist.
position = findColumnForOffsets(name);
position = findColumnForOffsets(column_from_part.name);
read_only_offsets[i] = (position != std::nullopt);
}

@ -133,10 +133,8 @@ size_t MergeTreeReaderCompact::readRows(size_t from_mark, bool continue_reading,
if (!column_positions[i])
continue;

bool append = res_columns[i] != nullptr;
if (!append)
if (res_columns[i] == nullptr)
res_columns[i] = getColumnFromPart(*column_it).type->createColumn();
mutable_columns[i] = res_columns[i]->assumeMutable();
}

while (read_rows < max_rows_to_read)
@ -146,20 +144,18 @@ size_t MergeTreeReaderCompact::readRows(size_t from_mark, bool continue_reading,
auto name_and_type = columns.begin();
for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type)
{
auto column_from_part = getColumnFromPart(*name_and_type);
if (!res_columns[pos])
continue;

auto [name, type] = getColumnFromPart(*name_and_type);
auto & column = mutable_columns[pos];

try
{
auto & column = res_columns[pos];
size_t column_size_before_reading = column->size();

readData(name, *column, *type, from_mark, *column_positions[pos], rows_to_read, read_only_offsets[pos]);
readData(column_from_part, column, from_mark, *column_positions[pos], rows_to_read, read_only_offsets[pos]);

size_t read_rows_in_column = column->size() - column_size_before_reading;

if (read_rows_in_column < rows_to_read)
throw Exception("Cannot read all data in MergeTreeReaderCompact. Rows read: " + toString(read_rows_in_column) +
". Rows expected: " + toString(rows_to_read) + ".", ErrorCodes::CANNOT_READ_ALL_DATA);
@ -170,7 +166,7 @@ size_t MergeTreeReaderCompact::readRows(size_t from_mark, bool continue_reading,
storage.reportBrokenPart(data_part->name);

/// Better diagnostics.
e.addMessage("(while reading column " + name + ")");
e.addMessage("(while reading column " + column_from_part.name + ")");
throw;
}
catch (...)
@ -184,24 +180,17 @@ size_t MergeTreeReaderCompact::readRows(size_t from_mark, bool continue_reading,
read_rows += rows_to_read;
}

for (size_t i = 0; i < num_columns; ++i)
{
auto & column = mutable_columns[i];
if (column && !column->empty())
res_columns[i] = std::move(column);
else
res_columns[i] = nullptr;
}

next_mark = from_mark;

return read_rows;
}

void MergeTreeReaderCompact::readData(
const String & name, IColumn & column, const IDataType & type,
const NameAndTypePair & name_and_type, ColumnPtr & column,
size_t from_mark, size_t column_position, size_t rows_to_read, bool only_offsets)
{
const auto & [name, type] = name_and_type;

if (!isContinuousReading(from_mark, column_position))
seekToMark(from_mark, column_position);

@ -213,14 +202,25 @@ void MergeTreeReaderCompact::readData(
return data_buffer;
};

IDataType::DeserializeBinaryBulkStatePtr state;
IDataType::DeserializeBinaryBulkSettings deserialize_settings;
deserialize_settings.getter = buffer_getter;
deserialize_settings.avg_value_size_hint = avg_value_size_hints[name];
deserialize_settings.position_independent_encoding = true;

IDataType::DeserializeBinaryBulkStatePtr state;
type.deserializeBinaryBulkStatePrefix(deserialize_settings, state);
type.deserializeBinaryBulkWithMultipleStreams(column, rows_to_read, deserialize_settings, state);
if (name_and_type.isSubcolumn())
{
auto type_in_storage = name_and_type.getTypeInStorage();
ColumnPtr temp_column = type_in_storage->createColumn();

type_in_storage->deserializeBinaryBulkStatePrefix(deserialize_settings, state);
type_in_storage->deserializeBinaryBulkWithMultipleStreams(temp_column, rows_to_read, deserialize_settings, state);
column = type_in_storage->getSubcolumn(name_and_type.getSubcolumnName(), *temp_column);
}
else
{
type->deserializeBinaryBulkStatePrefix(deserialize_settings, state);
type->deserializeBinaryBulkWithMultipleStreams(column, rows_to_read, deserialize_settings, state);
}

/// The buffer is left in inconsistent state after reading single offsets
if (only_offsets)

@ -56,8 +56,8 @@ private:

void seekToMark(size_t row_index, size_t column_index);

void readData(const String & name, IColumn & column, const IDataType & type,
size_t from_mark, size_t column_position, size_t rows_to_read, bool only_offsets = false);
void readData(const NameAndTypePair & name_and_type, ColumnPtr & column, size_t from_mark,
size_t column_position, size_t rows_to_read, bool only_offsets);

/// Returns maximal value of granule size in compressed file from @mark_ranges.
/// This value is used as size of read buffer.

@ -12,6 +12,7 @@ namespace ErrorCodes
{
extern const int CANNOT_READ_ALL_DATA;
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int LOGICAL_ERROR;
}

@ -38,6 +39,19 @@ MergeTreeReaderInMemory::MergeTreeReaderInMemory(
}
}

static ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair & name_and_type)
{
auto storage_name = name_and_type.getNameInStorage();
if (!block.has(storage_name))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Not found column '{}' in block", storage_name);

const auto & column = block.getByName(storage_name).column;
if (name_and_type.isSubcolumn())
return name_and_type.getTypeInStorage()->getSubcolumn(name_and_type.getSubcolumnName(), *column);

return column;
}

size_t MergeTreeReaderInMemory::readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Columns & res_columns)
{
if (!continue_reading)
@ -60,17 +74,17 @@ size_t MergeTreeReaderInMemory::readRows(size_t from_mark, bool continue_reading
auto column_it = columns.begin();
for (size_t i = 0; i < num_columns; ++i, ++column_it)
{
auto [name, type] = getColumnFromPart(*column_it);
auto name_type = getColumnFromPart(*column_it);

/// Copy offsets, if array of Nested column is missing in part.
auto offsets_it = positions_for_offsets.find(name);
if (offsets_it != positions_for_offsets.end())
auto offsets_it = positions_for_offsets.find(name_type.name);
if (offsets_it != positions_for_offsets.end() && !name_type.isSubcolumn())
{
const auto & source_offsets = assert_cast<const ColumnArray &>(
*part_in_memory->block.getByPosition(offsets_it->second).column).getOffsets();

if (res_columns[i] == nullptr)
res_columns[i] = type->createColumn();
res_columns[i] = name_type.type->createColumn();

auto mutable_column = res_columns[i]->assumeMutable();
auto & res_offstes = assert_cast<ColumnArray &>(*mutable_column).getOffsets();
@ -80,9 +94,9 @@ size_t MergeTreeReaderInMemory::readRows(size_t from_mark, bool continue_reading

res_columns[i] = std::move(mutable_column);
}
else if (part_in_memory->block.has(name))
else if (part_in_memory->hasColumnFiles(name_type))
{
const auto & block_column = part_in_memory->block.getByName(name).column;
auto block_column = getColumnFromBlock(part_in_memory->block, name_type);
if (rows_to_read == part_rows)
{
res_columns[i] = block_column;
@ -90,7 +104,7 @@ size_t MergeTreeReaderInMemory::readRows(size_t from_mark, bool continue_reading
else
{
if (res_columns[i] == nullptr)
res_columns[i] = type->createColumn();
res_columns[i] = name_type.type->createColumn();

auto mutable_column = res_columns[i]->assumeMutable();
mutable_column->insertRangeFrom(*block_column, total_rows_read, rows_to_read);

@ -9,7 +9,6 @@
#include <Common/escapeForFileName.h>
#include <Common/typeid_cast.h>

namespace DB
{

@ -50,7 +49,7 @@ MergeTreeReaderWide::MergeTreeReaderWide(
for (const NameAndTypePair & column : columns)
{
auto column_from_part = getColumnFromPart(column);
addStreams(column_from_part.name, *column_from_part.type, profile_callback_, clock_type_);
addStreams(column_from_part, profile_callback_, clock_type_);
}
}
catch (...)
@ -73,48 +72,26 @@ size_t MergeTreeReaderWide::readRows(size_t from_mark, bool continue_reading, si
/// If append is true, then the value will be equal to nullptr and will be used only to
/// check that the offsets column has been already read.
OffsetColumns offset_columns;
std::unordered_map<String, IDataType::SubstreamsCache> caches;

auto name_and_type = columns.begin();
for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type)
{
auto [name, type] = getColumnFromPart(*name_and_type);
auto column_from_part = getColumnFromPart(*name_and_type);
const auto & [name, type] = column_from_part;

/// The column is already present in the block so we will append the values to the end.
bool append = res_columns[pos] != nullptr;
if (!append)
res_columns[pos] = type->createColumn();

/// To keep offsets shared. TODO Very dangerous. Get rid of this.
MutableColumnPtr column = res_columns[pos]->assumeMutable();

bool read_offsets = true;

/// For nested data structures collect pointers to offset columns.
if (const auto * type_arr = typeid_cast<const DataTypeArray *>(type.get()))
{
String table_name = Nested::extractTableName(name);

auto it_inserted = offset_columns.emplace(table_name, nullptr);

/// offsets have already been read on the previous iteration and we don't need to read it again
if (!it_inserted.second)
read_offsets = false;

/// need to create new offsets
if (it_inserted.second && !append)
it_inserted.first->second = ColumnArray::ColumnOffsets::create();

/// share offsets in all elements of nested structure
if (!append)
column = ColumnArray::create(type_arr->getNestedType()->createColumn(),
it_inserted.first->second)->assumeMutable();
}

auto & column = res_columns[pos];
try
{
size_t column_size_before_reading = column->size();
auto & cache = caches[column_from_part.getNameInStorage()];

readData(name, *type, *column, from_mark, continue_reading, max_rows_to_read, read_offsets);
readData(column_from_part, column, from_mark, continue_reading, max_rows_to_read, cache);

/// For elements of Nested, column_size_before_reading may be greater than column size
/// if offsets are not empty and were already read, but elements are empty.
@ -130,8 +107,6 @@ size_t MergeTreeReaderWide::readRows(size_t from_mark, bool continue_reading, si

if (column->empty())
res_columns[pos] = nullptr;
else
res_columns[pos] = std::move(column);
}

/// NOTE: positions for all streams must be kept in sync.
@ -159,12 +134,12 @@ size_t MergeTreeReaderWide::readRows(size_t from_mark, bool continue_reading, si
return read_rows;
}

void MergeTreeReaderWide::addStreams(const String & name, const IDataType & type,
void MergeTreeReaderWide::addStreams(const NameAndTypePair & name_and_type,
const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type)
{
IDataType::StreamCallback callback = [&] (const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
{
String stream_name = IDataType::getFileNameForStream(name, substream_path);
String stream_name = IDataType::getFileNameForStream(name_and_type, substream_path);

if (streams.count(stream_name))
return;
@ -186,24 +161,24 @@ void MergeTreeReaderWide::addStreams(const String & name, const IDataType & type
};

IDataType::SubstreamPath substream_path;
type.enumerateStreams(callback, substream_path);
name_and_type.type->enumerateStreams(callback, substream_path);
}


void MergeTreeReaderWide::readData(
const String & name, const IDataType & type, IColumn & column,
const NameAndTypePair & name_and_type, ColumnPtr & column,
size_t from_mark, bool continue_reading, size_t max_rows_to_read,
bool with_offsets)
IDataType::SubstreamsCache & cache)
{
auto get_stream_getter = [&](bool stream_for_prefix) -> IDataType::InputStreamGetter
{
return [&, stream_for_prefix](const IDataType::SubstreamPath & substream_path) -> ReadBuffer *
{
/// If offsets for arrays have already been read.
if (!with_offsets && substream_path.size() == 1 && substream_path[0].type == IDataType::Substream::ArraySizes)
/// If substream have already been read.
if (cache.count(IDataType::getSubcolumnNameForStream(substream_path)))
return nullptr;

String stream_name = IDataType::getFileNameForStream(name, substream_path);
String stream_name = IDataType::getFileNameForStream(name_and_type, substream_path);

auto it = streams.find(stream_name);
if (it == streams.end())
@ -223,21 +198,21 @@ void MergeTreeReaderWide::readData(
};
};

double & avg_value_size_hint = avg_value_size_hints[name];
double & avg_value_size_hint = avg_value_size_hints[name_and_type.name];
IDataType::DeserializeBinaryBulkSettings deserialize_settings;
deserialize_settings.avg_value_size_hint = avg_value_size_hint;

if (deserialize_binary_bulk_state_map.count(name) == 0)
if (deserialize_binary_bulk_state_map.count(name_and_type.name) == 0)
{
deserialize_settings.getter = get_stream_getter(true);
type.deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name]);
name_and_type.type->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name_and_type.name]);
}

deserialize_settings.getter = get_stream_getter(false);
deserialize_settings.continuous_reading = continue_reading;
auto & deserialize_state = deserialize_binary_bulk_state_map[name];
type.deserializeBinaryBulkWithMultipleStreams(column, max_rows_to_read, deserialize_settings, deserialize_state);
IDataType::updateAvgValueSizeHint(column, avg_value_size_hint);
auto & deserialize_state = deserialize_binary_bulk_state_map[name_and_type.name];
name_and_type.type->deserializeBinaryBulkWithMultipleStreams(column, max_rows_to_read, deserialize_settings, deserialize_state, &cache);
IDataType::updateAvgValueSizeHint(*column, avg_value_size_hint);
}

}
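The hunks above all follow one refactoring: the MergeTree readers and writers now receive a whole NameAndTypePair instead of separate name/type arguments, so helpers such as IDataType::getFileNameForStream() and the subcolumn accessors seen in the diff (getNameInStorage(), getSubcolumnName(), isSubcolumn()) can work from a single descriptor. Below is a minimal, self-contained sketch of that pattern; NameAndTypePairSketch, fileNameForStream() and the dot-separated subcolumn naming are illustrative assumptions, not the actual ClickHouse implementation.

// Sketch only: models how a single (name, type) descriptor can expose both the
// storage column name and an optional subcolumn name, which is the shape the
// reader/writer interfaces in this diff switch to.
#include <iostream>
#include <string>

struct NameAndTypePairSketch
{
    std::string name;       // full name, possibly "column.subcolumn" (assumed convention)
    std::string type_name;  // textual type, enough for the sketch

    // Hypothetical analogues of getNameInStorage()/getSubcolumnName()/isSubcolumn().
    std::string getNameInStorage() const
    {
        auto pos = name.find('.');
        return pos == std::string::npos ? name : name.substr(0, pos);
    }
    std::string getSubcolumnName() const
    {
        auto pos = name.find('.');
        return pos == std::string::npos ? std::string() : name.substr(pos + 1);
    }
    bool isSubcolumn() const { return name.find('.') != std::string::npos; }
};

// With the whole descriptor available, a stream-file-name helper needs no extra
// name argument (simplified stand-in for getFileNameForStream()).
std::string fileNameForStream(const NameAndTypePairSketch & column, const std::string & substream)
{
    return column.getNameInStorage() + (substream.empty() ? "" : "." + substream) + ".bin";
}

int main()
{
    NameAndTypePairSketch plain{"value", "UInt64"};
    NameAndTypePairSketch nested_size{"nested.size0", "UInt64"};

    std::cout << fileNameForStream(plain, "") << '\n';  // value.bin
    std::cout << nested_size.getNameInStorage() << " / " << nested_size.getSubcolumnName()
              << " (subcolumn: " << std::boolalpha << nested_size.isSubcolumn() << ")\n";
    return 0;
}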
Some files were not shown because too many files have changed in this diff.