Merge branch 'master' into network-instrumentation

mergify[bot] 2021-07-06 00:54:34 +00:00 committed by GitHub
commit 720660e9e9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
296 changed files with 11313 additions and 36496 deletions

3
.gitmodules vendored
View File

@ -168,9 +168,6 @@
[submodule "contrib/fmtlib"]
path = contrib/fmtlib
url = https://github.com/fmtlib/fmt.git
[submodule "contrib/antlr4-runtime"]
path = contrib/antlr4-runtime
url = https://github.com/ClickHouse-Extras/antlr4-runtime.git
[submodule "contrib/sentry-native"]
path = contrib/sentry-native
url = https://github.com/ClickHouse-Extras/sentry-native.git

View File

@ -13,6 +13,3 @@ ClickHouse® is an open-source column-oriented database management system that a
* [Code Browser](https://clickhouse.tech/codebrowser/html_report/ClickHouse/index.html) with syntax highlight and navigation.
* [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
* You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.
## Upcoming Events
* [China ClickHouse Community Meetup (online)](http://hdxu.cn/rhbfZ) on 26 June 2021.

View File

@ -0,0 +1,41 @@
#include <functional>
/** Adapts a functor to a static method where the functor is passed as an opaque context pointer.
* The main use case is converting a lambda into a function that can be passed into JIT-compiled code.
*/
template <typename Functor>
class FunctorToStaticMethodAdaptor : public FunctorToStaticMethodAdaptor<decltype(&Functor::operator())>
{
};
template <typename R, typename C, typename ...Args>
class FunctorToStaticMethodAdaptor<R (C::*)(Args...) const>
{
public:
static R call(C * ptr, Args &&... arguments)
{
return std::invoke(&C::operator(), ptr, std::forward<Args>(arguments)...);
}
static R unsafeCall(char * ptr, Args &&... arguments)
{
C * ptr_typed = reinterpret_cast<C*>(ptr);
return std::invoke(&C::operator(), ptr_typed, std::forward<Args>(arguments)...);
}
};
template <typename R, typename C, typename ...Args>
class FunctorToStaticMethodAdaptor<R (C::*)(Args...)>
{
public:
static R call(C * ptr, Args &&... arguments)
{
return std::invoke(&C::operator(), ptr, std::forward<Args>(arguments)...);
}
static R unsafeCall(char * ptr, Args &&... arguments)
{
C * ptr_typed = reinterpret_cast<C*>(ptr);
return std::invoke(&C::operator(), ptr_typed, std::forward<Args>(arguments)...);
}
};
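/// A minimal usage sketch (illustration only; this example function is not part of the original header).
/// It shows how a lambda's call operator becomes a plain static function plus an opaque context
/// pointer, which is the shape that JIT-compiled code can invoke.
inline int functorToStaticMethodAdaptorUsageExample()
{
    auto lambda = [](int x) { return x * 2; };
    using Adaptor = FunctorToStaticMethodAdaptor<decltype(lambda)>;

    /// `call` receives the functor with its real type; `unsafeCall` receives it as an opaque char pointer,
    /// which is how a context pointer is typically threaded through JIT-generated code.
    int via_call = Adaptor::call(&lambda, 21);
    int via_unsafe_call = Adaptor::unsafeCall(reinterpret_cast<char *>(&lambda), 21);

    return via_call + via_unsafe_call; /// 42 + 42
}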

View File

@ -34,7 +34,6 @@ endif()
set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL 1)
add_subdirectory (abseil-cpp-cmake)
add_subdirectory (antlr4-runtime-cmake)
add_subdirectory (boost-cmake)
add_subdirectory (cctz-cmake)
add_subdirectory (consistent-hashing)

@ -1 +0,0 @@
Subproject commit 672643e9a427ef803abf13bc8cb4989606553d64

View File

@ -1,156 +0,0 @@
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/antlr4-runtime")
set (SRCS
"${LIBRARY_DIR}/ANTLRErrorListener.cpp"
"${LIBRARY_DIR}/ANTLRErrorStrategy.cpp"
"${LIBRARY_DIR}/ANTLRFileStream.cpp"
"${LIBRARY_DIR}/ANTLRInputStream.cpp"
"${LIBRARY_DIR}/atn/AbstractPredicateTransition.cpp"
"${LIBRARY_DIR}/atn/ActionTransition.cpp"
"${LIBRARY_DIR}/atn/AmbiguityInfo.cpp"
"${LIBRARY_DIR}/atn/ArrayPredictionContext.cpp"
"${LIBRARY_DIR}/atn/ATN.cpp"
"${LIBRARY_DIR}/atn/ATNConfig.cpp"
"${LIBRARY_DIR}/atn/ATNConfigSet.cpp"
"${LIBRARY_DIR}/atn/ATNDeserializationOptions.cpp"
"${LIBRARY_DIR}/atn/ATNDeserializer.cpp"
"${LIBRARY_DIR}/atn/ATNSerializer.cpp"
"${LIBRARY_DIR}/atn/ATNSimulator.cpp"
"${LIBRARY_DIR}/atn/ATNState.cpp"
"${LIBRARY_DIR}/atn/AtomTransition.cpp"
"${LIBRARY_DIR}/atn/BasicBlockStartState.cpp"
"${LIBRARY_DIR}/atn/BasicState.cpp"
"${LIBRARY_DIR}/atn/BlockEndState.cpp"
"${LIBRARY_DIR}/atn/BlockStartState.cpp"
"${LIBRARY_DIR}/atn/ContextSensitivityInfo.cpp"
"${LIBRARY_DIR}/atn/DecisionEventInfo.cpp"
"${LIBRARY_DIR}/atn/DecisionInfo.cpp"
"${LIBRARY_DIR}/atn/DecisionState.cpp"
"${LIBRARY_DIR}/atn/EmptyPredictionContext.cpp"
"${LIBRARY_DIR}/atn/EpsilonTransition.cpp"
"${LIBRARY_DIR}/atn/ErrorInfo.cpp"
"${LIBRARY_DIR}/atn/LexerAction.cpp"
"${LIBRARY_DIR}/atn/LexerActionExecutor.cpp"
"${LIBRARY_DIR}/atn/LexerATNConfig.cpp"
"${LIBRARY_DIR}/atn/LexerATNSimulator.cpp"
"${LIBRARY_DIR}/atn/LexerChannelAction.cpp"
"${LIBRARY_DIR}/atn/LexerCustomAction.cpp"
"${LIBRARY_DIR}/atn/LexerIndexedCustomAction.cpp"
"${LIBRARY_DIR}/atn/LexerModeAction.cpp"
"${LIBRARY_DIR}/atn/LexerMoreAction.cpp"
"${LIBRARY_DIR}/atn/LexerPopModeAction.cpp"
"${LIBRARY_DIR}/atn/LexerPushModeAction.cpp"
"${LIBRARY_DIR}/atn/LexerSkipAction.cpp"
"${LIBRARY_DIR}/atn/LexerTypeAction.cpp"
"${LIBRARY_DIR}/atn/LL1Analyzer.cpp"
"${LIBRARY_DIR}/atn/LookaheadEventInfo.cpp"
"${LIBRARY_DIR}/atn/LoopEndState.cpp"
"${LIBRARY_DIR}/atn/NotSetTransition.cpp"
"${LIBRARY_DIR}/atn/OrderedATNConfigSet.cpp"
"${LIBRARY_DIR}/atn/ParseInfo.cpp"
"${LIBRARY_DIR}/atn/ParserATNSimulator.cpp"
"${LIBRARY_DIR}/atn/PlusBlockStartState.cpp"
"${LIBRARY_DIR}/atn/PlusLoopbackState.cpp"
"${LIBRARY_DIR}/atn/PrecedencePredicateTransition.cpp"
"${LIBRARY_DIR}/atn/PredicateEvalInfo.cpp"
"${LIBRARY_DIR}/atn/PredicateTransition.cpp"
"${LIBRARY_DIR}/atn/PredictionContext.cpp"
"${LIBRARY_DIR}/atn/PredictionMode.cpp"
"${LIBRARY_DIR}/atn/ProfilingATNSimulator.cpp"
"${LIBRARY_DIR}/atn/RangeTransition.cpp"
"${LIBRARY_DIR}/atn/RuleStartState.cpp"
"${LIBRARY_DIR}/atn/RuleStopState.cpp"
"${LIBRARY_DIR}/atn/RuleTransition.cpp"
"${LIBRARY_DIR}/atn/SemanticContext.cpp"
"${LIBRARY_DIR}/atn/SetTransition.cpp"
"${LIBRARY_DIR}/atn/SingletonPredictionContext.cpp"
"${LIBRARY_DIR}/atn/StarBlockStartState.cpp"
"${LIBRARY_DIR}/atn/StarLoopbackState.cpp"
"${LIBRARY_DIR}/atn/StarLoopEntryState.cpp"
"${LIBRARY_DIR}/atn/TokensStartState.cpp"
"${LIBRARY_DIR}/atn/Transition.cpp"
"${LIBRARY_DIR}/atn/WildcardTransition.cpp"
"${LIBRARY_DIR}/BailErrorStrategy.cpp"
"${LIBRARY_DIR}/BaseErrorListener.cpp"
"${LIBRARY_DIR}/BufferedTokenStream.cpp"
"${LIBRARY_DIR}/CharStream.cpp"
"${LIBRARY_DIR}/CommonToken.cpp"
"${LIBRARY_DIR}/CommonTokenFactory.cpp"
"${LIBRARY_DIR}/CommonTokenStream.cpp"
"${LIBRARY_DIR}/ConsoleErrorListener.cpp"
"${LIBRARY_DIR}/DefaultErrorStrategy.cpp"
"${LIBRARY_DIR}/dfa/DFA.cpp"
"${LIBRARY_DIR}/dfa/DFASerializer.cpp"
"${LIBRARY_DIR}/dfa/DFAState.cpp"
"${LIBRARY_DIR}/dfa/LexerDFASerializer.cpp"
"${LIBRARY_DIR}/DiagnosticErrorListener.cpp"
"${LIBRARY_DIR}/Exceptions.cpp"
"${LIBRARY_DIR}/FailedPredicateException.cpp"
"${LIBRARY_DIR}/InputMismatchException.cpp"
"${LIBRARY_DIR}/InterpreterRuleContext.cpp"
"${LIBRARY_DIR}/IntStream.cpp"
"${LIBRARY_DIR}/Lexer.cpp"
"${LIBRARY_DIR}/LexerInterpreter.cpp"
"${LIBRARY_DIR}/LexerNoViableAltException.cpp"
"${LIBRARY_DIR}/ListTokenSource.cpp"
"${LIBRARY_DIR}/misc/InterpreterDataReader.cpp"
"${LIBRARY_DIR}/misc/Interval.cpp"
"${LIBRARY_DIR}/misc/IntervalSet.cpp"
"${LIBRARY_DIR}/misc/MurmurHash.cpp"
"${LIBRARY_DIR}/misc/Predicate.cpp"
"${LIBRARY_DIR}/NoViableAltException.cpp"
"${LIBRARY_DIR}/Parser.cpp"
"${LIBRARY_DIR}/ParserInterpreter.cpp"
"${LIBRARY_DIR}/ParserRuleContext.cpp"
"${LIBRARY_DIR}/ProxyErrorListener.cpp"
"${LIBRARY_DIR}/RecognitionException.cpp"
"${LIBRARY_DIR}/Recognizer.cpp"
"${LIBRARY_DIR}/RuleContext.cpp"
"${LIBRARY_DIR}/RuleContextWithAltNum.cpp"
"${LIBRARY_DIR}/RuntimeMetaData.cpp"
"${LIBRARY_DIR}/support/Any.cpp"
"${LIBRARY_DIR}/support/Arrays.cpp"
"${LIBRARY_DIR}/support/CPPUtils.cpp"
"${LIBRARY_DIR}/support/guid.cpp"
"${LIBRARY_DIR}/support/StringUtils.cpp"
"${LIBRARY_DIR}/Token.cpp"
"${LIBRARY_DIR}/TokenSource.cpp"
"${LIBRARY_DIR}/TokenStream.cpp"
"${LIBRARY_DIR}/TokenStreamRewriter.cpp"
"${LIBRARY_DIR}/tree/ErrorNode.cpp"
"${LIBRARY_DIR}/tree/ErrorNodeImpl.cpp"
"${LIBRARY_DIR}/tree/IterativeParseTreeWalker.cpp"
"${LIBRARY_DIR}/tree/ParseTree.cpp"
"${LIBRARY_DIR}/tree/ParseTreeListener.cpp"
"${LIBRARY_DIR}/tree/ParseTreeVisitor.cpp"
"${LIBRARY_DIR}/tree/ParseTreeWalker.cpp"
"${LIBRARY_DIR}/tree/pattern/Chunk.cpp"
"${LIBRARY_DIR}/tree/pattern/ParseTreeMatch.cpp"
"${LIBRARY_DIR}/tree/pattern/ParseTreePattern.cpp"
"${LIBRARY_DIR}/tree/pattern/ParseTreePatternMatcher.cpp"
"${LIBRARY_DIR}/tree/pattern/RuleTagToken.cpp"
"${LIBRARY_DIR}/tree/pattern/TagChunk.cpp"
"${LIBRARY_DIR}/tree/pattern/TextChunk.cpp"
"${LIBRARY_DIR}/tree/pattern/TokenTagToken.cpp"
"${LIBRARY_DIR}/tree/TerminalNode.cpp"
"${LIBRARY_DIR}/tree/TerminalNodeImpl.cpp"
"${LIBRARY_DIR}/tree/Trees.cpp"
"${LIBRARY_DIR}/tree/xpath/XPath.cpp"
"${LIBRARY_DIR}/tree/xpath/XPathElement.cpp"
"${LIBRARY_DIR}/tree/xpath/XPathLexer.cpp"
"${LIBRARY_DIR}/tree/xpath/XPathLexerErrorListener.cpp"
"${LIBRARY_DIR}/tree/xpath/XPathRuleAnywhereElement.cpp"
"${LIBRARY_DIR}/tree/xpath/XPathRuleElement.cpp"
"${LIBRARY_DIR}/tree/xpath/XPathTokenAnywhereElement.cpp"
"${LIBRARY_DIR}/tree/xpath/XPathTokenElement.cpp"
"${LIBRARY_DIR}/tree/xpath/XPathWildcardAnywhereElement.cpp"
"${LIBRARY_DIR}/tree/xpath/XPathWildcardElement.cpp"
"${LIBRARY_DIR}/UnbufferedCharStream.cpp"
"${LIBRARY_DIR}/UnbufferedTokenStream.cpp"
"${LIBRARY_DIR}/Vocabulary.cpp"
"${LIBRARY_DIR}/WritableToken.cpp"
)
add_library (antlr4-runtime ${SRCS})
target_include_directories (antlr4-runtime SYSTEM PUBLIC ${LIBRARY_DIR})

2
contrib/libunwind vendored

@ -1 +1 @@
Subproject commit cdcc3d8c6f6e80a0886082704a0902d61d8d3ffe
Subproject commit 6b816d2fba3991f8fd6aaec17d92f68947eab667

View File

@ -160,7 +160,6 @@ function clone_submodules
SUBMODULES_TO_UPDATE=(
contrib/abseil-cpp
contrib/antlr4-runtime
contrib/boost
contrib/zlib-ng
contrib/libxml2
@ -374,12 +373,6 @@ function run_tests
# Depends on AWS
01801_s3_cluster
# Depends on LLVM JIT
01072_nullable_jit
01852_jit_if
01865_jit_comparison_constant_result
01871_merge_tree_compile_expressions
# needs psql
01889_postgresql_protocol_null_fields

View File

@ -11,6 +11,7 @@ services:
interval: 10s
timeout: 5s
retries: 5
command: [ "postgres", "-c", "wal_level=logical", "-c", "max_replication_slots=2"]
networks:
default:
aliases:
@ -22,4 +23,4 @@ services:
volumes:
- type: ${POSTGRES_LOGS_FS:-tmpfs}
source: ${POSTGRES_DIR:-}
target: /postgres/
target: /postgres/

View File

@ -35,7 +35,7 @@ if [ "$NUM_TRIES" -gt "1" ]; then
# simplest way to forward env variables to server
sudo -E -u clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml --daemon
else
service clickhouse-server start
sudo clickhouse start
fi
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then

View File

@ -7,13 +7,13 @@ toc_title: Third-Party Libraries Used
The list of third-party libraries can be obtained by the following query:
```
``` sql
SELECT library_name, license_type, license_path FROM system.licenses ORDER BY library_name COLLATE 'en'
```
[Example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIGxpYnJhcnlfbmFtZSwgbGljZW5zZV90eXBlLCBsaWNlbnNlX3BhdGggRlJPTSBzeXN0ZW0ubGljZW5zZXMgT1JERVIgQlkgbGlicmFyeV9uYW1lIENPTExBVEUgJ2VuJw==)
| library_name | license_type | license_path |
| library_name | license_type | license_path |
|:-|:-|:-|
| abseil-cpp | Apache | /contrib/abseil-cpp/LICENSE |
| AMQP-CPP | Apache | /contrib/AMQP-CPP/LICENSE |
@ -89,3 +89,15 @@ SELECT library_name, license_type, license_path FROM system.licenses ORDER BY li
| xz | Public Domain | /contrib/xz/COPYING |
| zlib-ng | zLib | /contrib/zlib-ng/LICENSE.md |
| zstd | BSD | /contrib/zstd/LICENSE |
## Guidelines for adding new third-party libraries and maintaining custom changes in them {#adding-third-party-libraries}
1. All external third-party code should reside in dedicated directories under the `contrib` directory of the ClickHouse repo. Prefer Git submodules, when available.
2. Fork/mirror the official repo in [Clickhouse-extras](https://github.com/ClickHouse-Extras). Prefer official GitHub repos, when available.
3. Branch from the branch you want to integrate, e.g., `master` -> `clickhouse/master`, or `release/vX.Y.Z` -> `clickhouse/release/vX.Y.Z`.
4. All forks in [Clickhouse-extras](https://github.com/ClickHouse-Extras) can be automatically synchronized with upstreams. The `clickhouse/...` branches remain unaffected, since virtually nobody uses that naming pattern in their upstream repos.
5. Add submodules under `contrib` of the ClickHouse repo that refer to the above forks/mirrors. Set the submodules to track the corresponding `clickhouse/...` branches (see the command sketch after this list).
6. Every time custom changes have to be made in the library code, create a dedicated branch, such as `clickhouse/my-fix`, and merge it into the branch that the submodule tracks, e.g., `clickhouse/master` or `clickhouse/release/vX.Y.Z`.
7. Do not push code to any branch of the forks in [Clickhouse-extras](https://github.com/ClickHouse-Extras) whose name does not follow the `clickhouse/...` pattern.
8. Always write the custom changes with the official repo in mind. Once the PR is merged from (a feature/fix branch in) your personal fork into the fork in [Clickhouse-extras](https://github.com/ClickHouse-Extras), and the submodule is bumped in the ClickHouse repo, consider opening another PR from (a feature/fix branch in) the fork in [Clickhouse-extras](https://github.com/ClickHouse-Extras) to the official repo of the library. This ensures that 1) the contribution has more than a single use case and importance, 2) others will also benefit from it, and 3) the change will not remain a maintenance burden solely on ClickHouse developers.
9. When a submodule needs to start using newer code from the original branch (e.g., `master`), the custom changes may already be merged into the branch it tracks (e.g., `clickhouse/master`), which may therefore have diverged from its original counterpart (i.e., `master`). A careful merge of `master` -> `clickhouse/master` should be carried out first, and only then can the submodule be bumped in ClickHouse.
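A rough command-line sketch of this workflow is shown below. It is only an illustration: `some-library` and the local paths are placeholders, not an actual ClickHouse submodule.

``` bash
# Mirror the upstream in ClickHouse-Extras and create the tracking branch (done once):
git clone git@github.com:ClickHouse-Extras/some-library.git
cd some-library
git checkout -b clickhouse/master origin/master
git push -u origin clickhouse/master

# Add the submodule to the ClickHouse repo and point it at that branch:
cd /path/to/ClickHouse
git submodule add https://github.com/ClickHouse-Extras/some-library.git contrib/some-library
git config -f .gitmodules submodule.contrib/some-library.branch clickhouse/master

# Later, after fixes are merged into clickhouse/master, bump the submodule:
git submodule update --remote contrib/some-library
git add .gitmodules contrib/some-library
git commit -m "Update contrib/some-library"
```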

View File

@ -237,6 +237,8 @@ The description of ClickHouse architecture can be found here: https://clickhouse
The Code Style Guide: https://clickhouse.tech/docs/en/development/style/
Adding third-party libraries: https://clickhouse.tech/docs/en/development/contrib/#adding-third-party-libraries
Writing tests: https://clickhouse.tech/docs/en/development/tests/
List of tasks: https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aopen+is%3Aissue+label%3A%22easy+task%22

View File

@ -628,7 +628,7 @@ If the class is not intended for polymorphic use, you do not need to make functi
**18.** Encodings.
Use UTF-8 everywhere. Use `std::string`and`char *`. Do not use `std::wstring`and`wchar_t`.
Use UTF-8 everywhere. Use `std::string` and `char *`. Do not use `std::wstring` and `wchar_t`.
**19.** Logging.
@ -749,17 +749,9 @@ If your code in the `master` branch is not buildable yet, exclude it from the bu
**1.** The C++20 standard library is used (experimental extensions are allowed), as well as `boost` and `Poco` frameworks.
**2.** If necessary, you can use any well-known libraries available in the OS package.
**2.** It is not allowed to use libraries from OS packages. It is also not allowed to use pre-installed libraries. All libraries should be placed in the form of source code in the `contrib` directory and built together with ClickHouse.
If there is a good solution already available, then use it, even if it means you have to install another library.
(But be prepared to remove bad libraries from code.)
**3.** You can install a library that isn't in the packages, if the packages do not have what you need or have an outdated version or the wrong type of compilation.
**4.** If the library is small and does not have its own complex build system, put the source files in the `contrib` folder.
**5.** Preference is always given to libraries that are already in use.
**3.** Preference is always given to libraries that are already in use.
## General Recommendations {#general-recommendations-1}

View File

@ -0,0 +1,71 @@
---
toc_priority: 30
toc_title: MaterializedPostgreSQL
---
# MaterializedPostgreSQL {#materialize-postgresql}
## Creating a Database {#creating-a-database}
``` sql
CREATE DATABASE test_database
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password');
SELECT * FROM test_database.postgres_table;
```
## Settings {#settings}
1. `materialized_postgresql_max_block_size` - Number of rows collected before flushing data into the table. Default: `65536`.
2. `materialized_postgresql_tables_list` - A comma-separated list of tables for the MaterializedPostgreSQL database engine. Default: empty (the whole database is replicated).
3. `materialized_postgresql_allow_automatic_update` - Allows reloading a table in the background when schema changes are detected. Default: `0` (`false`).
``` sql
CREATE DATABASE test_database
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
SETTINGS materialized_postgresql_max_block_size = 65536,
materialized_postgresql_tables_list = 'table1,table2,table3';
SELECT * FROM test_database.table1;
```
## Requirements {#requirements}
- Setting `wal_level` to `logical` and `max_replication_slots` to at least `2` in the PostgreSQL config file.
- Each replicated table must have one of the following **replica identity**:
1. **default** (primary key)
2. **index**
``` bash
postgres# CREATE TABLE postgres_table (a Integer NOT NULL, b Integer, c Integer NOT NULL, d Integer, e Integer NOT NULL);
postgres# CREATE unique INDEX postgres_table_index on postgres_table(a, c, e);
postgres# ALTER TABLE postgres_table REPLICA IDENTITY USING INDEX postgres_table_index;
```
The primary key is always checked first. If it is absent, the index defined as the replica identity index is checked.
If an index is used as the replica identity, the table must contain only one such index.
You can check what type is used for a specific table with the following command:
``` bash
postgres# SELECT CASE relreplident
WHEN 'd' THEN 'default'
WHEN 'n' THEN 'nothing'
WHEN 'f' THEN 'full'
WHEN 'i' THEN 'index'
END AS replica_identity
FROM pg_class
WHERE oid = 'postgres_table'::regclass;
```
## Warning {#warning}
1. **TOAST** values conversion is not supported. The default value for the data type will be used.

View File

@ -0,0 +1,46 @@
---
toc_priority: 12
toc_title: MaterializedPostgreSQL
---
# MaterializedPostgreSQL {#materialize-postgresql}
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE test.postgresql_replica (key UInt64, value UInt64)
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgresql_replica', 'postgres_user', 'postgres_password')
PRIMARY KEY key;
```
## Requirements {#requirements}
- Setting `wal_level` to `logical` and `max_replication_slots` to at least `2` in the PostgreSQL config file.
- A table with the `MaterializedPostgreSQL` engine must have a primary key - the same as the replica identity index (by default, the primary key) of the PostgreSQL table (see [details on replica identity index](../../database-engines/materialized-postgresql.md#requirements)).
- Only database `Atomic` is allowed.
## Virtual columns {#creating-a-table}
- `_version` (`UInt64`)
- `_sign` (`Int8`)
These columns do not need to be added when the table is created. They are always accessible in a `SELECT` query.
The `_version` column equals the `LSN` position in the `WAL`, so it can be used to check how up to date the replication is.
``` sql
CREATE TABLE test.postgresql_replica (key UInt64, value UInt64)
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgresql_replica', 'postgres_user', 'postgres_password')
PRIMARY KEY key;
SELECT key, value, _version FROM test.postgresql_replica;
```
## Warning {#warning}
1. **TOAST** values conversion is not supported. The default value for the data type will be used.

View File

@ -80,6 +80,7 @@ SELECT toDateTime('2019-01-01 00:00:00', 'UTC') AS time_utc,
toInt32(time_samoa) AS int32samoa
FORMAT Vertical;
```
Result:
```text
@ -1014,7 +1015,7 @@ Result:
## dateName {#dataname}
Returns part of date with specified date part.
Returns the specified part of a date.
**Syntax**
@ -1024,13 +1025,13 @@ dateName(date_part, date)
**Arguments**
- `date_part` - Date part. Possible values .
- `date` — Date [Date](../../sql-reference/data-types/date.md) or DateTime [DateTime](../../sql-reference/data-types/datetime.md), [DateTime64](../../sql-reference/data-types/datetime64.md).
- `date_part` — Date part. Possible values: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../../sql-reference/data-types/string.md).
- `date` — Date. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — Timezone. Optional. [String](../../sql-reference/data-types/string.md).
**Returned value**
- Specified date part of date.
- The specified part of the date.
Type: [String](../../sql-reference/data-types/string.md#string)
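**Example**

For instance, the following query (mirroring the example in the Russian version of this page) returns several parts of a date as strings:

``` sql
WITH toDateTime('2021-04-14 11:22:33') AS date_value
SELECT dateName('year', date_value), dateName('month', date_value), dateName('day', date_value);
```

Result:

``` text
┌─dateName('year', date_value)─┬─dateName('month', date_value)─┬─dateName('day', date_value)─┐
│ 2021                         │ April                         │ 14                          │
└──────────────────────────────┴───────────────────────────────┴─────────────────────────────┘
```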

View File

@ -224,7 +224,7 @@ Accepts an integer. Returns an array of UInt64 numbers containing the list of po
## bitPositionsToArray(num) {#bitpositionstoarraynum}
Accepts an integer, argument will be converted to unsigned integer type. Returns an array of UInt64 numbers containing the list of positions of bits that equals 1. Numbers in the array are in ascending order.
Accepts an integer and converts it to an unsigned integer. Returns an array of `UInt64` numbers containing the list of positions of bits of `arg` that equal `1`, in ascending order.
**Syntax**
@ -234,11 +234,13 @@ bitPositionsToArray(arg)
**Arguments**
- `arg` — Integer value.Types: [Int/UInt](../../sql-reference/data-types/int-uint.md)
- `arg` — Integer value. [Int/UInt](../../sql-reference/data-types/int-uint.md).
**Returned value**
An array of UInt64 numbers containing the list of positions of bits that equals 1. Numbers in the array are in ascending order.
- An array containing a list of positions of bits that equal `1`, in ascending order.
Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)).
**Example**
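Query (mirroring the example in the Russian version of this page):

``` sql
SELECT bitPositionsToArray(toInt8(1)) AS bit_positions;
```

Result:

``` text
┌─bit_positions─┐
│ [0]           │
└───────────────┘
```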

View File

@ -749,19 +749,11 @@ CPU命令セットは、サーバー間でサポートされる最小のセッ
## 図書館 {#libraries}
**1.** C++20標準ライブラリが使用されています実験的な拡張が許可されています`boost``Poco` フレームワーク
**1.** The C++20 standard library is used (experimental extensions are allowed), as well as `boost` and `Poco` frameworks.
**2.** 必要に応じて、OSパッケージで利用可能な既知のライブラリを使用できます。
**2.** It is not allowed to use libraries from OS packages. It is also not allowed to use pre-installed libraries. All libraries should be placed in the form of source code in the `contrib` directory and built together with ClickHouse.
すでに利用可能な良い解決策がある場合は、別のライブラリをインストールする必要がある場合でも、それを使用してください。
(が準備をしておいてくださ去の悪い図書館からのコードです。)
**3.** パッケージに必要なものがない場合や、古いバージョンや間違った種類のコンパイルがある場合は、パッケージにないライブラリをインストールできます。
**4.** ライブラリが小さく、独自の複雑なビルドシステムがない場合は、ソースファイルを `contrib` フォルダ。
**5.** すでに使用されているライブラリが優先されます。
**3.** Preference is always given to libraries that are already in use.
## 一般的な推奨事項 {#general-recommendations-1}

View File

@ -824,17 +824,9 @@ The dictionary is configured incorrectly.
**1.** Используются стандартная библиотека C++20 (допустимо использовать экспериментальные расширения) а также фреймворки `boost`, `Poco`.
**2.** При необходимости, можно использовать любые известные библиотеки, доступные в ОС из пакетов.
**2.** Библиотеки должны быть расположены в виде исходников в директории `contrib` и собираться вместе с ClickHouse. Не разрешено использовать библиотеки, доступные в пакетах ОС или любые другие способы установки библиотек в систему.
Если есть хорошее готовое решение, то оно используется, даже если для этого придётся установить ещё одну библиотеку.
(Но будьте готовы к тому, что иногда вам придётся выкидывать плохие библиотеки из кода.)
**3.** Если в пакетах нет нужной библиотеки, или её версия достаточно старая, или если она собрана не так, как нужно, то можно использовать библиотеку, устанавливаемую не из пакетов.
**4.** Если библиотека достаточно маленькая и у неё нет своей системы сборки, то следует включить её файлы в проект, в директорию `contrib`.
**5.** Предпочтение всегда отдаётся уже использующимся библиотекам.
**3.** Предпочтение отдаётся уже использующимся библиотекам.
## Общее {#obshchee-1}

View File

@ -27,40 +27,40 @@ SELECT
Возвращает часовой пояс сервера.
**Синтаксис**
**Синтаксис**
``` sql
timeZone()
```
Псевдоним: `timezone`.
Синоним: `timezone`.
**Возвращаемое значение**
- Часовой пояс.
- Часовой пояс.
Тип: [String](../../sql-reference/data-types/string.md).
## toTimeZone {#totimezone}
Переводит дату или дату с временем в указанный часовой пояс. Часовой пояс - это атрибут типов `Date` и `DateTime`. Внутреннее значение (количество секунд) поля таблицы или результирующего столбца не изменяется, изменяется тип поля и, соответственно, его текстовое отображение.
Переводит дату или дату с временем в указанный часовой пояс. Часовой пояс - это атрибут типов `Date` и `DateTime`. Внутреннее значение (количество секунд) поля таблицы или результирующего столбца не изменяется, изменяется тип поля и, соответственно, его текстовое отображение.
**Синтаксис**
**Синтаксис**
``` sql
toTimezone(value, timezone)
```
Псевдоним: `toTimezone`.
Синоним: `toTimezone`.
**Аргументы**
**Аргументы**
- `value` — время или дата с временем. [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — часовой пояс для возвращаемого значения. [String](../../sql-reference/data-types/string.md).
**Возвращаемое значение**
- Дата с временем.
- Дата с временем.
Тип: [DateTime](../../sql-reference/data-types/datetime.md).
@ -80,6 +80,7 @@ SELECT toDateTime('2019-01-01 00:00:00', 'UTC') AS time_utc,
toInt32(time_samoa) AS int32samoa
FORMAT Vertical;
```
Результат:
```text
@ -102,21 +103,21 @@ int32samoa: 1546300800
Возвращает название часового пояса для значений типа [DateTime](../../sql-reference/data-types/datetime.md) и [DateTime64](../../sql-reference/data-types/datetime64.md).
**Синтаксис**
**Синтаксис**
``` sql
timeZoneOf(value)
```
Псевдоним: `timezoneOf`.
Синоним: `timezoneOf`.
**Аргументы**
- `value` — Дата с временем. [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md).
- `value` — Дата с временем. [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md).
**Возвращаемое значение**
- Название часового пояса.
- Название часового пояса.
Тип: [String](../../sql-reference/data-types/string.md).
@ -145,15 +146,15 @@ SELECT timezoneOf(now());
timeZoneOffset(value)
```
Псевдоним: `timezoneOffset`.
Синоним: `timezoneOffset`.
**Аргументы**
- `value` — Дата с временем. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `value` — Дата с временем. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Возвращаемое значение**
- Смещение в секундах от UTC.
- Смещение в секундах от UTC.
Тип: [Int32](../../sql-reference/data-types/int-uint.md).
@ -626,7 +627,7 @@ SELECT now(), date_trunc('hour', now(), 'Europe/Moscow');
Добавляет интервал времени или даты к указанной дате или дате со временем.
**Синтаксис**
**Синтаксис**
``` sql
date_add(unit, value, date)
@ -1025,6 +1026,45 @@ SELECT formatDateTime(toDate('2010-01-04'), '%g');
└────────────────────────────────────────────┘
```
## dateName {#dataname}
Возвращает указанную часть даты.
**Синтаксис**
``` sql
dateName(date_part, date)
```
**Аргументы**
- `date_part` — часть даты. Возможные значения: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../../sql-reference/data-types/string.md).
- `date` — дата. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — часовой пояс. Необязательный аргумент. [String](../../sql-reference/data-types/string.md).
**Возвращаемое значение**
- Указанная часть даты.
Тип: [String](../../sql-reference/data-types/string.md#string).
**Пример**
Запрос:
```sql
WITH toDateTime('2021-04-14 11:22:33') AS date_value
SELECT dateName('year', date_value), dateName('month', date_value), dateName('day', date_value);
```
Результат:
```text
┌─dateName('year', date_value)─┬─dateName('month', date_value)─┬─dateName('day', date_value)─┐
│ 2021                         │ April                         │ 14                          │
└──────────────────────────────┴───────────────────────────────┴─────────────────────────────┘
```
## FROM\_UNIXTIME {#fromunixtime}
Функция преобразует Unix timestamp в календарную дату и время.

View File

@ -223,3 +223,53 @@ SELECT reinterpretAsUInt64(reverse(unhex('FFF'))) AS num;
## bitmaskToArray(num) {#bitmasktoarraynum}
Принимает целое число. Возвращает массив чисел типа UInt64, содержащий степени двойки, в сумме дающих исходное число; числа в массиве идут по возрастанию.
## bitPositionsToArray(num) {#bitpositionstoarraynum}
Принимает целое число и приводит его к беззнаковому виду. Возвращает массив `UInt64` чисел, который содержит список позиций битов `arg`, равных `1`, в порядке возрастания.
**Синтаксис**
```sql
bitPositionsToArray(arg)
```
**Аргументы**
- `arg` — целое значение. [Int/UInt](../../sql-reference/data-types/int-uint.md).
**Возвращаемое значение**
- Массив, содержащий список позиций битов, равных `1`, в порядке возрастания.
Тип: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)).
**Примеры**
Запрос:
``` sql
SELECT bitPositionsToArray(toInt8(1)) AS bit_positions;
```
Результат:
``` text
┌─bit_positions─┐
│ [0]           │
└───────────────┘
```
Запрос:
``` sql
select bitPositionsToArray(toInt8(-1)) as bit_positions;
```
Результат:
``` text
┌─bit_positions─────┐
│ [0,1,2,3,4,5,6,7] │
└───────────────────┘
```

View File

@ -742,19 +742,11 @@ CPU指令集是我们服务器中支持的最小集合。 目前它是SSE 4.2
## 库 {#ku}
**1.** 使用C++20标准库允许实验性功能以及 `boost``Poco` 框架。
**1.** The C++20 standard library is used (experimental extensions are allowed), as well as `boost` and `Poco` frameworks.
**2.** 如有必要,您可以使用 OS 包中提供的任何已知库。
**2.** It is not allowed to use libraries from OS packages. It is also not allowed to use pre-installed libraries. All libraries should be placed in the form of source code in the `contrib` directory and built together with ClickHouse.
如果有一个好的解决方案已经可用,那就使用它,即使这意味着你必须安装另一个库。
(但要准备从代码中删除不好的库)
**3.** 如果软件包没有您需要的软件包或者有过时的版本或错误的编译类型,则可以安装不在软件包中的库。
**4.** 如果库很小并且没有自己的复杂构建系统,请将源文件放在 `contrib` 文件夹中。
**5.** 始终优先考虑已经使用的库。
**3.** Preference is always given to libraries that are already in use.
## 一般建议 {#yi-ban-jian-yi-1}

View File

@ -50,7 +50,7 @@
#include <Interpreters/DNSCacheUpdater.h>
#include <Interpreters/ExternalLoaderXMLConfigRepository.h>
#include <Interpreters/InterserverCredentials.h>
#include <Interpreters/ExpressionJIT.h>
#include <Interpreters/JIT/CompiledExpressionCache.h>
#include <Access/AccessControlManager.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/System/attachSystemTables.h>

View File

@ -44,6 +44,7 @@
--table-header-color: #F8F8F8;
--table-hover-color: #FFF8EF;
--null-color: #A88;
--link-color: #06D;
}
[data-theme="dark"] {
@ -61,6 +62,7 @@
--table-header-color: #102020;
--table-hover-color: #003333;
--null-color: #A88;
--link-color: #4BDAF7;
}
html, body
@ -275,6 +277,12 @@
font-size: 110%;
color: #080;
}
a, a:visited
{
color: var(--link-color);
text-decoration: none;
}
</style>
</head>
@ -482,6 +490,7 @@
let cell = response.data[row_idx][col_idx];
let is_null = (cell === null);
let is_link = false;
/// Test: SELECT number, toString(number) AS str, number % 2 ? number : NULL AS nullable, range(number) AS arr, CAST((['hello', 'world'], [number, number % 2]) AS Map(String, UInt64)) AS map FROM numbers(10)
let text;
@ -491,9 +500,23 @@
text = JSON.stringify(cell);
} else {
text = cell;
/// If it looks like URL, create a link. This is for convenience.
if (typeof(cell) == 'string' && cell.match(/^https?:\/\/\S+$/)) {
is_link = true;
}
}
td.appendChild(document.createTextNode(text));
let node = document.createTextNode(text);
if (is_link) {
let link = document.createElement('a');
link.appendChild(node);
link.href = text;
link.setAttribute('target', '_blank');
node = link;
}
td.appendChild(node);
td.className = column_classes[col_idx];
if (is_null) {
td.className += ' null';

View File

@ -9,6 +9,14 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <Core/DecimalFunctions.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#if USE_EMBEDDED_COMPILER
# include <llvm/IR/IRBuilder.h>
# include <DataTypes/Native.h>
#endif
namespace DB
{
@ -85,13 +93,15 @@ struct AvgFraction
* @tparam Derived When deriving from this class, use the child class name as in CRTP, e.g.
* class Self : Agg<char, bool, bool, Self>.
*/
template <typename Numerator, typename Denominator, typename Derived>
template <typename TNumerator, typename TDenominator, typename Derived>
class AggregateFunctionAvgBase : public
IAggregateFunctionDataHelper<AvgFraction<Numerator, Denominator>, Derived>
IAggregateFunctionDataHelper<AvgFraction<TNumerator, TDenominator>, Derived>
{
public:
using Base = IAggregateFunctionDataHelper<AvgFraction<TNumerator, TDenominator>, Derived>;
using Numerator = TNumerator;
using Denominator = TDenominator;
using Fraction = AvgFraction<Numerator, Denominator>;
using Base = IAggregateFunctionDataHelper<Fraction, Derived>;
explicit AggregateFunctionAvgBase(const DataTypes & argument_types_,
UInt32 num_scale_ = 0, UInt32 denom_scale_ = 0)
@ -135,6 +145,77 @@ public:
else
assert_cast<ColumnVector<Float64> &>(to).getData().push_back(this->data(place).divide());
}
#if USE_EMBEDDED_COMPILER
bool isCompilable() const override
{
bool can_be_compiled = true;
for (const auto & argument : this->argument_types)
can_be_compiled &= canBeNativeType(*argument);
auto return_type = getReturnType();
can_be_compiled &= canBeNativeType(*return_type);
return can_be_compiled;
}
void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
b.CreateMemSet(aggregate_data_ptr, llvm::ConstantInt::get(b.getInt8Ty(), 0), sizeof(Fraction), llvm::assumeAligned(this->alignOfData()));
}
void compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
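/// Merge two aggregate states: numerator_dst += numerator_src and denominator_dst += denominator_src, using integer or floating-point addition depending on the native type.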
auto * numerator_type = toNativeType<Numerator>(b);
auto * numerator_dst_ptr = b.CreatePointerCast(aggregate_data_dst_ptr, numerator_type->getPointerTo());
auto * numerator_dst_value = b.CreateLoad(numerator_type, numerator_dst_ptr);
auto * numerator_src_ptr = b.CreatePointerCast(aggregate_data_src_ptr, numerator_type->getPointerTo());
auto * numerator_src_value = b.CreateLoad(numerator_type, numerator_src_ptr);
auto * numerator_result_value = numerator_type->isIntegerTy() ? b.CreateAdd(numerator_dst_value, numerator_src_value) : b.CreateFAdd(numerator_dst_value, numerator_src_value);
b.CreateStore(numerator_result_value, numerator_dst_ptr);
auto * denominator_type = toNativeType<Denominator>(b);
static constexpr size_t denominator_offset = offsetof(Fraction, denominator);
auto * denominator_dst_ptr = b.CreatePointerCast(b.CreateConstGEP1_32(nullptr, aggregate_data_dst_ptr, denominator_offset), denominator_type->getPointerTo());
auto * denominator_src_ptr = b.CreatePointerCast(b.CreateConstGEP1_32(nullptr, aggregate_data_src_ptr, denominator_offset), denominator_type->getPointerTo());
auto * denominator_dst_value = b.CreateLoad(denominator_type, denominator_dst_ptr);
auto * denominator_src_value = b.CreateLoad(denominator_type, denominator_src_ptr);
auto * denominator_result_value = denominator_type->isIntegerTy() ? b.CreateAdd(denominator_src_value, denominator_dst_value) : b.CreateFAdd(denominator_src_value, denominator_dst_value);
b.CreateStore(denominator_result_value, denominator_dst_ptr);
}
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
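/// Final result = numerator / denominator, with both sides cast to Float64 before the division.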
auto * numerator_type = toNativeType<Numerator>(b);
auto * numerator_ptr = b.CreatePointerCast(aggregate_data_ptr, numerator_type->getPointerTo());
auto * numerator_value = b.CreateLoad(numerator_type, numerator_ptr);
auto * denominator_type = toNativeType<Denominator>(b);
static constexpr size_t denominator_offset = offsetof(Fraction, denominator);
auto * denominator_ptr = b.CreatePointerCast(b.CreateConstGEP1_32(nullptr, aggregate_data_ptr, denominator_offset), denominator_type->getPointerTo());
auto * denominator_value = b.CreateLoad(denominator_type, denominator_ptr);
auto * double_numerator = nativeCast<Numerator>(b, numerator_value, b.getDoubleTy());
auto * double_denominator = nativeCast<Denominator>(b, denominator_value, b.getDoubleTy());
return b.CreateFDiv(double_numerator, double_denominator);
}
#endif
private:
UInt32 num_scale;
UInt32 denom_scale;
@ -149,7 +230,12 @@ template <typename T>
class AggregateFunctionAvg final : public AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionAvg<T>>
{
public:
using AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionAvg<T>>::AggregateFunctionAvgBase;
using Base = AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionAvg<T>>;
using Base::Base;
using Numerator = typename Base::Numerator;
using Denominator = typename Base::Denominator;
using Fraction = typename Base::Fraction;
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const final
{
@ -158,5 +244,29 @@ public:
}
String getName() const final { return "avg"; }
#if USE_EMBEDDED_COMPILER
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector<llvm::Value *> & argument_values) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
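/// Per row: numerator += argument value (cast to the numerator's native type), denominator += 1.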
auto * numerator_type = toNativeType<Numerator>(b);
auto * numerator_ptr = b.CreatePointerCast(aggregate_data_ptr, numerator_type->getPointerTo());
auto * numerator_value = b.CreateLoad(numerator_type, numerator_ptr);
auto * value_cast_to_numerator = nativeCast(b, arguments_types[0], argument_values[0], numerator_type);
auto * numerator_result_value = numerator_type->isIntegerTy() ? b.CreateAdd(numerator_value, value_cast_to_numerator) : b.CreateFAdd(numerator_value, value_cast_to_numerator);
b.CreateStore(numerator_result_value, numerator_ptr);
auto * denominator_type = toNativeType<Denominator>(b);
static constexpr size_t denominator_offset = offsetof(Fraction, denominator);
auto * denominator_ptr = b.CreatePointerCast(b.CreateConstGEP1_32(nullptr, aggregate_data_ptr, denominator_offset), denominator_type->getPointerTo());
auto * denominator_value_updated = b.CreateAdd(b.CreateLoad(denominator_type, denominator_ptr), llvm::ConstantInt::get(denominator_type, 1));
b.CreateStore(denominator_value_updated, denominator_ptr);
}
#endif
};
}

View File

@ -28,19 +28,64 @@ public:
MaxFieldType<Value, Weight>, AvgWeightedFieldType<Weight>, AggregateFunctionAvgWeighted<Value, Weight>>;
using Base::Base;
using ValueT = MaxFieldType<Value, Weight>;
using Numerator = typename Base::Numerator;
using Denominator = typename Base::Denominator;
using Fraction = typename Base::Fraction;
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
{
const auto& weights = static_cast<const DecimalOrVectorCol<Weight> &>(*columns[1]);
this->data(place).numerator += static_cast<ValueT>(
this->data(place).numerator += static_cast<Numerator>(
static_cast<const DecimalOrVectorCol<Value> &>(*columns[0]).getData()[row_num]) *
static_cast<ValueT>(weights.getData()[row_num]);
static_cast<Numerator>(weights.getData()[row_num]);
this->data(place).denominator += static_cast<AvgWeightedFieldType<Weight>>(weights.getData()[row_num]);
this->data(place).denominator += static_cast<Denominator>(weights.getData()[row_num]);
}
String getName() const override { return "avgWeighted"; }
#if USE_EMBEDDED_COMPILER
bool isCompilable() const override
{
bool can_be_compiled = Base::isCompilable();
can_be_compiled &= canBeNativeType<Weight>();
return can_be_compiled;
}
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector<llvm::Value *> & argument_values) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
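/// Per row: numerator += value * weight, denominator += weight (each cast to its native type).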
auto * numerator_type = toNativeType<Numerator>(b);
auto * numerator_ptr = b.CreatePointerCast(aggregate_data_ptr, numerator_type->getPointerTo());
auto * numerator_value = b.CreateLoad(numerator_type, numerator_ptr);
auto * argument = nativeCast(b, arguments_types[0], argument_values[0], numerator_type);
auto * weight = nativeCast(b, arguments_types[1], argument_values[1], numerator_type);
llvm::Value * value_weight_multiplication = argument->getType()->isIntegerTy() ? b.CreateMul(argument, weight) : b.CreateFMul(argument, weight);
auto * numerator_result_value = numerator_type->isIntegerTy() ? b.CreateAdd(numerator_value, value_weight_multiplication) : b.CreateFAdd(numerator_value, value_weight_multiplication);
b.CreateStore(numerator_result_value, numerator_ptr);
auto * denominator_type = toNativeType<Denominator>(b);
static constexpr size_t denominator_offset = offsetof(Fraction, denominator);
auto * denominator_offset_ptr = b.CreateConstGEP1_32(nullptr, aggregate_data_ptr, denominator_offset);
auto * denominator_ptr = b.CreatePointerCast(denominator_offset_ptr, denominator_type->getPointerTo());
auto * weight_cast_to_denominator = nativeCast(b, arguments_types[1], argument_values[1], denominator_type);
auto * denominator_value = b.CreateLoad(denominator_type, denominator_ptr);
auto * denominator_value_updated = denominator_type->isIntegerTy() ? b.CreateAdd(denominator_value, weight_cast_to_denominator) : b.CreateFAdd(denominator_value, weight_cast_to_denominator);
b.CreateStore(denominator_value_updated, denominator_ptr);
}
#endif
};
}

View File

@ -10,6 +10,15 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <Common/assert_cast.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#if USE_EMBEDDED_COMPILER
# include <llvm/IR/IRBuilder.h>
# include <DataTypes/Native.h>
#endif
namespace DB
{
@ -107,6 +116,66 @@ public:
AggregateFunctionPtr getOwnNullAdapter(
const AggregateFunctionPtr &, const DataTypes & types, const Array & params, const AggregateFunctionProperties & /*properties*/) const override;
#if USE_EMBEDDED_COMPILER
bool isCompilable() const override
{
bool is_compilable = true;
for (const auto & argument_type : argument_types)
is_compilable &= canBeNativeType(*argument_type);
return is_compilable;
}
void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
b.CreateMemSet(aggregate_data_ptr, llvm::ConstantInt::get(b.getInt8Ty(), 0), sizeof(AggregateFunctionCountData), llvm::assumeAligned(this->alignOfData()));
}
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes &, const std::vector<llvm::Value *> &) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, getReturnType());
auto * count_value_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo());
auto * count_value = b.CreateLoad(return_type, count_value_ptr);
auto * updated_count_value = b.CreateAdd(count_value, llvm::ConstantInt::get(return_type, 1));
b.CreateStore(updated_count_value, count_value_ptr);
}
void compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, getReturnType());
auto * count_value_dst_ptr = b.CreatePointerCast(aggregate_data_dst_ptr, return_type->getPointerTo());
auto * count_value_dst = b.CreateLoad(return_type, count_value_dst_ptr);
auto * count_value_src_ptr = b.CreatePointerCast(aggregate_data_src_ptr, return_type->getPointerTo());
auto * count_value_src = b.CreateLoad(return_type, count_value_src_ptr);
auto * count_value_dst_updated = b.CreateAdd(count_value_dst, count_value_src);
b.CreateStore(count_value_dst_updated, count_value_dst_ptr);
}
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, getReturnType());
auto * count_value_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo());
return b.CreateLoad(return_type, count_value_ptr);
}
#endif
};
@ -155,6 +224,71 @@ public:
{
assert_cast<ColumnUInt64 &>(to).getData().push_back(data(place).count);
}
#if USE_EMBEDDED_COMPILER
bool isCompilable() const override
{
bool is_compilable = true;
for (const auto & argument_type : argument_types)
is_compilable &= canBeNativeType(*argument_type);
return is_compilable;
}
void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
b.CreateMemSet(aggregate_data_ptr, llvm::ConstantInt::get(b.getInt8Ty(), 0), sizeof(AggregateFunctionCountData), llvm::assumeAligned(this->alignOfData()));
}
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes &, const std::vector<llvm::Value *> & values) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, getReturnType());
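/// values[0] is a (value, is_null) pair; add 1 to the counter only for non-NULL rows.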
auto * is_null_value = b.CreateExtractValue(values[0], {1});
auto * increment_value = b.CreateSelect(is_null_value, llvm::ConstantInt::get(return_type, 0), llvm::ConstantInt::get(return_type, 1));
auto * count_value_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo());
auto * count_value = b.CreateLoad(return_type, count_value_ptr);
auto * updated_count_value = b.CreateAdd(count_value, increment_value);
b.CreateStore(updated_count_value, count_value_ptr);
}
void compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, getReturnType());
auto * count_value_dst_ptr = b.CreatePointerCast(aggregate_data_dst_ptr, return_type->getPointerTo());
auto * count_value_dst = b.CreateLoad(return_type, count_value_dst_ptr);
auto * count_value_src_ptr = b.CreatePointerCast(aggregate_data_src_ptr, return_type->getPointerTo());
auto * count_value_src = b.CreateLoad(return_type, count_value_src_ptr);
auto * count_value_dst_updated = b.CreateAdd(count_value_dst, count_value_src);
b.CreateStore(count_value_dst_updated, count_value_dst_ptr);
}
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, getReturnType());
auto * count_value_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo());
return b.CreateLoad(return_type, count_value_ptr);
}
#endif
};
}

View File

@ -106,6 +106,48 @@ public:
this->nested_function->add(this->nestedPlace(place), &nested_column, row_num, arena);
}
}
#if USE_EMBEDDED_COMPILER
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector<llvm::Value *> & argument_values) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
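/// The nullable argument is a (value, is_null) pair and the last argument is the filter predicate; the nested function's compileAdd is emitted only on the branch where the value is not NULL and the predicate is true.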
const auto & nullable_type = arguments_types[0];
const auto & nullable_value = argument_values[0];
auto * wrapped_value = b.CreateExtractValue(nullable_value, {0});
auto * is_null_value = b.CreateExtractValue(nullable_value, {1});
const auto & predicate_type = arguments_types[argument_values.size() - 1];
auto * predicate_value = argument_values[argument_values.size() - 1];
auto * is_predicate_true = nativeBoolCast(b, predicate_type, predicate_value);
auto * head = b.GetInsertBlock();
auto * join_block = llvm::BasicBlock::Create(head->getContext(), "join_block", head->getParent());
auto * if_null = llvm::BasicBlock::Create(head->getContext(), "if_null", head->getParent());
auto * if_not_null = llvm::BasicBlock::Create(head->getContext(), "if_not_null", head->getParent());
b.CreateCondBr(b.CreateAnd(b.CreateNot(is_null_value), is_predicate_true), if_not_null, if_null);
b.SetInsertPoint(if_null);
b.CreateBr(join_block);
b.SetInsertPoint(if_not_null);
if constexpr (result_is_nullable)
b.CreateStore(llvm::ConstantInt::get(b.getInt8Ty(), 1), aggregate_data_ptr);
auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstGEP1_32(nullptr, aggregate_data_ptr, this->prefix_size);
this->nested_function->compileAdd(b, aggregate_data_ptr_with_prefix_size_offset, { removeNullable(nullable_type) }, { wrapped_value });
b.CreateBr(join_block);
b.SetInsertPoint(join_block);
}
#endif
};
template <bool result_is_nullable, bool serialize_flag, bool null_is_skipped>
@ -168,6 +210,95 @@ public:
}
}
#if USE_EMBEDDED_COMPILER
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector<llvm::Value *> & argument_values) const override
{
/// TODO: Check
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
size_t arguments_size = arguments_types.size();
DataTypes non_nullable_types;
std::vector<llvm::Value * > wrapped_values;
std::vector<llvm::Value * > is_null_values;
non_nullable_types.resize(arguments_size);
wrapped_values.resize(arguments_size);
is_null_values.resize(arguments_size);
for (size_t i = 0; i < arguments_size; ++i)
{
const auto & argument_value = argument_values[i];
if (is_nullable[i])
{
auto * wrapped_value = b.CreateExtractValue(argument_value, {0});
if constexpr (null_is_skipped)
is_null_values[i] = b.CreateExtractValue(argument_value, {1});
wrapped_values[i] = wrapped_value;
non_nullable_types[i] = removeNullable(arguments_types[i]);
}
else
{
wrapped_values[i] = argument_value;
non_nullable_types[i] = arguments_types[i];
}
}
auto * head = b.GetInsertBlock();
auto * join_block = llvm::BasicBlock::Create(head->getContext(), "join_block", head->getParent());
auto * join_block_after_null_checks = llvm::BasicBlock::Create(head->getContext(), "join_block_after_null_checks", head->getParent());
if constexpr (null_is_skipped)
{
auto * values_have_null_ptr = b.CreateAlloca(b.getInt1Ty());
b.CreateStore(b.getInt1(false), values_have_null_ptr);
for (auto * is_null_value : is_null_values)
{
if (!is_null_value)
continue;
auto * values_have_null = b.CreateLoad(b.getInt1Ty(), values_have_null_ptr);
b.CreateStore(b.CreateOr(values_have_null, is_null_value), values_have_null_ptr);
}
b.CreateCondBr(b.CreateLoad(b.getInt1Ty(), values_have_null_ptr), join_block, join_block_after_null_checks);
}
b.SetInsertPoint(join_block_after_null_checks);
const auto & predicate_type = arguments_types[argument_values.size() - 1];
auto * predicate_value = argument_values[argument_values.size() - 1];
auto * is_predicate_true = nativeBoolCast(b, predicate_type, predicate_value);
auto * if_true = llvm::BasicBlock::Create(head->getContext(), "if_true", head->getParent());
auto * if_false = llvm::BasicBlock::Create(head->getContext(), "if_false", head->getParent());
b.CreateCondBr(is_predicate_true, if_true, if_false);
b.SetInsertPoint(if_false);
b.CreateBr(join_block);
b.SetInsertPoint(if_true);
if constexpr (result_is_nullable)
b.CreateStore(llvm::ConstantInt::get(b.getInt8Ty(), 1), aggregate_data_ptr);
auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstGEP1_32(nullptr, aggregate_data_ptr, this->prefix_size);
this->nested_function->compileAdd(b, aggregate_data_ptr_with_prefix_size_offset, non_nullable_types, wrapped_values);
b.CreateBr(join_block);
b.SetInsertPoint(join_block);
}
#endif
private:
using Base = AggregateFunctionNullBase<result_is_nullable, serialize_flag,
AggregateFunctionIfNullVariadic<result_is_nullable, serialize_flag, null_is_skipped>>;

View File

@ -5,6 +5,14 @@
#include <Common/assert_cast.h>
#include <AggregateFunctions/IAggregateFunction.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#if USE_EMBEDDED_COMPILER
# include <llvm/IR/IRBuilder.h>
# include <DataTypes/Native.h>
#endif
namespace DB
{
@ -154,6 +162,76 @@ public:
const Array & params, const AggregateFunctionProperties & properties) const override;
AggregateFunctionPtr getNestedFunction() const override { return nested_func; }
#if USE_EMBEDDED_COMPILER
bool isCompilable() const override
{
return nested_func->isCompilable();
}
void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
nested_func->compileCreate(builder, aggregate_data_ptr);
}
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector<llvm::Value *> & argument_values) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
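/// The last argument is the predicate; it is stripped from the argument list, and the nested function's compileAdd is emitted only on the branch where the predicate is true.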
const auto & predicate_type = arguments_types[argument_values.size() - 1];
auto * predicate_value = argument_values[argument_values.size() - 1];
auto * head = b.GetInsertBlock();
auto * join_block = llvm::BasicBlock::Create(head->getContext(), "join_block", head->getParent());
auto * if_true = llvm::BasicBlock::Create(head->getContext(), "if_true", head->getParent());
auto * if_false = llvm::BasicBlock::Create(head->getContext(), "if_false", head->getParent());
auto * is_predicate_true = nativeBoolCast(b, predicate_type, predicate_value);
b.CreateCondBr(is_predicate_true, if_true, if_false);
b.SetInsertPoint(if_true);
size_t arguments_size_without_predicate = arguments_types.size() - 1;
DataTypes argument_types_without_predicate;
std::vector<llvm::Value *> argument_values_without_predicate;
argument_types_without_predicate.resize(arguments_size_without_predicate);
argument_values_without_predicate.resize(arguments_size_without_predicate);
for (size_t i = 0; i < arguments_size_without_predicate; ++i)
{
argument_types_without_predicate[i] = arguments_types[i];
argument_values_without_predicate[i] = argument_values[i];
}
nested_func->compileAdd(builder, aggregate_data_ptr, argument_types_without_predicate, argument_values_without_predicate);
b.CreateBr(join_block);
b.SetInsertPoint(if_false);
b.CreateBr(join_block);
b.SetInsertPoint(join_block);
}
void compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
{
nested_func->compileMerge(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
}
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
return nested_func->compileGetResult(builder, aggregate_data_ptr);
}
#endif
};
}

View File

@ -7,11 +7,20 @@
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypesNumber.h>
#include <common/StringRef.h>
#include <Common/assert_cast.h>
#include <AggregateFunctions/IAggregateFunction.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#if USE_EMBEDDED_COMPILER
# include <llvm/IR/IRBuilder.h>
# include <DataTypes/Native.h>
#endif
namespace DB
{
@ -20,6 +29,7 @@ struct Settings;
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NOT_IMPLEMENTED;
}
/** Aggregate functions that store one of passed values.
@ -177,6 +187,265 @@ public:
{
return false;
}
#if USE_EMBEDDED_COMPILER
static constexpr bool is_compilable = true;
static llvm::Value * getValuePtrFromAggregateDataPtr(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr)
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
static constexpr size_t value_offset_from_structure = offsetof(SingleValueDataFixed<T>, value);
auto * type = toNativeType<T>(builder);
auto * value_ptr_with_offset = b.CreateConstGEP1_32(nullptr, aggregate_data_ptr, value_offset_from_structure);
auto * value_ptr = b.CreatePointerCast(value_ptr_with_offset, type->getPointerTo());
return value_ptr;
}
static llvm::Value * getValueFromAggregateDataPtr(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr)
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * type = toNativeType<T>(builder);
auto * value_ptr = getValuePtrFromAggregateDataPtr(builder, aggregate_data_ptr);
return b.CreateLoad(type, value_ptr);
}
static void compileChange(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check)
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * has_value_ptr = b.CreatePointerCast(aggregate_data_ptr, b.getInt1Ty()->getPointerTo());
b.CreateStore(b.getInt1(true), has_value_ptr);
auto * value_ptr = getValuePtrFromAggregateDataPtr(b, aggregate_data_ptr);
b.CreateStore(value_to_check, value_ptr);
}
static void compileChangeMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr)
{
auto * value_src = getValueFromAggregateDataPtr(builder, aggregate_data_src_ptr);
compileChange(builder, aggregate_data_dst_ptr, value_src);
}
static void compileChangeFirstTime(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check)
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * has_value_ptr = b.CreatePointerCast(aggregate_data_ptr, b.getInt1Ty()->getPointerTo());
auto * has_value_value = b.CreateLoad(b.getInt1Ty(), has_value_ptr);
auto * head = b.GetInsertBlock();
auto * join_block = llvm::BasicBlock::Create(head->getContext(), "join_block", head->getParent());
auto * if_should_change = llvm::BasicBlock::Create(head->getContext(), "if_should_change", head->getParent());
auto * if_should_not_change = llvm::BasicBlock::Create(head->getContext(), "if_should_not_change", head->getParent());
b.CreateCondBr(has_value_value, if_should_not_change, if_should_change);
b.SetInsertPoint(if_should_not_change);
b.CreateBr(join_block);
b.SetInsertPoint(if_should_change);
compileChange(builder, aggregate_data_ptr, value_to_check);
b.CreateBr(join_block);
b.SetInsertPoint(join_block);
}
static void compileChangeFirstTimeMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr)
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * has_value_dst_ptr = b.CreatePointerCast(aggregate_data_dst_ptr, b.getInt1Ty()->getPointerTo());
auto * has_value_dst = b.CreateLoad(b.getInt1Ty(), has_value_dst_ptr);
auto * has_value_src_ptr = b.CreatePointerCast(aggregate_data_src_ptr, b.getInt1Ty()->getPointerTo());
auto * has_value_src = b.CreateLoad(b.getInt1Ty(), has_value_src_ptr);
auto * head = b.GetInsertBlock();
auto * join_block = llvm::BasicBlock::Create(head->getContext(), "join_block", head->getParent());
auto * if_should_change = llvm::BasicBlock::Create(head->getContext(), "if_should_change", head->getParent());
auto * if_should_not_change = llvm::BasicBlock::Create(head->getContext(), "if_should_not_change", head->getParent());
b.CreateCondBr(b.CreateAnd(b.CreateNot(has_value_dst), has_value_src), if_should_change, if_should_not_change);
b.SetInsertPoint(if_should_change);
compileChangeMerge(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
b.CreateBr(join_block);
b.SetInsertPoint(if_should_not_change);
b.CreateBr(join_block);
b.SetInsertPoint(join_block);
}
static void compileChangeEveryTime(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check)
{
compileChange(builder, aggregate_data_ptr, value_to_check);
}
static void compileChangeEveryTimeMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr)
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * has_value_src_ptr = b.CreatePointerCast(aggregate_data_src_ptr, b.getInt1Ty()->getPointerTo());
auto * has_value_src = b.CreateLoad(b.getInt1Ty(), has_value_src_ptr);
auto * head = b.GetInsertBlock();
auto * join_block = llvm::BasicBlock::Create(head->getContext(), "join_block", head->getParent());
auto * if_should_change = llvm::BasicBlock::Create(head->getContext(), "if_should_change", head->getParent());
auto * if_should_not_change = llvm::BasicBlock::Create(head->getContext(), "if_should_not_change", head->getParent());
b.CreateCondBr(has_value_src, if_should_change, if_should_not_change);
b.SetInsertPoint(if_should_change);
compileChangeMerge(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
b.CreateBr(join_block);
b.SetInsertPoint(if_should_not_change);
b.CreateBr(join_block);
b.SetInsertPoint(join_block);
}
template <bool is_less>
static void compileChangeComparison(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check)
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * has_value_ptr = b.CreatePointerCast(aggregate_data_ptr, b.getInt1Ty()->getPointerTo());
auto * has_value_value = b.CreateLoad(b.getInt1Ty(), has_value_ptr);
auto * value = getValueFromAggregateDataPtr(b, aggregate_data_ptr);
auto * head = b.GetInsertBlock();
auto * join_block = llvm::BasicBlock::Create(head->getContext(), "join_block", head->getParent());
auto * if_should_change = llvm::BasicBlock::Create(head->getContext(), "if_should_change", head->getParent());
auto * if_should_not_change = llvm::BasicBlock::Create(head->getContext(), "if_should_not_change", head->getParent());
auto is_signed = std::numeric_limits<T>::is_signed;
llvm::Value * should_change_after_comparison = nullptr;
if constexpr (is_less)
{
if (value_to_check->getType()->isIntegerTy())
should_change_after_comparison = is_signed ? b.CreateICmpSLT(value_to_check, value) : b.CreateICmpULT(value_to_check, value);
else
should_change_after_comparison = b.CreateFCmpOLT(value_to_check, value);
}
else
{
if (value_to_check->getType()->isIntegerTy())
should_change_after_comparison = is_signed ? b.CreateICmpSGT(value_to_check, value) : b.CreateICmpUGT(value_to_check, value);
else
should_change_after_comparison = b.CreateFCmpOGT(value_to_check, value);
}
b.CreateCondBr(b.CreateOr(b.CreateNot(has_value_value), should_change_after_comparison), if_should_change, if_should_not_change);
b.SetInsertPoint(if_should_change);
compileChange(builder, aggregate_data_ptr, value_to_check);
b.CreateBr(join_block);
b.SetInsertPoint(if_should_not_change);
b.CreateBr(join_block);
b.SetInsertPoint(join_block);
}
template <bool is_less>
static void compileChangeComparisonMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr)
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * has_value_dst_ptr = b.CreatePointerCast(aggregate_data_dst_ptr, b.getInt1Ty()->getPointerTo());
auto * has_value_dst = b.CreateLoad(b.getInt1Ty(), has_value_dst_ptr);
auto * value_dst = getValueFromAggregateDataPtr(b, aggregate_data_dst_ptr);
auto * has_value_src_ptr = b.CreatePointerCast(aggregate_data_src_ptr, b.getInt1Ty()->getPointerTo());
auto * has_value_src = b.CreateLoad(b.getInt1Ty(), has_value_src_ptr);
auto * value_src = getValueFromAggregateDataPtr(b, aggregate_data_src_ptr);
auto * head = b.GetInsertBlock();
auto * join_block = llvm::BasicBlock::Create(head->getContext(), "join_block", head->getParent());
auto * if_should_change = llvm::BasicBlock::Create(head->getContext(), "if_should_change", head->getParent());
auto * if_should_not_change = llvm::BasicBlock::Create(head->getContext(), "if_should_not_change", head->getParent());
auto is_signed = std::numeric_limits<T>::is_signed;
llvm::Value * should_change_after_comparison = nullptr;
if constexpr (is_less)
{
if (value_src->getType()->isIntegerTy())
should_change_after_comparison = is_signed ? b.CreateICmpSLT(value_src, value_dst) : b.CreateICmpULT(value_src, value_dst);
else
should_change_after_comparison = b.CreateFCmpOLT(value_src, value_dst);
}
else
{
if (value_src->getType()->isIntegerTy())
should_change_after_comparison = is_signed ? b.CreateICmpSGT(value_src, value_dst) : b.CreateICmpUGT(value_src, value_dst);
else
should_change_after_comparison = b.CreateFCmpOGT(value_src, value_dst);
}
b.CreateCondBr(b.CreateAnd(has_value_src, b.CreateOr(b.CreateNot(has_value_dst), should_change_after_comparison)), if_should_change, if_should_not_change);
b.SetInsertPoint(if_should_change);
compileChangeMerge(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
b.CreateBr(join_block);
b.SetInsertPoint(if_should_not_change);
b.CreateBr(join_block);
b.SetInsertPoint(join_block);
}
static void compileChangeIfLess(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check)
{
static constexpr bool is_less = true;
compileChangeComparison<is_less>(builder, aggregate_data_ptr, value_to_check);
}
static void compileChangeIfLessMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr)
{
static constexpr bool is_less = true;
compileChangeComparisonMerge<is_less>(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
}
static void compileChangeIfGreater(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check)
{
static constexpr bool is_less = false;
compileChangeComparison<is_less>(builder, aggregate_data_ptr, value_to_check);
}
static void compileChangeIfGreaterMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr)
{
static constexpr bool is_less = false;
compileChangeComparisonMerge<is_less>(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
}
static llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr)
{
return getValueFromAggregateDataPtr(builder, aggregate_data_ptr);
}
#endif
};
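Each compileChange* helper above emits IR for one of four update policies over the same (has_value, value) pair; the *Merge variants additionally require the source state to hold a value. A hedged interpreted sketch of those policies, using an illustrative struct rather than the real SingleValueDataFixed<T>:
// Hedged sketch of the update policies mirrored by the IR above; `State` is illustrative.
template <typename T>
struct State
{
    bool has_value = false;
    T value{};

    void change(T x) { has_value = true; value = x; }                      /// compileChange
    void changeFirstTime(T x) { if (!has_value) change(x); }               /// any
    void changeEveryTime(T x) { change(x); }                               /// anyLast
    void changeIfLess(T x) { if (!has_value || x < value) change(x); }     /// min
    void changeIfGreater(T x) { if (!has_value || x > value) change(x); }  /// max
};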
@ -400,6 +669,13 @@ public:
{
return true;
}
#if USE_EMBEDDED_COMPILER
static constexpr bool is_compilable = false;
#endif
};
static_assert(
@ -576,6 +852,13 @@ public:
{
return false;
}
#if USE_EMBEDDED_COMPILER
static constexpr bool is_compilable = false;
#endif
};
@ -593,6 +876,22 @@ struct AggregateFunctionMinData : Data
bool changeIfBetter(const Self & to, Arena * arena) { return this->changeIfLess(to, arena); }
static const char * name() { return "min"; }
#if USE_EMBEDDED_COMPILER
static constexpr bool is_compilable = Data::is_compilable;
static void compileChangeIfBetter(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check)
{
Data::compileChangeIfLess(builder, aggregate_data_ptr, value_to_check);
}
static void compileChangeIfBetterMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr)
{
Data::compileChangeIfLessMerge(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
}
#endif
};
template <typename Data>
@ -604,6 +903,22 @@ struct AggregateFunctionMaxData : Data
bool changeIfBetter(const Self & to, Arena * arena) { return this->changeIfGreater(to, arena); }
static const char * name() { return "max"; }
#if USE_EMBEDDED_COMPILER
static constexpr bool is_compilable = Data::is_compilable;
static void compileChangeIfBetter(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check)
{
Data::compileChangeIfGreater(builder, aggregate_data_ptr, value_to_check);
}
static void compileChangeIfBetterMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr)
{
Data::compileChangeIfGreaterMerge(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
}
#endif
};
template <typename Data>
@ -615,6 +930,22 @@ struct AggregateFunctionAnyData : Data
bool changeIfBetter(const Self & to, Arena * arena) { return this->changeFirstTime(to, arena); }
static const char * name() { return "any"; }
#if USE_EMBEDDED_COMPILER
static constexpr bool is_compilable = Data::is_compilable;
static void compileChangeIfBetter(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check)
{
Data::compileChangeFirstTime(builder, aggregate_data_ptr, value_to_check);
}
static void compileChangeIfBetterMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr)
{
Data::compileChangeFirstTimeMerge(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
}
#endif
};
template <typename Data>
@ -626,6 +957,22 @@ struct AggregateFunctionAnyLastData : Data
bool changeIfBetter(const Self & to, Arena * arena) { return this->changeEveryTime(to, arena); }
static const char * name() { return "anyLast"; }
#if USE_EMBEDDED_COMPILER
static constexpr bool is_compilable = Data::is_compilable;
static void compileChangeIfBetter(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check)
{
Data::compileChangeEveryTime(builder, aggregate_data_ptr, value_to_check);
}
static void compileChangeIfBetterMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr)
{
Data::compileChangeEveryTimeMerge(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
}
#endif
};
@ -693,6 +1040,13 @@ struct AggregateFunctionAnyHeavyData : Data
}
static const char * name() { return "anyHeavy"; }
#if USE_EMBEDDED_COMPILER
static constexpr bool is_compilable = false;
#endif
};
@ -752,6 +1106,62 @@ public:
{
this->data(place).insertResultInto(to);
}
#if USE_EMBEDDED_COMPILER
bool isCompilable() const override
{
if constexpr (!Data::is_compilable)
return false;
return canBeNativeType(*this->argument_types[0]);
}
void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
b.CreateMemSet(aggregate_data_ptr, llvm::ConstantInt::get(b.getInt8Ty(), 0), this->sizeOfData(), llvm::assumeAligned(this->alignOfData()));
}
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes &, const std::vector<llvm::Value *> & argument_values) const override
{
if constexpr (Data::is_compilable)
{
Data::compileChangeIfBetter(builder, aggregate_data_ptr, argument_values[0]);
}
else
{
throw Exception(getName() + " is not JIT-compilable", ErrorCodes::NOT_IMPLEMENTED);
}
}
void compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
{
if constexpr (Data::is_compilable)
{
Data::compileChangeIfBetterMerge(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
}
else
{
throw Exception(getName() + " is not JIT-compilable", ErrorCodes::NOT_IMPLEMENTED);
}
}
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
if constexpr (Data::is_compilable)
{
return Data::compileGetResult(builder, aggregate_data_ptr);
}
else
{
throw Exception(getName() + " is not JIT-compilable", ErrorCodes::NOT_IMPLEMENTED);
}
}
#endif
};
}

View File

@ -6,9 +6,18 @@
#include <Common/assert_cast.h>
#include <Columns/ColumnsCommon.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#if USE_EMBEDDED_COMPILER
# include <llvm/IR/IRBuilder.h>
# include <DataTypes/Native.h>
#endif
namespace DB
{
@ -183,6 +192,93 @@ public:
}
AggregateFunctionPtr getNestedFunction() const override { return nested_function; }
#if USE_EMBEDDED_COMPILER
bool isCompilable() const override
{
return this->nested_function->isCompilable();
}
void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
if constexpr (result_is_nullable)
b.CreateMemSet(aggregate_data_ptr, llvm::ConstantInt::get(b.getInt8Ty(), 0), this->prefix_size, llvm::assumeAligned(this->alignOfData()));
auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstGEP1_32(nullptr, aggregate_data_ptr, this->prefix_size);
this->nested_function->compileCreate(b, aggregate_data_ptr_with_prefix_size_offset);
}
void compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
if constexpr (result_is_nullable)
{
auto * aggregate_data_is_null_dst_value = b.CreateLoad(aggregate_data_dst_ptr);
auto * aggregate_data_is_null_src_value = b.CreateLoad(aggregate_data_src_ptr);
auto * is_src_null = nativeBoolCast(b, std::make_shared<DataTypeUInt8>(), aggregate_data_is_null_src_value);
auto * is_null_result_value = b.CreateSelect(is_src_null, llvm::ConstantInt::get(b.getInt8Ty(), 1), aggregate_data_is_null_dst_value);
b.CreateStore(is_null_result_value, aggregate_data_dst_ptr);
}
auto * aggregate_data_dst_ptr_with_prefix_size_offset = b.CreateConstGEP1_32(nullptr, aggregate_data_dst_ptr, this->prefix_size);
auto * aggregate_data_src_ptr_with_prefix_size_offset = b.CreateConstGEP1_32(nullptr, aggregate_data_src_ptr, this->prefix_size);
this->nested_function->compileMerge(b, aggregate_data_dst_ptr_with_prefix_size_offset, aggregate_data_src_ptr_with_prefix_size_offset);
}
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, this->getReturnType());
llvm::Value * result = nullptr;
if constexpr (result_is_nullable)
{
auto * place = b.CreateLoad(b.getInt8Ty(), aggregate_data_ptr);
auto * head = b.GetInsertBlock();
auto * join_block = llvm::BasicBlock::Create(head->getContext(), "join_block", head->getParent());
auto * if_null = llvm::BasicBlock::Create(head->getContext(), "if_null", head->getParent());
auto * if_not_null = llvm::BasicBlock::Create(head->getContext(), "if_not_null", head->getParent());
auto * nullable_value_ptr = b.CreateAlloca(return_type);
b.CreateStore(llvm::ConstantInt::getNullValue(return_type), nullable_value_ptr);
auto * nullable_value = b.CreateLoad(return_type, nullable_value_ptr);
b.CreateCondBr(nativeBoolCast(b, std::make_shared<DataTypeUInt8>(), place), if_not_null, if_null);
b.SetInsertPoint(if_null);
b.CreateStore(b.CreateInsertValue(nullable_value, b.getInt1(true), {1}), nullable_value_ptr);
b.CreateBr(join_block);
b.SetInsertPoint(if_not_null);
auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstGEP1_32(nullptr, aggregate_data_ptr, this->prefix_size);
auto * nested_result = this->nested_function->compileGetResult(builder, aggregate_data_ptr_with_prefix_size_offset);
b.CreateStore(b.CreateInsertValue(nullable_value, nested_result, {0}), nullable_value_ptr);
b.CreateBr(join_block);
b.SetInsertPoint(join_block);
result = b.CreateLoad(return_type, nullable_value_ptr);
}
else
{
result = this->nested_function->compileGetResult(b, aggregate_data_ptr);
}
return result;
}
#endif
};
@ -226,6 +322,44 @@ public:
if (!memoryIsByte(null_map, batch_size, 1))
this->setFlag(place);
}
#if USE_EMBEDDED_COMPILER
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector<llvm::Value *> & argument_values) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
const auto & nullable_type = arguments_types[0];
const auto & nullable_value = argument_values[0];
auto * wrapped_value = b.CreateExtractValue(nullable_value, {0});
auto * is_null_value = b.CreateExtractValue(nullable_value, {1});
auto * head = b.GetInsertBlock();
auto * join_block = llvm::BasicBlock::Create(head->getContext(), "join_block", head->getParent());
auto * if_null = llvm::BasicBlock::Create(head->getContext(), "if_null", head->getParent());
auto * if_not_null = llvm::BasicBlock::Create(head->getContext(), "if_not_null", head->getParent());
b.CreateCondBr(is_null_value, if_null, if_not_null);
b.SetInsertPoint(if_null);
b.CreateBr(join_block);
b.SetInsertPoint(if_not_null);
if constexpr (result_is_nullable)
b.CreateStore(llvm::ConstantInt::get(b.getInt8Ty(), 1), aggregate_data_ptr);
auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstGEP1_32(nullptr, aggregate_data_ptr, this->prefix_size);
this->nested_function->compileAdd(b, aggregate_data_ptr_with_prefix_size_offset, { removeNullable(nullable_type) }, { wrapped_value });
b.CreateBr(join_block);
b.SetInsertPoint(join_block);
}
#endif
};
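For the single-argument nullable wrapper, the branch above ignores NULL rows; otherwise it sets the result-not-null byte (when result_is_nullable) and passes the unwrapped value to the nested compileAdd. A hedged interpreted sketch with illustrative names:
#include <cstddef>
#include <cstdint>
#include <functional>
#include <optional>
// Hedged sketch: the null-skipping rule encoded by the if_null / if_not_null branch above.
// `place` stands for the aggregate state; its first byte is the "result is not null" flag.
void addNullableUnary(
    uint8_t * place,
    std::optional<double> argument,
    bool result_is_nullable,
    size_t prefix_size,
    const std::function<void(uint8_t *, double)> & nested_add)
{
    if (!argument.has_value())
        return;                                     /// if_null: nothing to do
    if (result_is_nullable)
        place[0] = 1;                               /// at least one non-NULL value was seen
    nested_add(place + prefix_size, *argument);     /// nested state lives after the prefix
}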
@ -277,6 +411,90 @@ public:
this->nested_function->add(this->nestedPlace(place), nested_columns, row_num, arena);
}
#if USE_EMBEDDED_COMPILER
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector<llvm::Value *> & argument_values) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
size_t arguments_size = arguments_types.size();
DataTypes non_nullable_types;
std::vector<llvm::Value * > wrapped_values;
std::vector<llvm::Value * > is_null_values;
non_nullable_types.resize(arguments_size);
wrapped_values.resize(arguments_size);
is_null_values.resize(arguments_size);
for (size_t i = 0; i < arguments_size; ++i)
{
const auto & argument_value = argument_values[i];
if (is_nullable[i])
{
auto * wrapped_value = b.CreateExtractValue(argument_value, {0});
if constexpr (null_is_skipped)
is_null_values[i] = b.CreateExtractValue(argument_value, {1});
wrapped_values[i] = wrapped_value;
non_nullable_types[i] = removeNullable(arguments_types[i]);
}
else
{
wrapped_values[i] = argument_value;
non_nullable_types[i] = arguments_types[i];
}
}
if constexpr (null_is_skipped)
{
auto * head = b.GetInsertBlock();
auto * join_block = llvm::BasicBlock::Create(head->getContext(), "join_block", head->getParent());
auto * if_null = llvm::BasicBlock::Create(head->getContext(), "if_null", head->getParent());
auto * if_not_null = llvm::BasicBlock::Create(head->getContext(), "if_not_null", head->getParent());
auto * values_have_null_ptr = b.CreateAlloca(b.getInt1Ty());
b.CreateStore(b.getInt1(false), values_have_null_ptr);
for (auto * is_null_value : is_null_values)
{
if (!is_null_value)
continue;
auto * values_have_null = b.CreateLoad(b.getInt1Ty(), values_have_null_ptr);
b.CreateStore(b.CreateOr(values_have_null, is_null_value), values_have_null_ptr);
}
b.CreateCondBr(b.CreateLoad(b.getInt1Ty(), values_have_null_ptr), if_null, if_not_null);
b.SetInsertPoint(if_null);
b.CreateBr(join_block);
b.SetInsertPoint(if_not_null);
if constexpr (result_is_nullable)
b.CreateStore(llvm::ConstantInt::get(b.getInt8Ty(), 1), aggregate_data_ptr);
auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstGEP1_32(nullptr, aggregate_data_ptr, this->prefix_size);
this->nested_function->compileAdd(b, aggregate_data_ptr_with_prefix_size_offset, arguments_types, wrapped_values);
b.CreateBr(join_block);
b.SetInsertPoint(join_block);
}
else
{
b.CreateStore(llvm::ConstantInt::get(b.getInt8Ty(), 1), aggregate_data_ptr);
auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstGEP1_32(nullptr, aggregate_data_ptr, this->prefix_size);
this->nested_function->compileAdd(b, aggregate_data_ptr_with_prefix_size_offset, non_nullable_types, wrapped_values);
}
}
#endif
private:
enum { MAX_ARGS = 8 };
size_t number_of_arguments = 0;
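With null_is_skipped, the generated code above ORs the per-argument null flags together and skips the whole row as soon as any nullable argument is NULL; only then do the unwrapped values reach the nested compileAdd. A hedged sketch of that row-level check, with illustrative types:
#include <array>
#include <cstddef>
#include <optional>
// Hedged sketch: a row is skipped if any nullable argument is NULL (null_is_skipped case).
template <size_t N>
bool rowHasNull(const std::array<std::optional<double>, N> & row)
{
    for (const auto & value : row)
        if (!value.has_value())
            return true;
    return false;
}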

View File

@ -12,6 +12,14 @@
#include <AggregateFunctions/IAggregateFunction.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#if USE_EMBEDDED_COMPILER
# include <llvm/IR/IRBuilder.h>
# include <DataTypes/Native.h>
#endif
namespace DB
{
@ -385,6 +393,80 @@ public:
column.getData().push_back(this->data(place).get());
}
#if USE_EMBEDDED_COMPILER
bool isCompilable() const override
{
if constexpr (Type == AggregateFunctionTypeSumKahan)
return false;
bool can_be_compiled = true;
for (const auto & argument_type : this->argument_types)
can_be_compiled &= canBeNativeType(*argument_type);
auto return_type = getReturnType();
can_be_compiled &= canBeNativeType(*return_type);
return can_be_compiled;
}
void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, getReturnType());
auto * aggregate_sum_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo());
b.CreateStore(llvm::Constant::getNullValue(return_type), aggregate_sum_ptr);
}
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector<llvm::Value *> & argument_values) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, getReturnType());
auto * sum_value_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo());
auto * sum_value = b.CreateLoad(return_type, sum_value_ptr);
const auto & argument_type = arguments_types[0];
const auto & argument_value = argument_values[0];
auto * value_cast_to_result = nativeCast(b, argument_type, argument_value, return_type);
auto * sum_result_value = sum_value->getType()->isIntegerTy() ? b.CreateAdd(sum_value, value_cast_to_result) : b.CreateFAdd(sum_value, value_cast_to_result);
b.CreateStore(sum_result_value, sum_value_ptr);
}
void compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, getReturnType());
auto * sum_value_dst_ptr = b.CreatePointerCast(aggregate_data_dst_ptr, return_type->getPointerTo());
auto * sum_value_dst = b.CreateLoad(return_type, sum_value_dst_ptr);
auto * sum_value_src_ptr = b.CreatePointerCast(aggregate_data_src_ptr, return_type->getPointerTo());
auto * sum_value_src = b.CreateLoad(return_type, sum_value_src_ptr);
auto * sum_return_value = sum_value_dst->getType()->isIntegerTy() ? b.CreateAdd(sum_value_dst, sum_value_src) : b.CreateFAdd(sum_value_dst, sum_value_src);
b.CreateStore(sum_return_value, sum_value_dst_ptr);
}
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, getReturnType());
auto * sum_value_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo());
return b.CreateLoad(return_type, sum_value_ptr);
}
#endif
private:
UInt32 scale;
};
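The JIT path above keeps the whole sum state as a single native value: compileCreate zero-initializes it, compileAdd adds the argument cast to the return type, compileMerge adds two states, and compileGetResult loads the value back. A hedged sketch of the same life cycle in plain C++ (`SumState` is illustrative):
// Hedged sketch of the sum state life cycle mirrored by the IR above.
template <typename T>
struct SumState
{
    T sum{};                                              /// compileCreate: zero-initialize

    void add(T value) { sum += value; }                   /// compileAdd
    void merge(const SumState & rhs) { sum += rhs.sum; }  /// compileMerge
    T getResult() const { return sum; }                   /// compileGetResult
};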

View File

@ -48,6 +48,15 @@ public:
String getName() const final { return "sumCount"; }
#if USE_EMBEDDED_COMPILER
bool isCompilable() const override
{
return false;
}
#endif
private:
UInt32 scale;
};

View File

@ -10,4 +10,44 @@ DataTypePtr IAggregateFunction::getStateType() const
return std::make_shared<DataTypeAggregateFunction>(shared_from_this(), argument_types, parameters);
}
String IAggregateFunction::getDescription() const
{
String description;
description += getName();
description += '(';
for (const auto & parameter : parameters)
{
description += parameter.dump();
description += ", ";
}
if (!parameters.empty())
{
description.pop_back();
description.pop_back();
}
description += ')';
description += '(';
for (const auto & argument_type : argument_types)
{
description += argument_type->getName();
description += ", ";
}
if (!argument_types.empty())
{
description.pop_back();
description.pop_back();
}
description += ')';
return description;
}
}
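getDescription concatenates the name, the parameter dumps, and the argument type names into name(parameters)(argument_types), dropping each trailing ", ". A hedged standalone sketch of the same formatting rule; the inputs in the final comment are hypothetical:
#include <string>
#include <vector>
// Hedged sketch replicating the formatting rule used by getDescription above.
std::string formatDescription(
    const std::string & name,
    const std::vector<std::string> & parameters,
    const std::vector<std::string> & argument_types)
{
    auto join = [](const std::vector<std::string> & items)
    {
        std::string result;
        for (const auto & item : items)
            result += item + ", ";
        if (!result.empty())
            result.resize(result.size() - 2);   /// drop the trailing ", "
        return result;
    };
    return name + '(' + join(parameters) + ")(" + join(argument_types) + ')';
}
/// formatDescription("quantile", {"0.5"}, {"UInt64"}) == "quantile(0.5)(UInt64)"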

View File

@ -9,11 +9,21 @@
#include <Common/Exception.h>
#include <common/types.h>
#if !defined(ARCADIA_BUILD)
# include "config_core.h"
#endif
#include <cstddef>
#include <memory>
#include <vector>
#include <type_traits>
namespace llvm
{
class LLVMContext;
class Value;
class IRBuilderBase;
}
namespace DB
{
@ -208,6 +218,26 @@ public:
const IColumn ** columns,
Arena * arena) const = 0;
/** Insert results of the aggregate function into the result column for a batch of places.
* If destroy_place_after_insert is true, the implementation of this method
* must destroy each aggregate place whose state was successfully inserted into the result column.
* If an exception is thrown during insertion, all places that were not inserted must still be destroyed.
*/
virtual void insertResultIntoBatch(
size_t batch_size,
AggregateDataPtr * places,
size_t place_offset,
IColumn & to,
Arena * arena,
bool destroy_place_after_insert) const = 0;
/** Destroy batch of aggregate places.
*/
virtual void destroyBatch(
size_t batch_size,
AggregateDataPtr * places,
size_t place_offset) const noexcept = 0;
/** By default all NULLs are skipped during aggregation.
* If an aggregate function wants to handle NULLs differently, it overrides this function and returns its own null adapter.
* If it returns nullptr, the default adapter will be used.
@ -241,6 +271,40 @@ public:
// of true window functions, so this hack-ish interface suffices.
virtual bool isOnlyWindowFunction() const { return false; }
/// Description of AggregateFunction in the form name(parameters)(argument_types).
String getDescription() const;
#if USE_EMBEDDED_COMPILER
/// Whether the function is JIT-compilable.
virtual bool isCompilable() const { return false; }
/// compileCreate should generate code for initialization of aggregate function state in aggregate_data_ptr
virtual void compileCreate(llvm::IRBuilderBase & /*builder*/, llvm::Value * /*aggregate_data_ptr*/) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
}
/// compileAdd should generate code for updating aggregate function state stored in aggregate_data_ptr
virtual void compileAdd(llvm::IRBuilderBase & /*builder*/, llvm::Value * /*aggregate_data_ptr*/, const DataTypes & /*arguments_types*/, const std::vector<llvm::Value *> & /*arguments_values*/) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
}
/// compileMerge should generate code for merging aggregate function states stored in aggregate_data_dst_ptr and aggregate_data_src_ptr
virtual void compileMerge(llvm::IRBuilderBase & /*builder*/, llvm::Value * /*aggregate_data_dst_ptr*/, llvm::Value * /*aggregate_data_src_ptr*/) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
}
/// compileGetResult should generate code for getting result value from aggregate function state stored in aggregate_data_ptr
virtual llvm::Value * compileGetResult(llvm::IRBuilderBase & /*builder*/, llvm::Value * /*aggregate_data_ptr*/) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
}
#endif
protected:
DataTypes argument_types;
Array parameters;
@ -415,6 +479,37 @@ public:
static_cast<const Derived *>(this)->add(place + place_offset, columns, i, arena);
}
}
void insertResultIntoBatch(size_t batch_size, AggregateDataPtr * places, size_t place_offset, IColumn & to, Arena * arena, bool destroy_place_after_insert) const override
{
size_t batch_index = 0;
try
{
for (; batch_index < batch_size; ++batch_index)
{
static_cast<const Derived *>(this)->insertResultInto(places[batch_index] + place_offset, to, arena);
if (destroy_place_after_insert)
static_cast<const Derived *>(this)->destroy(places[batch_index] + place_offset);
}
}
catch (...)
{
for (size_t destroy_index = batch_index; destroy_index < batch_size; ++destroy_index)
static_cast<const Derived *>(this)->destroy(places[destroy_index] + place_offset);
throw;
}
}
void destroyBatch(size_t batch_size, AggregateDataPtr * places, size_t place_offset) const noexcept override
{
for (size_t i = 0; i < batch_size; ++i)
{
static_cast<const Derived *>(this)->destroy(places[i] + place_offset);
}
}
};
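insertResultIntoBatch above guarantees that every place is destroyed exactly once even if an insert throws: places already handled are destroyed eagerly when requested, and the catch block destroys everything from the failing index onward before rethrowing. A hedged sketch of that exception-safety pattern in isolation; the callbacks are illustrative:
#include <cstddef>
#include <functional>
// Hedged sketch of the "destroy the rest on failure" pattern used by insertResultIntoBatch.
void insertBatchExceptionSafe(
    size_t batch_size,
    const std::function<void(size_t)> & insert,
    const std::function<void(size_t)> & destroy,
    bool destroy_after_insert)
{
    size_t index = 0;
    try
    {
        for (; index < batch_size; ++index)
        {
            insert(index);
            if (destroy_after_insert)
                destroy(index);     /// this place is done, free it immediately
        }
    }
    catch (...)
    {
        /// The failing place and everything after it were never inserted: free them here.
        for (size_t rest = index; rest < batch_size; ++rest)
            destroy(rest);
        throw;
    }
}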

View File

@ -54,7 +54,6 @@ add_subdirectory (Dictionaries)
add_subdirectory (Disks)
add_subdirectory (Storages)
add_subdirectory (Parsers)
add_subdirectory (Parsers/New)
add_subdirectory (IO)
add_subdirectory (Functions)
add_subdirectory (Interpreters)
@ -86,6 +85,7 @@ if (USE_AMQPCPP)
endif()
if (USE_LIBPQXX)
add_headers_and_sources(dbms Core/PostgreSQL)
add_headers_and_sources(dbms Databases/PostgreSQL)
add_headers_and_sources(dbms Storages/PostgreSQL)
endif()
@ -222,12 +222,12 @@ endif()
if (MAKE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES)
add_library (dbms STATIC ${dbms_headers} ${dbms_sources})
target_link_libraries (dbms PRIVATE clickhouse_parsers_new jemalloc libdivide ${DBMS_COMMON_LIBRARIES})
target_link_libraries (dbms PRIVATE jemalloc libdivide ${DBMS_COMMON_LIBRARIES})
set (all_modules dbms)
else()
add_library (dbms SHARED ${dbms_headers} ${dbms_sources})
target_link_libraries (dbms PUBLIC ${all_modules} ${DBMS_COMMON_LIBRARIES})
target_link_libraries (clickhouse_interpreters PRIVATE clickhouse_parsers_new jemalloc libdivide)
target_link_libraries (clickhouse_interpreters PRIVATE jemalloc libdivide)
list (APPEND all_modules dbms)
# force all split libs to be linked
if (OS_DARWIN)

View File

@ -7,6 +7,11 @@
#pragma clang diagnostic ignored "-Wreserved-id-macro"
#endif
#undef __msan_unpoison
#undef __msan_test_shadow
#undef __msan_print_shadow
#undef __msan_unpoison_string
#define __msan_unpoison(X, Y)
#define __msan_test_shadow(X, Y) (false)
#define __msan_print_shadow(X, Y)

View File

@ -0,0 +1,74 @@
#include "Connection.h"
#include <common/logger_useful.h>
namespace postgres
{
Connection::Connection(const ConnectionInfo & connection_info_, bool replication_, size_t num_tries_)
: connection_info(connection_info_), replication(replication_), num_tries(num_tries_)
, log(&Poco::Logger::get("PostgreSQLReplicaConnection"))
{
if (replication)
{
connection_info = std::make_pair(
fmt::format("{} replication=database", connection_info.first), connection_info.second);
}
}
void Connection::execWithRetry(const std::function<void(pqxx::nontransaction &)> & exec)
{
for (size_t try_no = 0; try_no < num_tries; ++try_no)
{
try
{
pqxx::nontransaction tx(getRef());
exec(tx);
break;
}
catch (const pqxx::broken_connection & e)
{
LOG_DEBUG(log, "Cannot execute query due to connection failure, attempt: {}/{}. (Message: {})",
try_no, num_tries, e.what());
if (try_no == num_tries)
throw;
}
}
}
pqxx::connection & Connection::getRef()
{
connect();
assert(connection != nullptr);
return *connection;
}
void Connection::tryUpdateConnection()
{
try
{
updateConnection();
}
catch (const pqxx::broken_connection & e)
{
LOG_ERROR(log, "Unable to update connection: {}", e.what());
}
}
void Connection::updateConnection()
{
if (connection)
connection->close();
/// The pqxx::connection constructor throws if the connection cannot be established.
connection = std::make_unique<pqxx::connection>(connection_info.first);
if (replication)
connection->set_variable("default_transaction_isolation", "'repeatable read'");
LOG_DEBUG(&Poco::Logger::get("PostgreSQLConnection"), "New connection to {}", connection_info.second);
}
void Connection::connect()
{
if (!connection || !connection->is_open())
updateConnection();
}
}
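A hedged usage sketch of Connection::execWithRetry; the include path, connection parameters, and query are illustrative assumptions, not taken from this diff:
#include <Core/PostgreSQL/Connection.h>   /// assumed location of the header added above
void pingPostgres()
{
    postgres::Connection connection({"dbname=postgres host=localhost port=5432 user=postgres password=secret",
                                     "localhost:5432"});
    connection.execWithRetry([](pqxx::nontransaction & tx)
    {
        tx.exec("SELECT 1");    /// retried up to num_tries times on pqxx::broken_connection
    });
}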

View File

@ -0,0 +1,47 @@
#pragma once
#include <pqxx/pqxx> // Y_IGNORE
#include <Core/Types.h>
#include <boost/noncopyable.hpp>
/* Methods to work with a PostgreSQL connection object.
* Should only be used when a single, long-lived connection object is required
* and there are no concurrent queries on it.
* Currently the only use case is the replication handler for replication from PostgreSQL.
* All other integration engines use a pool with failover.
**/
namespace Poco { class Logger; }
namespace postgres
{
using ConnectionInfo = std::pair<String, String>;
using ConnectionPtr = std::unique_ptr<pqxx::connection>;
class Connection : private boost::noncopyable
{
public:
Connection(const ConnectionInfo & connection_info_, bool replication_ = false, size_t num_tries_ = 3);
void execWithRetry(const std::function<void(pqxx::nontransaction &)> & exec);
pqxx::connection & getRef();
void connect();
void tryUpdateConnection();
const ConnectionInfo & getConnectionInfo() { return connection_info; }
private:
void updateConnection();
ConnectionPtr connection;
ConnectionInfo connection_info;
bool replication;
size_t num_tries;
Poco::Logger * log;
};
}

View File

@ -1,7 +1,7 @@
#include <Storages/PostgreSQL/PoolWithFailover.h>
#include "PoolWithFailover.h"
#include "Utils.h"
#include <Common/parseRemoteDescription.h>
#include <Common/Exception.h>
#include <IO/Operators.h>
namespace DB
{
@ -14,18 +14,6 @@ namespace ErrorCodes
namespace postgres
{
String formatConnectionString(String dbname, String host, UInt16 port, String user, String password)
{
DB::WriteBufferFromOwnString out;
out << "dbname=" << DB::quote << dbname
<< " host=" << DB::quote << host
<< " port=" << port
<< " user=" << DB::quote << user
<< " password=" << DB::quote << password
<< " connect_timeout=10";
return out.str();
}
PoolWithFailover::PoolWithFailover(
const Poco::Util::AbstractConfiguration & config, const String & config_prefix,
size_t pool_size, size_t pool_wait_timeout_, size_t max_tries_)
@ -58,14 +46,14 @@ PoolWithFailover::PoolWithFailover(
auto replica_user = config.getString(replica_name + ".user", user);
auto replica_password = config.getString(replica_name + ".password", password);
auto connection_string = formatConnectionString(db, replica_host, replica_port, replica_user, replica_password);
auto connection_string = formatConnectionString(db, replica_host, replica_port, replica_user, replica_password).first;
replicas_with_priority[priority].emplace_back(connection_string, pool_size);
}
}
}
else
{
auto connection_string = formatConnectionString(db, host, port, user, password);
auto connection_string = formatConnectionString(db, host, port, user, password).first;
replicas_with_priority[0].emplace_back(connection_string, pool_size);
}
}
@ -85,7 +73,7 @@ PoolWithFailover::PoolWithFailover(
for (const auto & [host, port] : addresses)
{
LOG_DEBUG(&Poco::Logger::get("PostgreSQLPoolWithFailover"), "Adding address host: {}, port: {} to connection pool", host, port);
auto connection_string = formatConnectionString(database, host, port, user, password);
auto connection_string = formatConnectionString(database, host, port, user, password).first;
replicas_with_priority[0].emplace_back(connection_string, pool_size);
}
}

View File

@ -1,16 +1,14 @@
#pragma once
#include "ConnectionHolder.h"
#include <mutex>
#include <Poco/Util/AbstractConfiguration.h>
#include <Storages/PostgreSQL/ConnectionHolder.h>
#include <common/logger_useful.h>
namespace postgres
{
String formatConnectionString(String dbname, String host, UInt16 port, String user, String password);
class PoolWithFailover
{

View File

@ -0,0 +1,19 @@
#include "Utils.h"
#include <IO/Operators.h>
namespace postgres
{
ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, String user, String password)
{
DB::WriteBufferFromOwnString out;
out << "dbname=" << DB::quote << dbname
<< " host=" << DB::quote << host
<< " port=" << port
<< " user=" << DB::quote << user
<< " password=" << DB::quote << password
<< " connect_timeout=10";
return std::make_pair(out.str(), host + ':' + DB::toString(port));
}
}
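For reference, a hedged worked example of the pair formatConnectionString produces; the inputs are hypothetical:
#include "Utils.h"
void connectionStringExample()
{
    auto info = postgres::formatConnectionString("db", "example.org", 5432, "user", "pass");
    /// info.first  == "dbname='db' host='example.org' port=5432 user='user' password='pass' connect_timeout=10"
    /// info.second == "example.org:5432"
}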

View File

@ -0,0 +1,17 @@
#pragma once
#include <pqxx/pqxx> // Y_IGNORE
#include <Core/Types.h>
#include "Connection.h"
#include <Common/Exception.h>
namespace pqxx
{
using ReadTransaction = pqxx::read_transaction;
using ReplicationTransaction = pqxx::transaction<isolation_level::repeatable_read, write_policy::read_only>;
}
namespace postgres
{
ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, String user, String password);
}

View File

@ -0,0 +1,241 @@
#include "insertPostgreSQLValue.h"
#if USE_LIBPQXX
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnDecimal.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Interpreters/convertFieldToType.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <Common/assert_cast.h>
#include <pqxx/pqxx> // Y_IGNORE
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
void insertDefaultPostgreSQLValue(IColumn & column, const IColumn & sample_column)
{
column.insertFrom(sample_column, 0);
}
void insertPostgreSQLValue(
IColumn & column, std::string_view value,
const ExternalResultDescription::ValueType type, const DataTypePtr data_type,
std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t idx)
{
switch (type)
{
case ExternalResultDescription::ValueType::vtUInt8:
{
if (value == "t")
assert_cast<ColumnUInt8 &>(column).insertValue(1);
else if (value == "f")
assert_cast<ColumnUInt8 &>(column).insertValue(0);
else
assert_cast<ColumnUInt8 &>(column).insertValue(pqxx::from_string<uint16_t>(value));
break;
}
case ExternalResultDescription::ValueType::vtUInt16:
assert_cast<ColumnUInt16 &>(column).insertValue(pqxx::from_string<uint16_t>(value));
break;
case ExternalResultDescription::ValueType::vtUInt32:
assert_cast<ColumnUInt32 &>(column).insertValue(pqxx::from_string<uint32_t>(value));
break;
case ExternalResultDescription::ValueType::vtUInt64:
assert_cast<ColumnUInt64 &>(column).insertValue(pqxx::from_string<uint64_t>(value));
break;
case ExternalResultDescription::ValueType::vtInt8:
assert_cast<ColumnInt8 &>(column).insertValue(pqxx::from_string<int16_t>(value));
break;
case ExternalResultDescription::ValueType::vtInt16:
assert_cast<ColumnInt16 &>(column).insertValue(pqxx::from_string<int16_t>(value));
break;
case ExternalResultDescription::ValueType::vtInt32:
assert_cast<ColumnInt32 &>(column).insertValue(pqxx::from_string<int32_t>(value));
break;
case ExternalResultDescription::ValueType::vtInt64:
assert_cast<ColumnInt64 &>(column).insertValue(pqxx::from_string<int64_t>(value));
break;
case ExternalResultDescription::ValueType::vtFloat32:
assert_cast<ColumnFloat32 &>(column).insertValue(pqxx::from_string<float>(value));
break;
case ExternalResultDescription::ValueType::vtFloat64:
assert_cast<ColumnFloat64 &>(column).insertValue(pqxx::from_string<double>(value));
break;
case ExternalResultDescription::ValueType::vtEnum8:[[fallthrough]];
case ExternalResultDescription::ValueType::vtEnum16:[[fallthrough]];
case ExternalResultDescription::ValueType::vtFixedString:[[fallthrough]];
case ExternalResultDescription::ValueType::vtString:
assert_cast<ColumnString &>(column).insertData(value.data(), value.size());
break;
case ExternalResultDescription::ValueType::vtUUID:
assert_cast<ColumnUInt128 &>(column).insert(parse<UUID>(value.data(), value.size()));
break;
case ExternalResultDescription::ValueType::vtDate:
assert_cast<ColumnUInt16 &>(column).insertValue(UInt16{LocalDate{std::string(value)}.getDayNum()});
break;
case ExternalResultDescription::ValueType::vtDateTime:
{
ReadBufferFromString in(value);
time_t time = 0;
readDateTimeText(time, in, assert_cast<const DataTypeDateTime *>(data_type.get())->getTimeZone());
if (time < 0)
time = 0;
assert_cast<ColumnUInt32 &>(column).insertValue(time);
break;
}
case ExternalResultDescription::ValueType::vtDateTime64:[[fallthrough]];
case ExternalResultDescription::ValueType::vtDecimal32: [[fallthrough]];
case ExternalResultDescription::ValueType::vtDecimal64: [[fallthrough]];
case ExternalResultDescription::ValueType::vtDecimal128: [[fallthrough]];
case ExternalResultDescription::ValueType::vtDecimal256:
{
ReadBufferFromString istr(value);
data_type->getDefaultSerialization()->deserializeWholeText(column, istr, FormatSettings{});
break;
}
case ExternalResultDescription::ValueType::vtArray:
{
pqxx::array_parser parser{value};
std::pair<pqxx::array_parser::juncture, std::string> parsed = parser.get_next();
size_t dimension = 0, max_dimension = 0, expected_dimensions = array_info[idx].num_dimensions;
const auto parse_value = array_info[idx].pqxx_parser;
std::vector<Row> dimensions(expected_dimensions + 1);
while (parsed.first != pqxx::array_parser::juncture::done)
{
if ((parsed.first == pqxx::array_parser::juncture::row_start) && (++dimension > expected_dimensions))
throw Exception("Got more dimensions than expected", ErrorCodes::BAD_ARGUMENTS);
else if (parsed.first == pqxx::array_parser::juncture::string_value)
dimensions[dimension].emplace_back(parse_value(parsed.second));
else if (parsed.first == pqxx::array_parser::juncture::null_value)
dimensions[dimension].emplace_back(array_info[idx].default_value);
else if (parsed.first == pqxx::array_parser::juncture::row_end)
{
max_dimension = std::max(max_dimension, dimension);
--dimension;
if (dimension == 0)
break;
dimensions[dimension].emplace_back(Array(dimensions[dimension + 1].begin(), dimensions[dimension + 1].end()));
dimensions[dimension + 1].clear();
}
parsed = parser.get_next();
}
if (max_dimension < expected_dimensions)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Got less dimensions than expected. ({} instead of {})", max_dimension, expected_dimensions);
assert_cast<ColumnArray &>(column).insert(Array(dimensions[1].begin(), dimensions[1].end()));
break;
}
}
}
void preparePostgreSQLArrayInfo(
std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t column_idx, const DataTypePtr data_type)
{
const auto * array_type = typeid_cast<const DataTypeArray *>(data_type.get());
auto nested = array_type->getNestedType();
size_t count_dimensions = 1;
while (isArray(nested))
{
++count_dimensions;
nested = typeid_cast<const DataTypeArray *>(nested.get())->getNestedType();
}
Field default_value = nested->getDefault();
if (nested->isNullable())
nested = static_cast<const DataTypeNullable *>(nested.get())->getNestedType();
WhichDataType which(nested);
std::function<Field(std::string & fields)> parser;
if (which.isUInt8() || which.isUInt16())
parser = [](std::string & field) -> Field { return pqxx::from_string<uint16_t>(field); };
else if (which.isInt8() || which.isInt16())
parser = [](std::string & field) -> Field { return pqxx::from_string<int16_t>(field); };
else if (which.isUInt32())
parser = [](std::string & field) -> Field { return pqxx::from_string<uint32_t>(field); };
else if (which.isInt32())
parser = [](std::string & field) -> Field { return pqxx::from_string<int32_t>(field); };
else if (which.isUInt64())
parser = [](std::string & field) -> Field { return pqxx::from_string<uint64_t>(field); };
else if (which.isInt64())
parser = [](std::string & field) -> Field { return pqxx::from_string<int64_t>(field); };
else if (which.isFloat32())
parser = [](std::string & field) -> Field { return pqxx::from_string<float>(field); };
else if (which.isFloat64())
parser = [](std::string & field) -> Field { return pqxx::from_string<double>(field); };
else if (which.isString() || which.isFixedString())
parser = [](std::string & field) -> Field { return field; };
else if (which.isDate())
parser = [](std::string & field) -> Field { return UInt16{LocalDate{field}.getDayNum()}; };
else if (which.isDateTime())
parser = [nested](std::string & field) -> Field
{
ReadBufferFromString in(field);
time_t time = 0;
readDateTimeText(time, in, assert_cast<const DataTypeDateTime *>(nested.get())->getTimeZone());
return time;
};
else if (which.isDecimal32())
parser = [nested](std::string & field) -> Field
{
const auto & type = typeid_cast<const DataTypeDecimal<Decimal32> *>(nested.get());
DataTypeDecimal<Decimal32> res(getDecimalPrecision(*type), getDecimalScale(*type));
return convertFieldToType(field, res);
};
else if (which.isDecimal64())
parser = [nested](std::string & field) -> Field
{
const auto & type = typeid_cast<const DataTypeDecimal<Decimal64> *>(nested.get());
DataTypeDecimal<Decimal64> res(getDecimalPrecision(*type), getDecimalScale(*type));
return convertFieldToType(field, res);
};
else if (which.isDecimal128())
parser = [nested](std::string & field) -> Field
{
const auto & type = typeid_cast<const DataTypeDecimal<Decimal128> *>(nested.get());
DataTypeDecimal<Decimal128> res(getDecimalPrecision(*type), getDecimalScale(*type));
return convertFieldToType(field, res);
};
else if (which.isDecimal256())
parser = [nested](std::string & field) -> Field
{
const auto & type = typeid_cast<const DataTypeDecimal<Decimal256> *>(nested.get());
DataTypeDecimal<Decimal256> res(getDecimalPrecision(*type), getDecimalScale(*type));
return convertFieldToType(field, res);
};
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Type conversion to {} is not supported", nested->getName());
array_info[column_idx] = {count_dimensions, default_value, parser};
}
}
#endif
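The vtArray branch above walks pqxx::array_parser junctures to rebuild nested Arrays dimension by dimension. A hedged standalone sketch of the parser loop for a flat one-dimensional text array; the multi-dimensional row_start/row_end bookkeeping is what the code above adds on top:
#include <string>
#include <vector>
#include <pqxx/pqxx>
// Hedged sketch: flatten a one-dimensional PostgreSQL array literal such as "{1,2,3}".
std::vector<std::string> parseFlatTextArray(const std::string & value)
{
    std::vector<std::string> result;
    pqxx::array_parser parser{value};
    auto parsed = parser.get_next();
    while (parsed.first != pqxx::array_parser::juncture::done)
    {
        if (parsed.first == pqxx::array_parser::juncture::string_value)
            result.push_back(parsed.second);
        else if (parsed.first == pqxx::array_parser::juncture::null_value)
            result.emplace_back();      /// represent NULL as an empty string in this sketch
        parsed = parser.get_next();
    }
    return result;
}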

View File

@ -0,0 +1,38 @@
#pragma once
#if !defined(ARCADIA_BUILD)
#include "config_core.h"
#endif
#if USE_LIBPQXX
#include <Core/Block.h>
#include <DataStreams/IBlockInputStream.h>
#include <Core/ExternalResultDescription.h>
#include <Core/Field.h>
namespace DB
{
struct PostgreSQLArrayInfo
{
size_t num_dimensions;
Field default_value;
std::function<Field(std::string & field)> pqxx_parser;
};
void insertPostgreSQLValue(
IColumn & column, std::string_view value,
const ExternalResultDescription::ValueType type, const DataTypePtr data_type,
std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t idx);
void preparePostgreSQLArrayInfo(
std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t column_idx, const DataTypePtr data_type);
void insertDefaultPostgreSQLValue(IColumn & column, const IColumn & sample_column);
}
#endif

View File

@ -93,6 +93,7 @@ class IColumn;
M(Bool, distributed_directory_monitor_split_batch_on_failure, false, "Should StorageDistributed DirectoryMonitors try to split a batch into smaller batches in case of failures.", 0) \
\
M(Bool, optimize_move_to_prewhere, true, "Allows disabling WHERE to PREWHERE optimization in SELECT queries from MergeTree.", 0) \
M(Bool, optimize_move_to_prewhere_if_final, false, "If the query has `FINAL`, the `move_to_prewhere` optimization is not always correct; it is enabled only if both `optimize_move_to_prewhere` and `optimize_move_to_prewhere_if_final` are turned on.", 0) \
\
M(UInt64, replication_alter_partitions_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) \
M(UInt64, replication_alter_columns_timeout, 60, "Wait for actions to change the table structure within the specified number of seconds. 0 - wait unlimited time.", 0) \
@ -106,6 +107,8 @@ class IColumn;
M(Bool, allow_suspicious_low_cardinality_types, false, "In CREATE TABLE statement allows specifying LowCardinality modifier for types of small fixed size (8 or less). Enabling this may increase merge times and memory consumption.", 0) \
M(Bool, compile_expressions, true, "Compile some scalar functions and operators to native code.", 0) \
M(UInt64, min_count_to_compile_expression, 3, "The number of identical expressions before they are JIT-compiled", 0) \
M(Bool, compile_aggregate_expressions, true, "Compile aggregate functions to native code.", 0) \
M(UInt64, min_count_to_compile_aggregate_expression, 0, "The number of identical aggregate expressions before they are JIT-compiled", 0) \
M(UInt64, group_by_two_level_threshold, 100000, "From what number of keys, a two-level aggregation starts. 0 - the threshold is not set.", 0) \
M(UInt64, group_by_two_level_threshold_bytes, 50000000, "From what size of the aggregation state in bytes, a two-level aggregation begins to be used. 0 - the threshold is not set. Two-level aggregation is used when at least one of the thresholds is triggered.", 0) \
M(Bool, distributed_aggregation_memory_efficient, true, "Is the memory-saving mode of distributed aggregation enabled.", 0) \
@ -429,6 +432,7 @@ class IColumn;
M(Bool, cast_keep_nullable, false, "CAST operator keep Nullable for result data type", 0) \
M(Bool, alter_partition_verbose_result, false, "Output information about affected parts. Currently works only for FREEZE and ATTACH commands.", 0) \
M(Bool, allow_experimental_database_materialize_mysql, false, "Allow to create database with Engine=MaterializeMySQL(...).", 0) \
M(Bool, allow_experimental_database_materialized_postgresql, false, "Allow to create database with Engine=MaterializedPostgreSQL(...).", 0) \
M(Bool, system_events_show_zero_values, false, "Include all metrics, even with zero values", 0) \
M(MySQLDataTypesSupport, mysql_datatypes_support_level, 0, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal' or 'datetime64'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precision are seen as String on ClickHouse's side.", 0) \
M(Bool, optimize_trivial_insert_select, true, "Optimize trivial 'INSERT INTO table SELECT ... FROM TABLES' query", 0) \
@ -445,7 +449,6 @@ class IColumn;
M(Bool, allow_experimental_window_functions, false, "Allow experimental window functions", 0) \
M(Bool, allow_experimental_projection_optimization, false, "Enable projection optimization when processing SELECT queries", 0) \
M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \
M(Bool, use_antlr_parser, false, "Parse incoming queries using ANTLR-generated experimental parser", 0) \
M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \
M(Bool, insert_null_as_default, true, "Insert DEFAULT values instead of NULL in INSERT SELECT (UNION ALL)", 0) \
\

View File

@ -22,12 +22,9 @@
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
PostgreSQLBlockInputStream::PostgreSQLBlockInputStream(
template<typename T>
PostgreSQLBlockInputStream<T>::PostgreSQLBlockInputStream(
postgres::ConnectionHolderPtr connection_holder_,
const std::string & query_str_,
const Block & sample_block,
@ -35,25 +32,52 @@ PostgreSQLBlockInputStream::PostgreSQLBlockInputStream(
: query_str(query_str_)
, max_block_size(max_block_size_)
, connection_holder(std::move(connection_holder_))
{
init(sample_block);
}
template<typename T>
PostgreSQLBlockInputStream<T>::PostgreSQLBlockInputStream(
std::shared_ptr<T> tx_,
const std::string & query_str_,
const Block & sample_block,
const UInt64 max_block_size_,
bool auto_commit_)
: query_str(query_str_)
, tx(std::move(tx_))
, max_block_size(max_block_size_)
, auto_commit(auto_commit_)
{
init(sample_block);
}
template<typename T>
void PostgreSQLBlockInputStream<T>::init(const Block & sample_block)
{
description.init(sample_block);
for (const auto idx : collections::range(0, description.sample_block.columns()))
if (description.types[idx].first == ValueType::vtArray)
prepareArrayInfo(idx, description.sample_block.getByPosition(idx).type);
if (description.types[idx].first == ExternalResultDescription::ValueType::vtArray)
preparePostgreSQLArrayInfo(array_info, idx, description.sample_block.getByPosition(idx).type);
/// pqxx::stream_from uses the COPY command and fails if ';' is present in the query, so strip a trailing semicolon.
if (query_str.ends_with(';'))
query_str.resize(query_str.size() - 1);
}
void PostgreSQLBlockInputStream::readPrefix()
template<typename T>
void PostgreSQLBlockInputStream<T>::readPrefix()
{
tx = std::make_unique<pqxx::read_transaction>(connection_holder->get());
tx = std::make_shared<T>(connection_holder->get());
stream = std::make_unique<pqxx::stream_from>(*tx, pqxx::from_query, std::string_view(query_str));
}
Block PostgreSQLBlockInputStream::readImpl()
template<typename T>
Block PostgreSQLBlockInputStream<T>::readImpl()
{
/// Check if pqxx::stream_from is finished
if (!stream || !(*stream))
@ -81,17 +105,22 @@ Block PostgreSQLBlockInputStream::readImpl()
{
ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*columns[idx]);
const auto & data_type = assert_cast<const DataTypeNullable &>(*sample.type);
insertValue(column_nullable.getNestedColumn(), (*row)[idx], description.types[idx].first, data_type.getNestedType(), idx);
insertPostgreSQLValue(
column_nullable.getNestedColumn(), (*row)[idx],
description.types[idx].first, data_type.getNestedType(), array_info, idx);
column_nullable.getNullMapData().emplace_back(0);
}
else
{
insertValue(*columns[idx], (*row)[idx], description.types[idx].first, sample.type, idx);
insertPostgreSQLValue(
*columns[idx], (*row)[idx], description.types[idx].first, sample.type, array_info, idx);
}
}
else
{
insertDefaultValue(*columns[idx], *sample.column);
insertDefaultPostgreSQLValue(*columns[idx], *sample.column);
}
}
@ -104,216 +133,23 @@ Block PostgreSQLBlockInputStream::readImpl()
}
void PostgreSQLBlockInputStream::readSuffix()
template<typename T>
void PostgreSQLBlockInputStream<T>::readSuffix()
{
if (stream)
{
stream->complete();
tx->commit();
if (auto_commit)
tx->commit();
}
}
template
class PostgreSQLBlockInputStream<pqxx::ReplicationTransaction>;
void PostgreSQLBlockInputStream::insertValue(IColumn & column, std::string_view value,
const ExternalResultDescription::ValueType type, const DataTypePtr data_type, size_t idx)
{
switch (type)
{
case ValueType::vtUInt8:
{
if (value == "t")
assert_cast<ColumnUInt8 &>(column).insertValue(1);
else if (value == "f")
assert_cast<ColumnUInt8 &>(column).insertValue(0);
else
assert_cast<ColumnUInt8 &>(column).insertValue(pqxx::from_string<uint16_t>(value));
break;
}
case ValueType::vtUInt16:
assert_cast<ColumnUInt16 &>(column).insertValue(pqxx::from_string<uint16_t>(value));
break;
case ValueType::vtUInt32:
assert_cast<ColumnUInt32 &>(column).insertValue(pqxx::from_string<uint32_t>(value));
break;
case ValueType::vtUInt64:
assert_cast<ColumnUInt64 &>(column).insertValue(pqxx::from_string<uint64_t>(value));
break;
case ValueType::vtInt8:
assert_cast<ColumnInt8 &>(column).insertValue(pqxx::from_string<int16_t>(value));
break;
case ValueType::vtInt16:
assert_cast<ColumnInt16 &>(column).insertValue(pqxx::from_string<int16_t>(value));
break;
case ValueType::vtInt32:
assert_cast<ColumnInt32 &>(column).insertValue(pqxx::from_string<int32_t>(value));
break;
case ValueType::vtInt64:
assert_cast<ColumnInt64 &>(column).insertValue(pqxx::from_string<int64_t>(value));
break;
case ValueType::vtFloat32:
assert_cast<ColumnFloat32 &>(column).insertValue(pqxx::from_string<float>(value));
break;
case ValueType::vtFloat64:
assert_cast<ColumnFloat64 &>(column).insertValue(pqxx::from_string<double>(value));
break;
case ValueType::vtFixedString: [[fallthrough]];
case ValueType::vtEnum8:
case ValueType::vtEnum16:
case ValueType::vtString:
assert_cast<ColumnString &>(column).insertData(value.data(), value.size());
break;
case ValueType::vtUUID:
assert_cast<ColumnUUID &>(column).insert(parse<UUID>(value.data(), value.size()));
break;
case ValueType::vtDate:
assert_cast<ColumnUInt16 &>(column).insertValue(UInt16{LocalDate{std::string(value)}.getDayNum()});
break;
case ValueType::vtDateTime:
{
ReadBufferFromString in(value);
time_t time = 0;
readDateTimeText(time, in, assert_cast<const DataTypeDateTime *>(data_type.get())->getTimeZone());
if (time < 0)
time = 0;
assert_cast<ColumnUInt32 &>(column).insertValue(time);
break;
}
case ValueType::vtDateTime64: [[fallthrough]];
case ValueType::vtDecimal32: [[fallthrough]];
case ValueType::vtDecimal64: [[fallthrough]];
case ValueType::vtDecimal128: [[fallthrough]];
case ValueType::vtDecimal256:
{
ReadBufferFromString istr(value);
data_type->getDefaultSerialization()->deserializeWholeText(column, istr, FormatSettings{});
break;
}
case ValueType::vtArray:
{
pqxx::array_parser parser{value};
std::pair<pqxx::array_parser::juncture, std::string> parsed = parser.get_next();
size_t dimension = 0, max_dimension = 0, expected_dimensions = array_info[idx].num_dimensions;
const auto parse_value = array_info[idx].pqxx_parser;
std::vector<Row> dimensions(expected_dimensions + 1);
while (parsed.first != pqxx::array_parser::juncture::done)
{
if ((parsed.first == pqxx::array_parser::juncture::row_start) && (++dimension > expected_dimensions))
throw Exception("Got more dimensions than expected", ErrorCodes::BAD_ARGUMENTS);
else if (parsed.first == pqxx::array_parser::juncture::string_value)
dimensions[dimension].emplace_back(parse_value(parsed.second));
else if (parsed.first == pqxx::array_parser::juncture::null_value)
dimensions[dimension].emplace_back(array_info[idx].default_value);
else if (parsed.first == pqxx::array_parser::juncture::row_end)
{
max_dimension = std::max(max_dimension, dimension);
--dimension;
if (dimension == 0)
break;
dimensions[dimension].emplace_back(Array(dimensions[dimension + 1].begin(), dimensions[dimension + 1].end()));
dimensions[dimension + 1].clear();
}
parsed = parser.get_next();
}
if (max_dimension < expected_dimensions)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Got less dimensions than expected. ({} instead of {})", max_dimension, expected_dimensions);
assert_cast<ColumnArray &>(column).insert(Array(dimensions[1].begin(), dimensions[1].end()));
break;
}
}
}
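
For reference, the vtArray branch above can be exercised outside ClickHouse. A minimal standalone sketch (assuming libpqxx is available; not part of this diff) that walks a PostgreSQL array literal with pqxx::array_parser, using the same juncture-based traversal and nesting-depth tracking:

#include <iostream>
#include <pqxx/pqxx>

/// Minimal sketch, not ClickHouse code: iterate a PostgreSQL array literal the same way
/// the vtArray branch above does, tracking nesting depth via row_start/row_end junctures.
int main()
{
    pqxx::array_parser parser{"{{1,2},{3,NULL}}"};
    int depth = 0;
    for (auto parsed = parser.get_next();
         parsed.first != pqxx::array_parser::juncture::done;
         parsed = parser.get_next())
    {
        if (parsed.first == pqxx::array_parser::juncture::row_start)
            ++depth;
        else if (parsed.first == pqxx::array_parser::juncture::row_end)
            --depth;
        else if (parsed.first == pqxx::array_parser::juncture::string_value)
            std::cout << "depth " << depth << ": " << parsed.second << '\n';
        else if (parsed.first == pqxx::array_parser::juncture::null_value)
            std::cout << "depth " << depth << ": NULL\n";
    }
}
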
void PostgreSQLBlockInputStream::prepareArrayInfo(size_t column_idx, const DataTypePtr data_type)
{
const auto * array_type = typeid_cast<const DataTypeArray *>(data_type.get());
auto nested = array_type->getNestedType();
size_t count_dimensions = 1;
while (isArray(nested))
{
++count_dimensions;
nested = typeid_cast<const DataTypeArray *>(nested.get())->getNestedType();
}
Field default_value = nested->getDefault();
if (nested->isNullable())
nested = static_cast<const DataTypeNullable *>(nested.get())->getNestedType();
WhichDataType which(nested);
std::function<Field(std::string & fields)> parser;
if (which.isUInt8() || which.isUInt16())
parser = [](std::string & field) -> Field { return pqxx::from_string<uint16_t>(field); };
else if (which.isInt8() || which.isInt16())
parser = [](std::string & field) -> Field { return pqxx::from_string<int16_t>(field); };
else if (which.isUInt32())
parser = [](std::string & field) -> Field { return pqxx::from_string<uint32_t>(field); };
else if (which.isInt32())
parser = [](std::string & field) -> Field { return pqxx::from_string<int32_t>(field); };
else if (which.isUInt64())
parser = [](std::string & field) -> Field { return pqxx::from_string<uint64_t>(field); };
else if (which.isInt64())
parser = [](std::string & field) -> Field { return pqxx::from_string<int64_t>(field); };
else if (which.isFloat32())
parser = [](std::string & field) -> Field { return pqxx::from_string<float>(field); };
else if (which.isFloat64())
parser = [](std::string & field) -> Field { return pqxx::from_string<double>(field); };
else if (which.isString() || which.isFixedString())
parser = [](std::string & field) -> Field { return field; };
else if (which.isDate())
parser = [](std::string & field) -> Field { return UInt16{LocalDate{field}.getDayNum()}; };
else if (which.isDateTime())
parser = [nested](std::string & field) -> Field
{
ReadBufferFromString in(field);
time_t time = 0;
readDateTimeText(time, in, assert_cast<const DataTypeDateTime *>(nested.get())->getTimeZone());
return time;
};
else if (which.isDecimal32())
parser = [nested](std::string & field) -> Field
{
const auto & type = typeid_cast<const DataTypeDecimal<Decimal32> *>(nested.get());
DataTypeDecimal<Decimal32> res(getDecimalPrecision(*type), getDecimalScale(*type));
return convertFieldToType(field, res);
};
else if (which.isDecimal64())
parser = [nested](std::string & field) -> Field
{
const auto & type = typeid_cast<const DataTypeDecimal<Decimal64> *>(nested.get());
DataTypeDecimal<Decimal64> res(getDecimalPrecision(*type), getDecimalScale(*type));
return convertFieldToType(field, res);
};
else if (which.isDecimal128())
parser = [nested](std::string & field) -> Field
{
const auto & type = typeid_cast<const DataTypeDecimal<Decimal128> *>(nested.get());
DataTypeDecimal<Decimal128> res(getDecimalPrecision(*type), getDecimalScale(*type));
return convertFieldToType(field, res);
};
else if (which.isDecimal256())
parser = [nested](std::string & field) -> Field
{
const auto & type = typeid_cast<const DataTypeDecimal<Decimal256> *>(nested.get());
DataTypeDecimal<Decimal256> res(getDecimalPrecision(*type), getDecimalScale(*type));
return convertFieldToType(field, res);
};
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Type conversion to {} is not supported", nested->getName());
array_info[column_idx] = {count_dimensions, default_value, parser};
}
template
class PostgreSQLBlockInputStream<pqxx::ReadTransaction>;
}

View File

@ -9,54 +9,76 @@
#include <DataStreams/IBlockInputStream.h>
#include <Core/ExternalResultDescription.h>
#include <Core/Field.h>
#include <Storages/PostgreSQL/ConnectionHolder.h>
#include <Core/PostgreSQL/insertPostgreSQLValue.h>
#include <Core/PostgreSQL/ConnectionHolder.h>
#include <Core/PostgreSQL/Utils.h>
namespace DB
{
template <typename T = pqxx::ReadTransaction>
class PostgreSQLBlockInputStream : public IBlockInputStream
{
public:
PostgreSQLBlockInputStream(
postgres::ConnectionHolderPtr connection_holder_,
const std::string & query_str,
const String & query_str_,
const Block & sample_block,
const UInt64 max_block_size_);
String getName() const override { return "PostgreSQL"; }
Block getHeader() const override { return description.sample_block.cloneEmpty(); }
private:
using ValueType = ExternalResultDescription::ValueType;
void readPrefix() override;
protected:
PostgreSQLBlockInputStream(
std::shared_ptr<T> tx_,
const std::string & query_str_,
const Block & sample_block,
const UInt64 max_block_size_,
bool auto_commit_);
String query_str;
std::shared_ptr<T> tx;
std::unique_ptr<pqxx::stream_from> stream;
private:
Block readImpl() override;
void readSuffix() override;
void insertValue(IColumn & column, std::string_view value,
const ExternalResultDescription::ValueType type, const DataTypePtr data_type, size_t idx);
void insertDefaultValue(IColumn & column, const IColumn & sample_column)
{
column.insertFrom(sample_column, 0);
}
void prepareArrayInfo(size_t column_idx, const DataTypePtr data_type);
void init(const Block & sample_block);
String query_str;
const UInt64 max_block_size;
bool auto_commit = true;
ExternalResultDescription description;
postgres::ConnectionHolderPtr connection_holder;
std::unique_ptr<pqxx::read_transaction> tx;
std::unique_ptr<pqxx::stream_from> stream;
struct ArrayInfo
std::unordered_map<size_t, PostgreSQLArrayInfo> array_info;
};
/// Passes transaction object into PostgreSQLBlockInputStream and does not close transaction after read is finished.
template <typename T>
class PostgreSQLTransactionBlockInputStream : public PostgreSQLBlockInputStream<T>
{
public:
using Base = PostgreSQLBlockInputStream<T>;
PostgreSQLTransactionBlockInputStream(
std::shared_ptr<T> tx_,
const std::string & query_str_,
const Block & sample_block_,
const UInt64 max_block_size_)
: PostgreSQLBlockInputStream<T>(tx_, query_str_, sample_block_, max_block_size_, false) {}
void readPrefix() override
{
size_t num_dimensions;
Field default_value;
std::function<Field(std::string & field)> pqxx_parser;
};
std::unordered_map<size_t, ArrayInfo> array_info;
Base::stream = std::make_unique<pqxx::stream_from>(*Base::tx, pqxx::from_query, std::string_view(Base::query_str));
}
};
}

View File

@ -33,7 +33,8 @@ TTLAggregationAlgorithm::TTLAggregationAlgorithm(
Aggregator::Params params(header, keys, aggregates,
false, settings.max_rows_to_group_by, settings.group_by_overflow_mode, 0, 0,
settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set,
storage_.getContext()->getTemporaryVolume(), settings.max_threads, settings.min_free_disk_space_for_temporary_data);
storage_.getContext()->getTemporaryVolume(), settings.max_threads, settings.min_free_disk_space_for_temporary_data,
settings.compile_aggregate_expressions, settings.min_count_to_compile_aggregate_expression);
aggregator = std::make_unique<Aggregator>(params);
}

View File

@ -357,6 +357,7 @@ inline bool isTuple(const DataTypePtr & data_type) { return WhichDataType(data_t
inline bool isArray(const DataTypePtr & data_type) { return WhichDataType(data_type).isArray(); }
inline bool isMap(const DataTypePtr & data_type) { return WhichDataType(data_type).isMap(); }
inline bool isNothing(const DataTypePtr & data_type) { return WhichDataType(data_type).isNothing(); }
inline bool isUUID(const DataTypePtr & data_type) { return WhichDataType(data_type).isUUID(); }
template <typename T>
inline bool isUInt8(const T & data_type)

View File

@ -61,6 +61,44 @@ static inline llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const IDa
return nullptr;
}
template <typename ToType>
static inline llvm::Type * toNativeType(llvm::IRBuilderBase & builder)
{
if constexpr (std::is_same_v<ToType, Int8> || std::is_same_v<ToType, UInt8>)
return builder.getInt8Ty();
else if constexpr (std::is_same_v<ToType, Int16> || std::is_same_v<ToType, UInt16>)
return builder.getInt16Ty();
else if constexpr (std::is_same_v<ToType, Int32> || std::is_same_v<ToType, UInt32>)
return builder.getInt32Ty();
else if constexpr (std::is_same_v<ToType, Int64> || std::is_same_v<ToType, UInt64>)
return builder.getInt64Ty();
else if constexpr (std::is_same_v<ToType, Float32>)
return builder.getFloatTy();
else if constexpr (std::is_same_v<ToType, Float64>)
return builder.getDoubleTy();
return nullptr;
}
template <typename Type>
static inline bool canBeNativeType()
{
if constexpr (std::is_same_v<Type, Int8> || std::is_same_v<Type, UInt8>)
return true;
else if constexpr (std::is_same_v<Type, Int16> || std::is_same_v<Type, UInt16>)
return true;
else if constexpr (std::is_same_v<Type, Int32> || std::is_same_v<Type, UInt32>)
return true;
else if constexpr (std::is_same_v<Type, Int64> || std::is_same_v<Type, UInt64>)
return true;
else if constexpr (std::is_same_v<Type, Float32>)
return true;
else if constexpr (std::is_same_v<Type, Float64>)
return true;
return false;
}
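
The two templates above dispatch purely on the C++ type at compile time. As an illustration of the same `if constexpr` pattern, here is a small self-contained sketch (illustrative names, no LLVM dependency) that maps C++ types to IR-style type names instead of llvm::Type pointers:

#include <cstdint>
#include <iostream>
#include <string_view>
#include <type_traits>

/// Illustrative sketch of the compile-time dispatch used by toNativeType/canBeNativeType.
template <typename T>
constexpr std::string_view nativeTypeName()
{
    if constexpr (std::is_same_v<T, int8_t> || std::is_same_v<T, uint8_t>)
        return "i8";
    else if constexpr (std::is_same_v<T, int16_t> || std::is_same_v<T, uint16_t>)
        return "i16";
    else if constexpr (std::is_same_v<T, int32_t> || std::is_same_v<T, uint32_t>)
        return "i32";
    else if constexpr (std::is_same_v<T, int64_t> || std::is_same_v<T, uint64_t>)
        return "i64";
    else if constexpr (std::is_same_v<T, float>)
        return "float";
    else if constexpr (std::is_same_v<T, double>)
        return "double";
    else
        return "unsupported";
}

int main()
{
    std::cout << nativeTypeName<uint32_t>() << '\n'; /// prints "i32"
}
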
static inline bool canBeNativeType(const IDataType & type)
{
WhichDataType data_type(type);
@ -79,40 +117,62 @@ static inline llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const Dat
return toNativeType(builder, *type);
}
static inline llvm::Value * nativeBoolCast(llvm::IRBuilder<> & b, const DataTypePtr & from, llvm::Value * value)
static inline llvm::Value * nativeBoolCast(llvm::IRBuilder<> & b, const DataTypePtr & from_type, llvm::Value * value)
{
if (from->isNullable())
if (from_type->isNullable())
{
auto * inner = nativeBoolCast(b, removeNullable(from), b.CreateExtractValue(value, {0}));
auto * inner = nativeBoolCast(b, removeNullable(from_type), b.CreateExtractValue(value, {0}));
return b.CreateAnd(b.CreateNot(b.CreateExtractValue(value, {1})), inner);
}
auto * zero = llvm::Constant::getNullValue(value->getType());
if (value->getType()->isIntegerTy())
return b.CreateICmpNE(value, zero);
if (value->getType()->isFloatingPointTy())
return b.CreateFCmpONE(value, zero); /// QNaN is false
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot cast non-number {} to bool", from->getName());
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot cast non-number {} to bool", from_type->getName());
}
static inline llvm::Value * nativeCast(llvm::IRBuilder<> & b, const DataTypePtr & from, llvm::Value * value, llvm::Type * to)
static inline llvm::Value * nativeCast(llvm::IRBuilder<> & b, const DataTypePtr & from, llvm::Value * value, llvm::Type * to_type)
{
auto * n_from = value->getType();
auto * from_type = value->getType();
if (n_from == to)
if (from_type == to_type)
return value;
else if (n_from->isIntegerTy() && to->isFloatingPointTy())
return typeIsSigned(*from) ? b.CreateSIToFP(value, to) : b.CreateUIToFP(value, to);
else if (n_from->isFloatingPointTy() && to->isIntegerTy())
return typeIsSigned(*from) ? b.CreateFPToSI(value, to) : b.CreateFPToUI(value, to);
else if (n_from->isIntegerTy() && to->isIntegerTy())
return b.CreateIntCast(value, to, typeIsSigned(*from));
else if (n_from->isFloatingPointTy() && to->isFloatingPointTy())
return b.CreateFPCast(value, to);
else if (from_type->isIntegerTy() && to_type->isFloatingPointTy())
return typeIsSigned(*from) ? b.CreateSIToFP(value, to_type) : b.CreateUIToFP(value, to_type);
else if (from_type->isFloatingPointTy() && to_type->isIntegerTy())
return typeIsSigned(*from) ? b.CreateFPToSI(value, to_type) : b.CreateFPToUI(value, to_type);
else if (from_type->isIntegerTy() && to_type->isIntegerTy())
return b.CreateIntCast(value, to_type, typeIsSigned(*from));
else if (from_type->isFloatingPointTy() && to_type->isFloatingPointTy())
return b.CreateFPCast(value, to_type);
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot cast {} to requested type", from->getName());
}
template <typename FromType>
static inline llvm::Value * nativeCast(llvm::IRBuilder<> & b, llvm::Value * value, llvm::Type * to_type)
{
auto * from_type = value->getType();
static constexpr bool from_type_is_signed = std::numeric_limits<FromType>::is_signed;
if (from_type == to_type)
return value;
else if (from_type->isIntegerTy() && to_type->isFloatingPointTy())
return from_type_is_signed ? b.CreateSIToFP(value, to_type) : b.CreateUIToFP(value, to_type);
else if (from_type->isFloatingPointTy() && to_type->isIntegerTy())
return from_type_is_signed ? b.CreateFPToSI(value, to_type) : b.CreateFPToUI(value, to_type);
else if (from_type->isIntegerTy() && to_type->isIntegerTy())
return b.CreateIntCast(value, to_type, from_type_is_signed);
else if (from_type->isFloatingPointTy() && to_type->isFloatingPointTy())
return b.CreateFPCast(value, to_type);
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot cast {} to requested type", TypeName<FromType>);
}
static inline llvm::Value * nativeCast(llvm::IRBuilder<> & b, const DataTypePtr & from, llvm::Value * value, const DataTypePtr & to)
{
auto * n_to = toNativeType(b, to);
@ -139,6 +199,37 @@ static inline llvm::Value * nativeCast(llvm::IRBuilder<> & b, const DataTypePtr
return nativeCast(b, from, value, n_to);
}
static inline std::pair<llvm::Value *, llvm::Value *> nativeCastToCommon(llvm::IRBuilder<> & b, const DataTypePtr & lhs_type, llvm::Value * lhs, const DataTypePtr & rhs_type, llvm::Value * rhs)
{
llvm::Type * common;
bool lhs_is_signed = typeIsSigned(*lhs_type);
bool rhs_is_signed = typeIsSigned(*rhs_type);
if (lhs->getType()->isIntegerTy() && rhs->getType()->isIntegerTy())
{
/// if one integer has a sign bit, make sure the other does as well. llvm generates optimal code
/// (e.g. uses overflow flag on x86) for (word size + 1)-bit integer operations.
size_t lhs_bit_width = lhs->getType()->getIntegerBitWidth() + (!lhs_is_signed && rhs_is_signed);
size_t rhs_bit_width = rhs->getType()->getIntegerBitWidth() + (!rhs_is_signed && lhs_is_signed);
size_t max_bit_width = std::max(lhs_bit_width, rhs_bit_width);
common = b.getIntNTy(max_bit_width);
}
else
{
/// TODO: Check
/// (double, float) or (double, int_N where N <= double's mantissa width) -> double
common = b.getDoubleTy();
}
auto * cast_lhs_to_common = nativeCast(b, lhs_type, lhs, common);
auto * cast_rhs_to_common = nativeCast(b, rhs_type, rhs, common);
return std::make_pair(cast_lhs_to_common, cast_rhs_to_common);
}
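
The integer branch of nativeCastToCommon widens the common type by one extra bit when exactly one operand is signed, so that, for example, UInt32 and Int16 meet in a 33-bit integer. A small standalone sketch of that width calculation (plain C++, no LLVM; the helper name is illustrative):

#include <algorithm>
#include <cstddef>
#include <iostream>

/// Illustrative sketch of the bit-width rule used above: if exactly one side is signed,
/// the unsigned side is given one extra bit so the common type can hold both ranges.
size_t commonIntegerBitWidth(size_t lhs_bits, bool lhs_is_signed, size_t rhs_bits, bool rhs_is_signed)
{
    size_t lhs_width = lhs_bits + (!lhs_is_signed && rhs_is_signed);
    size_t rhs_width = rhs_bits + (!rhs_is_signed && lhs_is_signed);
    return std::max(lhs_width, rhs_width);
}

int main()
{
    std::cout << commonIntegerBitWidth(32, false, 16, true) << '\n'; /// prints 33
}
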
static inline llvm::Constant * getColumnNativeValue(llvm::IRBuilderBase & builder, const DataTypePtr & column_type, const IColumn & column, size_t index)
{
if (const auto * constant = typeid_cast<const ColumnConst *>(&column))

View File

@ -81,7 +81,7 @@ void SerializationMap::deserializeBinary(IColumn & column, ReadBuffer & istr) co
template <typename Writer>
void SerializationMap::serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, Writer && writer) const
void SerializationMap::serializeTextImpl(const IColumn & column, size_t row_num, bool quote_key, WriteBuffer & ostr, Writer && writer) const
{
const auto & column_map = assert_cast<const ColumnMap &>(column);
@ -97,7 +97,16 @@ void SerializationMap::serializeTextImpl(const IColumn & column, size_t row_num,
{
if (i != offset)
writeChar(',', ostr);
writer(key, nested_tuple.getColumn(0), i);
if (quote_key)
{
writeChar('"', ostr);
writer(key, nested_tuple.getColumn(0), i);
writeChar('"', ostr);
}
else
writer(key, nested_tuple.getColumn(0), i);
writeChar(':', ostr);
writer(value, nested_tuple.getColumn(1), i);
}
@ -161,7 +170,7 @@ void SerializationMap::deserializeTextImpl(IColumn & column, ReadBuffer & istr,
void SerializationMap::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeTextImpl(column, row_num, ostr,
serializeTextImpl(column, row_num, /*quote_key=*/ false, ostr,
[&](const SerializationPtr & subcolumn_serialization, const IColumn & subcolumn, size_t pos)
{
subcolumn_serialization->serializeTextQuoted(subcolumn, pos, ostr, settings);
@ -170,7 +179,6 @@ void SerializationMap::serializeText(const IColumn & column, size_t row_num, Wri
void SerializationMap::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeTextImpl(column, istr,
[&](const SerializationPtr & subcolumn_serialization, IColumn & subcolumn)
{
@ -178,10 +186,13 @@ void SerializationMap::deserializeText(IColumn & column, ReadBuffer & istr, cons
});
}
void SerializationMap::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeTextImpl(column, row_num, ostr,
/// We need to double-quote non-string keys (e.g. integers) to produce valid JSON.
const auto & column_key = assert_cast<const ColumnMap &>(column).getNestedData().getColumn(0);
bool quote_key = !WhichDataType(column_key.getDataType()).isStringOrFixedString();
serializeTextImpl(column, row_num, quote_key, ostr,
[&](const SerializationPtr & subcolumn_serialization, const IColumn & subcolumn, size_t pos)
{
subcolumn_serialization->serializeTextJSON(subcolumn, pos, ostr, settings);
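
The quote_key flag above exists because serializing a numeric key emits bare digits, which is not a legal JSON object key. A toy sketch of the intended output shape (illustrative only, not the ClickHouse serialization code):

#include <iostream>
#include <map>
#include <string>

/// Toy sketch: render a map with integer keys as JSON, double-quoting each key
/// so the result stays valid JSON, mirroring the quote_key branch above.
std::string mapToJSON(const std::map<int, std::string> & values)
{
    std::string out = "{";
    bool first = true;
    for (const auto & [key, value] : values)
    {
        if (!first)
            out += ',';
        first = false;
        out += '"' + std::to_string(key) + "\":\"" + value + '"';
    }
    return out + "}";
}

int main()
{
    std::cout << mapToJSON({{1, "a"}, {2, "b"}}) << '\n'; /// {"1":"a","2":"b"}
}
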

View File

@ -61,7 +61,7 @@ public:
private:
template <typename Writer>
void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, Writer && writer) const;
void serializeTextImpl(const IColumn & column, size_t row_num, bool quote_key, WriteBuffer & ostr, Writer && writer) const;
template <typename Reader>
void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const;

View File

@ -109,12 +109,11 @@ StoragePtr DatabaseAtomic::detachTable(const String & name)
void DatabaseAtomic::dropTable(ContextPtr local_context, const String & table_name, bool no_delay)
{
if (auto * mv = dynamic_cast<StorageMaterializedView *>(tryGetTable(table_name, local_context).get()))
{
/// Remove the inner table (if any) to avoid deadlock
/// (due to attempt to execute DROP from the worker thread)
mv->dropInnerTable(no_delay, local_context);
}
auto storage = tryGetTable(table_name, local_context);
/// Remove the inner table (if any) to avoid deadlock
/// (due to attempt to execute DROP from the worker thread)
if (storage)
storage->dropInnerTableIfAny(no_delay, local_context);
String table_metadata_path = getObjectMetadataPath(table_name);
String table_metadata_path_drop;
@ -568,4 +567,3 @@ void DatabaseAtomic::checkDetachedTableNotInUse(const UUID & uuid)
}
}

View File

@ -36,7 +36,8 @@
#if USE_LIBPQXX
#include <Databases/PostgreSQL/DatabasePostgreSQL.h> // Y_IGNORE
#include <Storages/PostgreSQL/PoolWithFailover.h>
#include <Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h>
#include <Storages/PostgreSQL/MaterializedPostgreSQLSettings.h>
#endif
namespace fs = std::filesystem;
@ -99,14 +100,14 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
const UUID & uuid = create.uuid;
bool engine_may_have_arguments = engine_name == "MySQL" || engine_name == "MaterializeMySQL" || engine_name == "Lazy" ||
engine_name == "Replicated" || engine_name == "PostgreSQL";
engine_name == "Replicated" || engine_name == "PostgreSQL" || engine_name == "MaterializedPostgreSQL";
if (engine_define->engine->arguments && !engine_may_have_arguments)
throw Exception("Database engine " + engine_name + " cannot have arguments", ErrorCodes::BAD_ARGUMENTS);
bool has_unexpected_element = engine_define->engine->parameters || engine_define->partition_by ||
engine_define->primary_key || engine_define->order_by ||
engine_define->sample_by;
bool may_have_settings = endsWith(engine_name, "MySQL") || engine_name == "Replicated";
bool may_have_settings = endsWith(engine_name, "MySQL") || engine_name == "Replicated" || engine_name == "MaterializedPostgreSQL";
if (has_unexpected_element || (!may_have_settings && engine_define->settings))
throw Exception("Database engine " + engine_name + " cannot have parameters, primary_key, order_by, sample_by, settings",
ErrorCodes::UNKNOWN_ELEMENT_IN_AST);
@ -262,6 +263,41 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
return std::make_shared<DatabasePostgreSQL>(
context, metadata_path, engine_define, database_name, postgres_database_name, connection_pool, use_table_cache);
}
else if (engine_name == "MaterializedPostgreSQL")
{
const ASTFunction * engine = engine_define->engine;
if (!engine->arguments || engine->arguments->children.size() != 4)
{
throw Exception(
fmt::format("{} database requires host:port, database_name, username, password arguments", engine_name),
ErrorCodes::BAD_ARGUMENTS);
}
ASTs & engine_args = engine->arguments->children;
for (auto & engine_arg : engine_args)
engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, context);
const auto & host_port = safeGetLiteralValue<String>(engine_args[0], engine_name);
const auto & postgres_database_name = safeGetLiteralValue<String>(engine_args[1], engine_name);
const auto & username = safeGetLiteralValue<String>(engine_args[2], engine_name);
const auto & password = safeGetLiteralValue<String>(engine_args[3], engine_name);
auto parsed_host_port = parseAddress(host_port, 5432);
auto connection_info = postgres::formatConnectionString(postgres_database_name, parsed_host_port.first, parsed_host_port.second, username, password);
auto postgresql_replica_settings = std::make_unique<MaterializedPostgreSQLSettings>();
if (engine_define->settings)
postgresql_replica_settings->loadFromQuery(*engine_define);
return std::make_shared<DatabaseMaterializedPostgreSQL>(
context, metadata_path, uuid, engine_define, create.attach,
database_name, postgres_database_name, connection_info,
std::move(postgresql_replica_settings));
}
#endif
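
The MaterializedPostgreSQL branch above parses its first engine argument as host:port, with 5432 as the default port. A rough standalone sketch of that kind of split (hypothetical helper, not the actual parseAddress implementation):

#include <cstdint>
#include <iostream>
#include <string>
#include <utility>

/// Hypothetical helper, for illustration only: split "host[:port]" and fall back
/// to the default PostgreSQL port when no port is given.
std::pair<std::string, uint16_t> splitHostPort(const std::string & address, uint16_t default_port)
{
    auto pos = address.rfind(':');
    if (pos == std::string::npos)
        return {address, default_port};
    return {address.substr(0, pos), static_cast<uint16_t>(std::stoi(address.substr(pos + 1)))};
}

int main()
{
    auto [host, port] = splitHostPort("localhost", 5432);
    std::cout << host << ':' << port << '\n'; /// localhost:5432
}
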

View File

@ -0,0 +1,215 @@
#include <Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h>
#if USE_LIBPQXX
#include <Storages/PostgreSQL/StorageMaterializedPostgreSQL.h>
#include <Databases/PostgreSQL/fetchPostgreSQLTableStructure.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeArray.h>
#include <Databases/DatabaseOrdinary.h>
#include <Databases/DatabaseAtomic.h>
#include <Storages/StoragePostgreSQL.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <Common/escapeForFileName.h>
#include <Poco/DirectoryIterator.h>
#include <Poco/File.h>
#include <Common/Macros.h>
#include <common/logger_useful.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
extern const int LOGICAL_ERROR;
}
DatabaseMaterializedPostgreSQL::DatabaseMaterializedPostgreSQL(
ContextPtr context_,
const String & metadata_path_,
UUID uuid_,
const ASTStorage * database_engine_define_,
bool is_attach_,
const String & database_name_,
const String & postgres_database_name,
const postgres::ConnectionInfo & connection_info_,
std::unique_ptr<MaterializedPostgreSQLSettings> settings_)
: DatabaseAtomic(database_name_, metadata_path_, uuid_, "DatabaseMaterializedPostgreSQL (" + database_name_ + ")", context_)
, database_engine_define(database_engine_define_->clone())
, is_attach(is_attach_)
, remote_database_name(postgres_database_name)
, connection_info(connection_info_)
, settings(std::move(settings_))
{
}
void DatabaseMaterializedPostgreSQL::startSynchronization()
{
replication_handler = std::make_unique<PostgreSQLReplicationHandler>(
/* replication_identifier */database_name,
remote_database_name,
database_name,
connection_info,
getContext(),
is_attach,
settings->materialized_postgresql_max_block_size.value,
settings->materialized_postgresql_allow_automatic_update,
/* is_materialized_postgresql_database = */ true,
settings->materialized_postgresql_tables_list.value);
postgres::Connection connection(connection_info);
NameSet tables_to_replicate;
try
{
tables_to_replicate = replication_handler->fetchRequiredTables(connection);
}
catch (...)
{
LOG_ERROR(log, "Unable to load replicated tables list");
throw;
}
if (tables_to_replicate.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty list of tables to replicate");
for (const auto & table_name : tables_to_replicate)
{
/// Check nested ReplacingMergeTree table.
auto storage = DatabaseAtomic::tryGetTable(table_name, getContext());
if (storage)
{
/// Nested table was already created and synchronized.
storage = StorageMaterializedPostgreSQL::create(storage, getContext());
}
else
{
/// Nested table does not exist and will be created by replication thread.
storage = StorageMaterializedPostgreSQL::create(StorageID(database_name, table_name), getContext());
}
/// Cache MaterializedPostgreSQL wrapper over nested table.
materialized_tables[table_name] = storage;
/// Let the replication thread know which tables it needs to keep in sync.
replication_handler->addStorage(table_name, storage->as<StorageMaterializedPostgreSQL>());
}
LOG_TRACE(log, "Loaded {} tables. Starting synchronization", materialized_tables.size());
replication_handler->startup();
}
void DatabaseMaterializedPostgreSQL::loadStoredObjects(ContextMutablePtr local_context, bool has_force_restore_data_flag, bool force_attach)
{
DatabaseAtomic::loadStoredObjects(local_context, has_force_restore_data_flag, force_attach);
try
{
startSynchronization();
}
catch (...)
{
tryLogCurrentException(log, "Cannot load nested database objects for PostgreSQL database engine.");
if (!force_attach)
throw;
}
}
StoragePtr DatabaseMaterializedPostgreSQL::tryGetTable(const String & name, ContextPtr local_context) const
{
/// In order to define which table access is needed - to the MaterializedPostgreSQL table (only in case of SELECT queries) or
/// to its nested ReplacingMergeTree table (in all other cases) - the context of the query is modified.
/// Also, if the materialized_tables set is empty, it means all access goes to ReplacingMergeTree tables; this is the case after
/// replication_handler was shut down.
if (local_context->isInternalQuery() || materialized_tables.empty())
{
return DatabaseAtomic::tryGetTable(name, local_context);
}
/// Note: In a SELECT query the MaterializedPostgreSQL table is accessed and it calls tryGetTable from its nested table.
/// So the only point where synchronization is needed is access to the MaterializedPostgreSQL table wrapper over the nested table.
std::lock_guard lock(tables_mutex);
auto table = materialized_tables.find(name);
/// Return the wrapper over the ReplacingMergeTree table. If table synchronization has just started, the table will not
/// be accessible immediately. A table is considered to exist once its nested table has been created.
if (table != materialized_tables.end() && table->second->as<StorageMaterializedPostgreSQL>()->hasNested())
{
return table->second;
}
return StoragePtr{};
}
void DatabaseMaterializedPostgreSQL::createTable(ContextPtr local_context, const String & table_name, const StoragePtr & table, const ASTPtr & query)
{
/// Create table query can only be called from replication thread.
if (local_context->isInternalQuery())
{
DatabaseAtomic::createTable(local_context, table_name, table, query);
return;
}
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
"Create table query is allowed only for ReplacingMergeTree engine and only from the synchronization thread");
}
void DatabaseMaterializedPostgreSQL::shutdown()
{
stopReplication();
DatabaseAtomic::shutdown();
}
void DatabaseMaterializedPostgreSQL::stopReplication()
{
if (replication_handler)
replication_handler->shutdown();
/// Clear wrappers over nested tables; from now on all access goes to the nested tables directly.
materialized_tables.clear();
}
void DatabaseMaterializedPostgreSQL::dropTable(ContextPtr local_context, const String & table_name, bool no_delay)
{
/// Modify context into nested_context and pass query to Atomic database.
DatabaseAtomic::dropTable(StorageMaterializedPostgreSQL::makeNestedTableContext(local_context), table_name, no_delay);
}
void DatabaseMaterializedPostgreSQL::drop(ContextPtr local_context)
{
if (replication_handler)
replication_handler->shutdownFinal();
DatabaseAtomic::drop(StorageMaterializedPostgreSQL::makeNestedTableContext(local_context));
}
DatabaseTablesIteratorPtr DatabaseMaterializedPostgreSQL::getTablesIterator(
ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name)
{
/// Modify context into nested_context and pass query to Atomic database.
return DatabaseAtomic::getTablesIterator(StorageMaterializedPostgreSQL::makeNestedTableContext(local_context), filter_by_table_name);
}
}
#endif

View File

@ -0,0 +1,79 @@
#pragma once
#if !defined(ARCADIA_BUILD)
#include "config_core.h"
#endif
#if USE_LIBPQXX
#include <Storages/PostgreSQL/PostgreSQLReplicationHandler.h>
#include <Storages/PostgreSQL/MaterializedPostgreSQLSettings.h>
#include <Databases/DatabasesCommon.h>
#include <Core/BackgroundSchedulePool.h>
#include <Parsers/ASTCreateQuery.h>
#include <Databases/IDatabase.h>
#include <Databases/DatabaseOnDisk.h>
#include <Databases/DatabaseAtomic.h>
namespace DB
{
class PostgreSQLConnection;
using PostgreSQLConnectionPtr = std::shared_ptr<PostgreSQLConnection>;
class DatabaseMaterializedPostgreSQL : public DatabaseAtomic
{
public:
DatabaseMaterializedPostgreSQL(
ContextPtr context_,
const String & metadata_path_,
UUID uuid_,
const ASTStorage * database_engine_define_,
bool is_attach_,
const String & database_name_,
const String & postgres_database_name,
const postgres::ConnectionInfo & connection_info,
std::unique_ptr<MaterializedPostgreSQLSettings> settings_);
String getEngineName() const override { return "MaterializedPostgreSQL"; }
String getMetadataPath() const override { return metadata_path; }
void loadStoredObjects(ContextMutablePtr, bool, bool force_attach) override;
DatabaseTablesIteratorPtr getTablesIterator(
ContextPtr context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) override;
StoragePtr tryGetTable(const String & name, ContextPtr context) const override;
void createTable(ContextPtr context, const String & name, const StoragePtr & table, const ASTPtr & query) override;
void dropTable(ContextPtr local_context, const String & name, bool no_delay) override;
void drop(ContextPtr local_context) override;
void stopReplication();
void shutdown() override;
private:
void startSynchronization();
ASTPtr database_engine_define;
bool is_attach;
String remote_database_name;
postgres::ConnectionInfo connection_info;
std::unique_ptr<MaterializedPostgreSQLSettings> settings;
std::shared_ptr<PostgreSQLReplicationHandler> replication_handler;
std::map<std::string, StoragePtr> materialized_tables;
mutable std::mutex tables_mutex;
};
}
#endif

View File

@ -40,14 +40,14 @@ DatabasePostgreSQL::DatabasePostgreSQL(
const ASTStorage * database_engine_define_,
const String & dbname_,
const String & postgres_dbname,
postgres::PoolWithFailoverPtr connection_pool_,
const bool cache_tables_)
postgres::PoolWithFailoverPtr pool_,
bool cache_tables_)
: IDatabase(dbname_)
, WithContext(context_->getGlobalContext())
, metadata_path(metadata_path_)
, database_engine_define(database_engine_define_->clone())
, dbname(postgres_dbname)
, connection_pool(std::move(connection_pool_))
, pool(std::move(pool_))
, cache_tables(cache_tables_)
{
cleaner_task = getContext()->getSchedulePool().createTask("PostgreSQLCleanerTask", [this]{ removeOutdatedTables(); });
@ -59,7 +59,8 @@ bool DatabasePostgreSQL::empty() const
{
std::lock_guard<std::mutex> lock(mutex);
auto tables_list = fetchTablesList();
auto connection_holder = pool->get();
auto tables_list = fetchPostgreSQLTablesList(connection_holder->get());
for (const auto & table_name : tables_list)
if (!detached_or_dropped.count(table_name))
@ -74,7 +75,8 @@ DatabaseTablesIteratorPtr DatabasePostgreSQL::getTablesIterator(ContextPtr local
std::lock_guard<std::mutex> lock(mutex);
Tables tables;
auto table_names = fetchTablesList();
auto connection_holder = pool->get();
auto table_names = fetchPostgreSQLTablesList(connection_holder->get());
for (const auto & table_name : table_names)
if (!detached_or_dropped.count(table_name))
@ -84,21 +86,6 @@ DatabaseTablesIteratorPtr DatabasePostgreSQL::getTablesIterator(ContextPtr local
}
std::unordered_set<std::string> DatabasePostgreSQL::fetchTablesList() const
{
std::unordered_set<std::string> tables;
std::string query = "SELECT tablename FROM pg_catalog.pg_tables "
"WHERE schemaname != 'pg_catalog' AND schemaname != 'information_schema'";
auto connection_holder = connection_pool->get();
pqxx::read_transaction tx(connection_holder->get());
for (auto table_name : tx.stream<std::string>(query))
tables.insert(std::get<0>(table_name));
return tables;
}
bool DatabasePostgreSQL::checkPostgresTable(const String & table_name) const
{
if (table_name.find('\'') != std::string::npos
@ -108,7 +95,7 @@ bool DatabasePostgreSQL::checkPostgresTable(const String & table_name) const
"PostgreSQL table name cannot contain single quote or backslash characters, passed {}", table_name);
}
auto connection_holder = connection_pool->get();
auto connection_holder = pool->get();
pqxx::nontransaction tx(connection_holder->get());
try
@ -163,20 +150,15 @@ StoragePtr DatabasePostgreSQL::fetchTable(const String & table_name, ContextPtr
if (!table_checked && !checkPostgresTable(table_name))
return StoragePtr{};
auto use_nulls = local_context->getSettingsRef().external_table_functions_use_nulls;
auto columns = fetchPostgreSQLTableStructure(connection_pool->get(), doubleQuoteString(table_name), use_nulls);
auto connection_holder = pool->get();
auto columns = fetchPostgreSQLTableStructure(connection_holder->get(), doubleQuoteString(table_name)).columns;
if (!columns)
return StoragePtr{};
auto storage = StoragePostgreSQL::create(
StorageID(database_name, table_name),
connection_pool,
table_name,
ColumnsDescription{*columns},
ConstraintsDescription{},
String{},
local_context);
StorageID(database_name, table_name), pool, table_name,
ColumnsDescription{*columns}, ConstraintsDescription{}, String{}, local_context);
if (cache_tables)
cached_tables[table_name] = storage;
@ -298,7 +280,8 @@ void DatabasePostgreSQL::loadStoredObjects(ContextMutablePtr /* context */, bool
void DatabasePostgreSQL::removeOutdatedTables()
{
std::lock_guard<std::mutex> lock{mutex};
auto actual_tables = fetchTablesList();
auto connection_holder = pool->get();
auto actual_tables = fetchPostgreSQLTablesList(connection_holder->get());
if (cache_tables)
{

View File

@ -9,7 +9,7 @@
#include <Databases/DatabasesCommon.h>
#include <Core/BackgroundSchedulePool.h>
#include <Parsers/ASTCreateQuery.h>
#include <Storages/PostgreSQL/PoolWithFailover.h>
#include <Core/PostgreSQL/PoolWithFailover.h>
namespace DB
@ -33,7 +33,7 @@ public:
const ASTStorage * database_engine_define,
const String & dbname_,
const String & postgres_dbname,
postgres::PoolWithFailoverPtr connection_pool_,
postgres::PoolWithFailoverPtr pool_,
bool cache_tables_);
String getEngineName() const override { return "PostgreSQL"; }
@ -70,7 +70,7 @@ private:
String metadata_path;
ASTPtr database_engine_define;
String dbname;
postgres::PoolWithFailoverPtr connection_pool;
postgres::PoolWithFailoverPtr pool;
const bool cache_tables;
mutable Tables cached_tables;
@ -78,9 +78,11 @@ private:
BackgroundSchedulePool::TaskHolder cleaner_task;
bool checkPostgresTable(const String & table_name) const;
std::unordered_set<std::string> fetchTablesList() const;
StoragePtr fetchTable(const String & table_name, ContextPtr context, bool table_checked) const;
StoragePtr fetchTable(const String & table_name, ContextPtr context, const bool table_checked) const;
void removeOutdatedTables();
ASTPtr getColumnDeclaration(const DataTypePtr & data_type) const;
};

View File

@ -12,7 +12,8 @@
#include <DataTypes/DataTypeDateTime.h>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <pqxx/pqxx>
#include <Common/quoteString.h>
#include <Core/PostgreSQL/Utils.h>
namespace DB
@ -25,7 +26,21 @@ namespace ErrorCodes
}
static DataTypePtr convertPostgreSQLDataType(String & type, bool is_nullable, uint16_t dimensions, const std::function<void()> & recheck_array)
template<typename T>
std::unordered_set<std::string> fetchPostgreSQLTablesList(T & tx)
{
std::unordered_set<std::string> tables;
std::string query = "SELECT tablename FROM pg_catalog.pg_tables "
"WHERE schemaname != 'pg_catalog' AND schemaname != 'information_schema'";
for (auto table_name : tx.template stream<std::string>(query))
tables.insert(std::get<0>(table_name));
return tables;
}
static DataTypePtr convertPostgreSQLDataType(String & type, const std::function<void()> & recheck_array, bool is_nullable = false, uint16_t dimensions = 0)
{
DataTypePtr res;
bool is_array = false;
@ -116,52 +131,51 @@ static DataTypePtr convertPostgreSQLDataType(String & type, bool is_nullable, ui
}
std::shared_ptr<NamesAndTypesList> fetchPostgreSQLTableStructure(
postgres::ConnectionHolderPtr connection_holder, const String & postgres_table_name, bool use_nulls)
template<typename T>
std::shared_ptr<NamesAndTypesList> readNamesAndTypesList(
T & tx, const String & postgres_table_name, const String & query, bool use_nulls, bool only_names_and_types)
{
auto columns = NamesAndTypes();
if (postgres_table_name.find('\'') != std::string::npos
|| postgres_table_name.find('\\') != std::string::npos)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "PostgreSQL table name cannot contain single quote or backslash characters, passed {}",
postgres_table_name);
}
std::string query = fmt::format(
"SELECT attname AS name, format_type(atttypid, atttypmod) AS type, "
"attnotnull AS not_null, attndims AS dims "
"FROM pg_attribute "
"WHERE attrelid = '{}'::regclass "
"AND NOT attisdropped AND attnum > 0", postgres_table_name);
try
{
std::set<size_t> recheck_arrays_indexes;
{
pqxx::read_transaction tx(connection_holder->get());
auto stream{pqxx::stream_from::query(tx, query)};
std::tuple<std::string, std::string, std::string, uint16_t> row;
size_t i = 0;
auto recheck_array = [&]() { recheck_arrays_indexes.insert(i); };
while (stream >> row)
if (only_names_and_types)
{
auto data_type = convertPostgreSQLDataType(std::get<1>(row),
use_nulls && (std::get<2>(row) == "f"), /// 'f' means that postgres `not_null` is false, i.e. value is nullable
std::get<3>(row),
recheck_array);
columns.push_back(NameAndTypePair(std::get<0>(row), data_type));
++i;
std::tuple<std::string, std::string> row;
while (stream >> row)
{
columns.push_back(NameAndTypePair(std::get<0>(row), convertPostgreSQLDataType(std::get<1>(row), recheck_array)));
++i;
}
}
else
{
std::tuple<std::string, std::string, std::string, uint16_t> row;
while (stream >> row)
{
auto data_type = convertPostgreSQLDataType(std::get<1>(row),
recheck_array,
use_nulls && (std::get<2>(row) == "f"), /// 'f' means that postgres `not_null` is false, i.e. value is nullable
std::get<3>(row));
columns.push_back(NameAndTypePair(std::get<0>(row), data_type));
++i;
}
}
stream.complete();
tx.commit();
}
for (const auto & i : recheck_arrays_indexes)
{
const auto & name_and_type = columns[i];
pqxx::nontransaction tx(connection_holder->get());
/// All rows must contain the same number of dimensions, so LIMIT 1 is ok. If the number of dimensions differs between rows,
/// such arrays cannot be used as a ClickHouse Array at all.
pqxx::result result{tx.exec(fmt::format("SELECT array_ndims({}) FROM {} LIMIT 1", name_and_type.name, postgres_table_name))};
@ -178,9 +192,7 @@ std::shared_ptr<NamesAndTypesList> fetchPostgreSQLTableStructure(
catch (const pqxx::undefined_table &)
{
throw Exception(fmt::format(
"PostgreSQL table {}.{} does not exist",
connection_holder->get().dbname(), postgres_table_name), ErrorCodes::UNKNOWN_TABLE);
throw Exception(ErrorCodes::UNKNOWN_TABLE, "PostgreSQL table {} does not exist", postgres_table_name);
}
catch (Exception & e)
{
@ -188,12 +200,101 @@ std::shared_ptr<NamesAndTypesList> fetchPostgreSQLTableStructure(
throw;
}
if (columns.empty())
return nullptr;
return std::make_shared<NamesAndTypesList>(NamesAndTypesList(columns.begin(), columns.end()));
return !columns.empty() ? std::make_shared<NamesAndTypesList>(columns.begin(), columns.end()) : nullptr;
}
template<typename T>
PostgreSQLTableStructure fetchPostgreSQLTableStructure(
T & tx, const String & postgres_table_name, bool use_nulls, bool with_primary_key, bool with_replica_identity_index)
{
PostgreSQLTableStructure table;
std::string query = fmt::format(
"SELECT attname AS name, format_type(atttypid, atttypmod) AS type, "
"attnotnull AS not_null, attndims AS dims "
"FROM pg_attribute "
"WHERE attrelid = {}::regclass "
"AND NOT attisdropped AND attnum > 0", quoteString(postgres_table_name));
table.columns = readNamesAndTypesList(tx, postgres_table_name, query, use_nulls, false);
if (with_primary_key)
{
/// wiki.postgresql.org/wiki/Retrieve_primary_key_columns
query = fmt::format(
"SELECT a.attname, format_type(a.atttypid, a.atttypmod) AS data_type "
"FROM pg_index i "
"JOIN pg_attribute a ON a.attrelid = i.indrelid "
"AND a.attnum = ANY(i.indkey) "
"WHERE i.indrelid = {}::regclass AND i.indisprimary", quoteString(postgres_table_name));
table.primary_key_columns = readNamesAndTypesList(tx, postgres_table_name, query, use_nulls, true);
}
if (with_replica_identity_index && !table.primary_key_columns)
{
query = fmt::format(
"SELECT "
"a.attname AS column_name, " /// column name
"format_type(a.atttypid, a.atttypmod) as type " /// column type
"FROM "
"pg_class t, "
"pg_class i, "
"pg_index ix, "
"pg_attribute a "
"WHERE "
"t.oid = ix.indrelid "
"and i.oid = ix.indexrelid "
"and a.attrelid = t.oid "
"and a.attnum = ANY(ix.indkey) "
"and t.relkind = 'r' " /// simple tables
"and t.relname = {} " /// The connection is already established to the needed database, so only the table name is needed.
"and ix.indisreplident = 't' " /// the index is a replica identity index
"ORDER BY a.attname", /// column names
quoteString(postgres_table_name));
table.replica_identity_columns = readNamesAndTypesList(tx, postgres_table_name, query, use_nulls, true);
}
return table;
}
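
The primary-key and replica-identity lookups above are plain catalog queries, so they can be tried directly against a server. A minimal sketch (connection string and table name are placeholders; assumes libpqxx) running the same pg_index query used for primary keys:

#include <iostream>
#include <pqxx/pqxx>

/// Standalone sketch: fetch primary key columns of a table with the same pg_index
/// query used above. Connection string and table name are placeholders.
int main()
{
    pqxx::connection connection("dbname=postgres user=postgres");
    pqxx::read_transaction tx(connection);
    auto result = tx.exec(
        "SELECT a.attname, format_type(a.atttypid, a.atttypmod) AS data_type "
        "FROM pg_index i "
        "JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey) "
        "WHERE i.indrelid = 'my_table'::regclass AND i.indisprimary");
    for (const auto & row : result)
        std::cout << row[0].c_str() << " : " << row[1].c_str() << '\n';
}
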
PostgreSQLTableStructure fetchPostgreSQLTableStructure(pqxx::connection & connection, const String & postgres_table_name, bool use_nulls)
{
pqxx::ReadTransaction tx(connection);
auto result = fetchPostgreSQLTableStructure(tx, postgres_table_name, use_nulls, false, false);
tx.commit();
return result;
}
std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::connection & connection)
{
pqxx::ReadTransaction tx(connection);
auto result = fetchPostgreSQLTablesList(tx);
tx.commit();
return result;
}
template
PostgreSQLTableStructure fetchPostgreSQLTableStructure(
pqxx::ReadTransaction & tx, const String & postgres_table_name, bool use_nulls,
bool with_primary_key, bool with_replica_identity_index);
template
PostgreSQLTableStructure fetchPostgreSQLTableStructure(
pqxx::ReplicationTransaction & tx, const String & postgres_table_name, bool use_nulls,
bool with_primary_key, bool with_replica_identity_index);
template
std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::work & tx);
template
std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::ReadTransaction & tx);
}
#endif

View File

@ -5,15 +5,34 @@
#endif
#if USE_LIBPQXX
#include <Storages/PostgreSQL/ConnectionHolder.h>
#include <Core/PostgreSQL/ConnectionHolder.h>
#include <Core/NamesAndTypes.h>
namespace DB
{
std::shared_ptr<NamesAndTypesList> fetchPostgreSQLTableStructure(
postgres::ConnectionHolderPtr connection_holder, const String & postgres_table_name, bool use_nulls);
struct PostgreSQLTableStructure
{
std::shared_ptr<NamesAndTypesList> columns = nullptr;
std::shared_ptr<NamesAndTypesList> primary_key_columns = nullptr;
std::shared_ptr<NamesAndTypesList> replica_identity_columns = nullptr;
};
using PostgreSQLTableStructurePtr = std::unique_ptr<PostgreSQLTableStructure>;
std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::connection & connection);
PostgreSQLTableStructure fetchPostgreSQLTableStructure(
pqxx::connection & connection, const String & postgres_table_name, bool use_nulls = true);
template<typename T>
PostgreSQLTableStructure fetchPostgreSQLTableStructure(
T & tx, const String & postgres_table_name, bool use_nulls = true,
bool with_primary_key = false, bool with_replica_identity_index = false);
template<typename T>
std::unordered_set<std::string> fetchPostgreSQLTablesList(T & tx);
}

View File

@ -107,9 +107,10 @@ BlockInputStreamPtr PostgreSQLDictionarySource::loadKeys(const Columns & key_col
BlockInputStreamPtr PostgreSQLDictionarySource::loadBase(const String & query)
{
return std::make_shared<PostgreSQLBlockInputStream>(pool->get(), query, sample_block, max_block_size);
return std::make_shared<PostgreSQLBlockInputStream<>>(pool->get(), query, sample_block, max_block_size);
}
bool PostgreSQLDictionarySource::isModified() const
{
if (!configuration.invalidate_query.empty())
@ -128,7 +129,7 @@ std::string PostgreSQLDictionarySource::doInvalidateQuery(const std::string & re
Block invalidate_sample_block;
ColumnPtr column(ColumnString::create());
invalidate_sample_block.insert(ColumnWithTypeAndName(column, std::make_shared<DataTypeString>(), "Sample Block"));
PostgreSQLBlockInputStream block_input_stream(pool->get(), request, invalidate_sample_block, 1);
PostgreSQLBlockInputStream<> block_input_stream(pool->get(), request, invalidate_sample_block, 1);
return readInvalidateQuery(block_input_stream);
}

View File

@ -11,8 +11,7 @@
#include <Core/Block.h>
#include <common/LocalDateTime.h>
#include <common/logger_useful.h>
#include <Storages/PostgreSQL/PoolWithFailover.h>
#include <pqxx/pqxx>
#include <Core/PostgreSQL/PoolWithFailover.h>
namespace DB


View File

throw Exception("Cannot apply function " + std::string(Impl::name) + " to Array argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
[[noreturn]] static void uuid(const ColumnUUID::Container & /*offsets*/, size_t /*n*/, PaddedPODArray<ReturnType> & /*res*/)
{
throw Exception("Cannot apply function " + std::string(Impl::name) + " to UUID argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
private:
static ReturnType doCRC(const ColumnString::Chars & buf, size_t offset, size_t size)
{

View File

@ -54,6 +54,12 @@ struct EmptyImpl
prev_offset = offsets[i];
}
}
static void uuid(const ColumnUUID::Container & container, size_t n, PaddedPODArray<UInt8> & res)
{
for (size_t i = 0; i < n; ++i)
res[i] = negative ^ (container[i].toUnderType() == 0);
}
};
}

View File

@ -8,6 +8,7 @@
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnMap.h>
#include <Columns/ColumnsNumber.h>
namespace DB
@ -43,7 +44,9 @@ public:
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (!isStringOrFixedString(arguments[0])
&& !isArray(arguments[0]) && !isMap(arguments[0]))
&& !isArray(arguments[0])
&& !isMap(arguments[0])
&& !isUUID(arguments[0]))
throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_shared<DataTypeNumber<ResultType>>();
@ -51,7 +54,7 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
const ColumnPtr column = arguments[0].column;
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column.get()))
@ -104,6 +107,14 @@ public:
Impl::array(col_nested.getOffsets(), vec_res);
return col_res;
}
else if (const ColumnUUID * col_uuid = checkAndGetColumn<ColumnUUID>(column.get()))
{
auto col_res = ColumnVector<ResultType>::create();
typename ColumnVector<ResultType>::Container & vec_res = col_res->getData();
vec_res.resize(col_uuid->size());
Impl::uuid(col_uuid->getData(), input_rows_count, vec_res);
return col_res;
}
else
throw Exception("Illegal column " + arguments[0].column->getName() + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);

View File

@ -1265,23 +1265,7 @@ public:
assert(2 == types.size() && 2 == values.size());
auto & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * x = values[0];
auto * y = values[1];
if (!types[0]->equals(*types[1]))
{
llvm::Type * common;
if (x->getType()->isIntegerTy() && y->getType()->isIntegerTy())
common = b.getIntNTy(std::max(
/// if one integer has a sign bit, make sure the other does as well. llvm generates optimal code
/// (e.g. uses overflow flag on x86) for (word size + 1)-bit integer operations.
x->getType()->getIntegerBitWidth() + (!typeIsSigned(*types[0]) && typeIsSigned(*types[1])),
y->getType()->getIntegerBitWidth() + (!typeIsSigned(*types[1]) && typeIsSigned(*types[0]))));
else
/// (double, float) or (double, int_N where N <= double's mantissa width) -> double
common = b.getDoubleTy();
x = nativeCast(b, types[0], x, common);
y = nativeCast(b, types[1], y, common);
}
auto [x, y] = nativeCastToCommon(b, types[0], values[0], types[1], values[1]);
auto * result = CompileOp<Op>::compile(b, x, y, typeIsSigned(*types[0]) || typeIsSigned(*types[1]));
return b.CreateSelect(result, b.getInt8(1), b.getInt8(0));
}

View File

@ -10,14 +10,20 @@
#include <Common/assert_cast.h>
#include <Core/Settings.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnTuple.h>
#include <DataTypes/Serializations/SerializationDecimal.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeUUID.h>
#include <DataTypes/DataTypeEnum.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeNullable.h>
@ -528,6 +534,7 @@ public:
}
};
template <typename JSONParser>
using JSONExtractInt8Impl = JSONExtractNumericImpl<JSONParser, Int8>;
template <typename JSONParser>
@ -625,6 +632,60 @@ struct JSONExtractTree
}
};
class LowCardinalityNode : public Node
{
public:
LowCardinalityNode(DataTypePtr dictionary_type_, std::unique_ptr<Node> impl_)
: dictionary_type(dictionary_type_), impl(std::move(impl_)) {}
bool insertResultToColumn(IColumn & dest, const Element & element) override
{
auto from_col = dictionary_type->createColumn();
if (impl->insertResultToColumn(*from_col, element))
{
StringRef value = from_col->getDataAt(0);
assert_cast<ColumnLowCardinality &>(dest).insertData(value.data, value.size);
return true;
}
return false;
}
private:
DataTypePtr dictionary_type;
std::unique_ptr<Node> impl;
};
class UUIDNode : public Node
{
public:
bool insertResultToColumn(IColumn & dest, const Element & element) override
{
if (!element.isString())
return false;
auto uuid = parseFromString<UUID>(element.getString());
assert_cast<ColumnUUID &>(dest).insert(uuid);
return true;
}
};
template <typename DecimalType>
class DecimalNode : public Node
{
public:
DecimalNode(DataTypePtr data_type_) : data_type(data_type_) {}
bool insertResultToColumn(IColumn & dest, const Element & element) override
{
if (!element.isDouble())
return false;
const auto * type = assert_cast<const DataTypeDecimal<DecimalType> *>(data_type.get());
auto result = convertToDecimal<DataTypeNumber<Float64>, DataTypeDecimal<DecimalType>>(element.getDouble(), type->getScale());
assert_cast<ColumnDecimal<DecimalType> &>(dest).insert(result);
return true;
}
private:
DataTypePtr data_type;
};
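
DecimalNode above stores a JSON double by rescaling it to the target decimal's scale via convertToDecimal. A rough sketch of the rescaling idea itself (illustrative only; the real conversion also performs overflow and precision checks):

#include <cmath>
#include <cstdint>
#include <iostream>

/// Illustrative sketch: a JSON double becomes a scaled integer representation,
/// e.g. 3.14159 with scale 4 is stored as 31416.
int64_t toScaledDecimal(double value, uint32_t scale)
{
    return static_cast<int64_t>(std::llround(value * std::pow(10.0, scale)));
}

int main()
{
    std::cout << toScaledDecimal(3.14159, 4) << '\n'; /// prints 31416
}
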
class StringNode : public Node
{
public:
@ -864,6 +925,17 @@ struct JSONExtractTree
case TypeIndex::Float64: return std::make_unique<NumericNode<Float64>>();
case TypeIndex::String: return std::make_unique<StringNode>();
case TypeIndex::FixedString: return std::make_unique<FixedStringNode>();
case TypeIndex::UUID: return std::make_unique<UUIDNode>();
case TypeIndex::LowCardinality:
{
auto dictionary_type = typeid_cast<const DataTypeLowCardinality *>(type.get())->getDictionaryType();
auto impl = build(function_name, dictionary_type);
return std::make_unique<LowCardinalityNode>(dictionary_type, std::move(impl));
}
case TypeIndex::Decimal256: return std::make_unique<DecimalNode<Decimal256>>(type);
case TypeIndex::Decimal128: return std::make_unique<DecimalNode<Decimal128>>(type);
case TypeIndex::Decimal64: return std::make_unique<DecimalNode<Decimal64>>(type);
case TypeIndex::Decimal32: return std::make_unique<DecimalNode<Decimal32>>(type);
case TypeIndex::Enum8:
return std::make_unique<EnumNode<Int8>>(static_cast<const DataTypeEnum8 &>(*type).getValues());
case TypeIndex::Enum16:

View File

@ -5,7 +5,10 @@
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
/** Calculates the length of a string in bytes.
*/
@ -35,6 +38,11 @@ struct LengthImpl
for (size_t i = 0; i < size; ++i)
res[i] = offsets[i] - offsets[i - 1];
}
[[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray<UInt64> &)
{
throw Exception("Cannot apply function length to UUID argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
};

View File

@ -317,6 +317,11 @@ SOFTWARE.
{
throw Exception("Cannot apply function isValidUTF8 to Array argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
[[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray<UInt8> &)
{
throw Exception("Cannot apply function isValidUTF8 to UUID argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
};
struct NameIsValidUTF8

View File

@ -53,6 +53,11 @@ struct LengthUTF8Impl
{
throw Exception("Cannot apply function lengthUTF8 to Array argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
[[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray<UInt64> &)
{
throw Exception("Cannot apply function lengthUTF8 to UUID argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
};
struct NameLengthUTF8

View File

@ -29,6 +29,11 @@ public:
return name;
}
bool isInjective(const ColumnsWithTypeAndName & /*sample_columns*/) const override
{
return true;
}
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
size_t getNumberOfArguments() const override

View File

@ -2,6 +2,7 @@
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/IFunction.h>
#include <Formats/FormatFactory.h>
#include <IO/WriteBufferFromVector.h>
#include <IO/WriteHelpers.h>
@ -13,7 +14,9 @@ namespace
{
public:
static constexpr auto name = "toJSONString";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionToJSONString>(); }
static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionToJSONString>(context); }
explicit FunctionToJSONString(ContextPtr context) : format_settings(getFormatSettings(context)) {}
String getName() const override { return name; }
@ -35,7 +38,7 @@ namespace
WriteBufferFromVector<ColumnString::Chars> json(data_to);
for (size_t i = 0; i < input_rows_count; ++i)
{
serializer->serializeTextJSON(*arguments[0].column, i, json, FormatSettings());
serializer->serializeTextJSON(*arguments[0].column, i, json, format_settings);
writeChar(0, json);
offsets_to[i] = json.count();
}
@ -43,6 +46,10 @@ namespace
json.finalize();
return res;
}
private:
/// Affects only the subset of settings related to JSON output.
const FormatSettings format_settings;
};
}

View File

@ -8,7 +8,6 @@
#include <Functions/materialize.h>
#include <Functions/FunctionsLogical.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExpressionJIT.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>

View File

@ -27,8 +27,6 @@ using FunctionOverloadResolverPtr = std::shared_ptr<IFunctionOverloadResolver>;
class IDataType;
using DataTypePtr = std::shared_ptr<const IDataType>;
class CompiledExpressionCache;
namespace JSONBuilder
{
class JSONMap;

View File

@ -1,5 +1,6 @@
#include <future>
#include <Poco/Util/Application.h>
#include <Common/Stopwatch.h>
#include <Common/setThreadName.h>
#include <Common/formatReadable.h>
@ -21,6 +22,8 @@
#include <AggregateFunctions/AggregateFunctionArray.h>
#include <AggregateFunctions/AggregateFunctionState.h>
#include <IO/Operators.h>
#include <Interpreters/JIT/compileFunction.h>
#include <Interpreters/JIT/CompiledExpressionCache.h>
namespace ProfileEvents
@ -211,6 +214,32 @@ void Aggregator::Params::explain(JSONBuilder::JSONMap & map) const
}
}
#if USE_EMBEDDED_COMPILER
static CHJIT & getJITInstance()
{
static CHJIT jit;
return jit;
}
class CompiledAggregateFunctionsHolder final : public CompiledExpressionCacheEntry
{
public:
explicit CompiledAggregateFunctionsHolder(CompiledAggregateFunctions compiled_function_)
: CompiledExpressionCacheEntry(compiled_function_.compiled_module.size)
, compiled_aggregate_functions(compiled_function_)
{}
~CompiledAggregateFunctionsHolder() override
{
getJITInstance().deleteCompiledModule(compiled_aggregate_functions.compiled_module);
}
CompiledAggregateFunctions compiled_aggregate_functions;
};
#endif
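
The holder above ties the lifetime of a JIT-compiled module to a cache entry: when the entry is destroyed (for example on LRU eviction), the compiled code is released. A minimal standalone sketch of that pattern, using hypothetical FakeJIT/FakeModule stand-ins rather than the real CHJIT API:

#include <cstddef>
#include <iostream>
#include <memory>

// Hypothetical stand-ins for the JIT engine and its compiled module.
struct FakeModule { size_t size = 0; };
struct FakeJIT
{
    FakeModule compile(size_t size) { return FakeModule{size}; }
    void deleteModule(const FakeModule & m) { std::cout << "freed " << m.size << " bytes\n"; }
};

FakeJIT & jit() { static FakeJIT instance; return instance; }

// Base entry: the cache only needs to know the weight (code size) of the payload.
class CacheEntry
{
public:
    explicit CacheEntry(size_t size_) : size(size_) {}
    virtual ~CacheEntry() = default;
    size_t weight() const { return size; }
private:
    size_t size;
};

// Holder: destroying the entry (e.g. on eviction) frees the JIT-ed code.
class ModuleHolder : public CacheEntry
{
public:
    explicit ModuleHolder(FakeModule module_) : CacheEntry(module_.size), module(module_) {}
    ~ModuleHolder() override { jit().deleteModule(module); }
    FakeModule module;
};

int main()
{
    std::shared_ptr<CacheEntry> entry = std::make_shared<ModuleHolder>(jit().compile(4096));
    std::cout << "cached entry weight: " << entry->weight() << '\n';
    /// Dropping the last reference releases the compiled module.
    entry.reset();
}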
Aggregator::Aggregator(const Params & params_)
: params(params_)
{
@ -262,8 +291,93 @@ Aggregator::Aggregator(const Params & params_)
HashMethodContext::Settings cache_settings;
cache_settings.max_threads = params.max_threads;
aggregation_state_cache = AggregatedDataVariants::createCache(method_chosen, cache_settings);
#if USE_EMBEDDED_COMPILER
compileAggregateFunctions();
#endif
}
#if USE_EMBEDDED_COMPILER
void Aggregator::compileAggregateFunctions()
{
static std::unordered_map<UInt128, UInt64, UInt128Hash> aggregate_functions_description_to_count;
static std::mutex mtx;
if (!params.compile_aggregate_expressions)
return;
std::vector<AggregateFunctionWithOffset> functions_to_compile;
size_t aggregate_instructions_size = 0;
String functions_description;
is_aggregate_function_compiled.resize(aggregate_functions.size());
/// Collect aggregate functions that can be compiled.
for (size_t i = 0; i < aggregate_functions.size(); ++i)
{
const auto * function = aggregate_functions[i];
size_t offset_of_aggregate_function = offsets_of_aggregate_states[i];
if (function->isCompilable())
{
AggregateFunctionWithOffset function_to_compile
{
.function = function,
.aggregate_data_offset = offset_of_aggregate_function
};
functions_to_compile.emplace_back(std::move(function_to_compile));
functions_description += function->getDescription();
functions_description += ' ';
functions_description += std::to_string(offset_of_aggregate_function);
functions_description += ' ';
}
++aggregate_instructions_size;
is_aggregate_function_compiled[i] = function->isCompilable();
}
if (functions_to_compile.empty())
return;
SipHash aggregate_functions_description_hash;
aggregate_functions_description_hash.update(functions_description);
UInt128 aggregate_functions_description_hash_key;
aggregate_functions_description_hash.get128(aggregate_functions_description_hash_key);
{
std::lock_guard<std::mutex> lock(mtx);
if (aggregate_functions_description_to_count[aggregate_functions_description_hash_key]++ < params.min_count_to_compile_aggregate_expression)
return;
if (auto * compilation_cache = CompiledExpressionCacheFactory::instance().tryGetCache())
{
auto [compiled_function_cache_entry, _] = compilation_cache->getOrSet(aggregate_functions_description_hash_key, [&] ()
{
LOG_TRACE(log, "Compile expression {}", functions_description);
auto compiled_aggregate_functions = compileAggregateFunctons(getJITInstance(), functions_to_compile, functions_description);
return std::make_shared<CompiledAggregateFunctionsHolder>(std::move(compiled_aggregate_functions));
});
compiled_aggregate_functions_holder = std::static_pointer_cast<CompiledAggregateFunctionsHolder>(compiled_function_cache_entry);
}
else
{
LOG_TRACE(log, "Compile expression {}", functions_description);
auto compiled_aggregate_functions = compileAggregateFunctons(getJITInstance(), functions_to_compile, functions_description);
compiled_aggregate_functions_holder = std::make_shared<CompiledAggregateFunctionsHolder>(std::move(compiled_aggregate_functions));
}
}
}
#endif
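
compileAggregateFunctions gates compilation on how often the same combination of aggregate functions has been seen. A standalone sketch of that gate, assuming a plain string key instead of the 128-bit SipHash key used above:

#include <cstdint>
#include <iostream>
#include <mutex>
#include <string>
#include <unordered_map>

// Compile only after the same expression description was seen at least
// min_count_to_compile times; the counter is shared between queries.
static bool shouldCompile(const std::string & description, uint64_t min_count_to_compile)
{
    static std::unordered_map<std::string, uint64_t> counters;
    static std::mutex mutex;

    std::lock_guard<std::mutex> lock(mutex);
    return counters[description]++ >= min_count_to_compile;
}

int main()
{
    const std::string description = "sum(UInt64) 0 avg(Float64) 16";
    for (int query = 0; query < 5; ++query)
        std::cout << "query " << query << ": compile = " << shouldCompile(description, 3) << '\n';
}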
AggregatedDataVariants::Type Aggregator::chooseAggregationMethod()
{
@ -431,11 +545,15 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod()
return AggregatedDataVariants::Type::serialized;
}
template <bool skip_compiled_aggregate_functions>
void Aggregator::createAggregateStates(AggregateDataPtr & aggregate_data) const
{
for (size_t j = 0; j < params.aggregates_size; ++j)
{
if constexpr (skip_compiled_aggregate_functions)
if (is_aggregate_function_compiled[j])
continue;
try
{
/** An exception may occur if there is a shortage of memory.
@ -447,14 +565,19 @@ void Aggregator::createAggregateStates(AggregateDataPtr & aggregate_data) const
catch (...)
{
for (size_t rollback_j = 0; rollback_j < j; ++rollback_j)
{
if constexpr (skip_compiled_aggregate_functions)
if (is_aggregate_function_compiled[rollback_j])
continue;
aggregate_functions[rollback_j]->destroy(aggregate_data + offsets_of_aggregate_states[rollback_j]);
}
throw;
}
}
}
/** It's interesting - if you remove `noinline`, then gcc for some reason will inline this function, and the performance decreases (~ 10%).
* (Probably because after the inline of this function, more internal functions no longer be inlined.)
* Inline does not make sense, since the inner loop is entirely inside this function.
@ -472,13 +595,25 @@ void NO_INLINE Aggregator::executeImpl(
typename Method::State state(key_columns, key_sizes, aggregation_state_cache);
if (!no_more_keys)
executeImplBatch<false>(method, state, aggregates_pool, rows, aggregate_instructions, overflow_row);
{
#if USE_EMBEDDED_COMPILER
if (compiled_aggregate_functions_holder)
{
executeImplBatch<false, true>(method, state, aggregates_pool, rows, aggregate_instructions, overflow_row);
}
else
#endif
{
executeImplBatch<false, false>(method, state, aggregates_pool, rows, aggregate_instructions, overflow_row);
}
}
else
executeImplBatch<true>(method, state, aggregates_pool, rows, aggregate_instructions, overflow_row);
{
executeImplBatch<true, false>(method, state, aggregates_pool, rows, aggregate_instructions, overflow_row);
}
}
template <bool no_more_keys, typename Method>
template <bool no_more_keys, bool use_compiled_functions, typename Method>
void NO_INLINE Aggregator::executeImplBatch(
Method & method,
typename Method::State & state,
@ -535,8 +670,6 @@ void NO_INLINE Aggregator::executeImplBatch(
}
}
/// Generic case.
std::unique_ptr<AggregateDataPtr[]> places(new AggregateDataPtr[rows]);
/// For all rows.
@ -555,7 +688,37 @@ void NO_INLINE Aggregator::executeImplBatch(
emplace_result.setMapped(nullptr);
aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states);
createAggregateStates(aggregate_data);
#if USE_EMBEDDED_COMPILER
if constexpr (use_compiled_functions)
{
const auto & compiled_aggregate_functions = compiled_aggregate_functions_holder->compiled_aggregate_functions;
compiled_aggregate_functions.create_aggregate_states_function(aggregate_data);
if (compiled_aggregate_functions.functions_count != aggregate_functions.size())
{
static constexpr bool skip_compiled_aggregate_functions = true;
createAggregateStates<skip_compiled_aggregate_functions>(aggregate_data);
}
#if defined(MEMORY_SANITIZER)
/// We compile only functions that do not allocate any data in Arena; they only store the necessary state in the AggregateData place.
for (size_t aggregate_function_index = 0; aggregate_function_index < aggregate_functions.size(); ++aggregate_function_index)
{
if (!is_aggregate_function_compiled[aggregate_function_index])
continue;
auto aggregate_data_with_offset = aggregate_data + offsets_of_aggregate_states[aggregate_function_index];
auto data_size = params.aggregates[aggregate_function_index].function->sizeOfData();
__msan_unpoison(aggregate_data_with_offset, data_size);
}
#endif
}
else
#endif
{
createAggregateStates(aggregate_data);
}
emplace_result.setMapped(aggregate_data);
}
@ -577,9 +740,39 @@ void NO_INLINE Aggregator::executeImplBatch(
places[i] = aggregate_data;
}
/// Add values to the aggregate functions.
for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst)
#if USE_EMBEDDED_COMPILER
if constexpr (use_compiled_functions)
{
std::vector<ColumnData> columns_data;
for (size_t i = 0; i < aggregate_functions.size(); ++i)
{
if (!is_aggregate_function_compiled[i])
continue;
AggregateFunctionInstruction * inst = aggregate_instructions + i;
size_t arguments_size = inst->that->getArgumentTypes().size();
for (size_t argument_index = 0; argument_index < arguments_size; ++argument_index)
columns_data.emplace_back(getColumnData(inst->batch_arguments[argument_index]));
}
auto add_into_aggregate_states_function = compiled_aggregate_functions_holder->compiled_aggregate_functions.add_into_aggregate_states_function;
add_into_aggregate_states_function(rows, columns_data.data(), places.get());
}
#endif
/// Add values to the aggregate functions.
for (size_t i = 0; i < aggregate_functions.size(); ++i)
{
#if USE_EMBEDDED_COMPILER
if constexpr (use_compiled_functions)
if (is_aggregate_function_compiled[i])
continue;
#endif
AggregateFunctionInstruction * inst = aggregate_instructions + i;
if (inst->offsets)
inst->batch_that->addBatchArray(rows, places.get(), inst->state_offset, inst->batch_arguments, inst->offsets, aggregates_pool);
else
@ -720,6 +913,7 @@ bool Aggregator::executeOnBlock(Columns columns, UInt64 num_rows, AggregatedData
}
}
}
NestedColumnsHolder nested_columns_holder;
AggregateFunctionInstructions aggregate_functions_instructions;
prepareAggregateInstructions(columns, aggregate_columns, materialized_columns, aggregate_functions_instructions, nested_columns_holder);
@ -1025,9 +1219,23 @@ void Aggregator::convertToBlockImpl(
raw_key_columns.push_back(column.get());
if (final)
convertToBlockImplFinal(method, data, std::move(raw_key_columns), final_aggregate_columns, arena);
{
#if USE_EMBEDDED_COMPILER
if (compiled_aggregate_functions_holder)
{
static constexpr bool use_compiled_functions = !Method::low_cardinality_optimization;
convertToBlockImplFinal<Method, use_compiled_functions>(method, data, std::move(raw_key_columns), final_aggregate_columns, arena);
}
else
#endif
{
convertToBlockImplFinal<Method, false>(method, data, std::move(raw_key_columns), final_aggregate_columns, arena);
}
}
else
{
convertToBlockImplNotFinal(method, data, std::move(raw_key_columns), aggregate_columns);
}
/// In order to release memory early.
data.clearAndShrink();
}
@ -1101,7 +1309,7 @@ inline void Aggregator::insertAggregatesIntoColumns(
}
template <typename Method, typename Table>
template <typename Method, bool use_compiled_functions, typename Table>
void NO_INLINE Aggregator::convertToBlockImplFinal(
Method & method,
Table & data,
@ -1121,11 +1329,98 @@ void NO_INLINE Aggregator::convertToBlockImplFinal(
auto shuffled_key_sizes = method.shuffleKeyColumns(key_columns, key_sizes);
const auto & key_sizes_ref = shuffled_key_sizes ? *shuffled_key_sizes : key_sizes;
PaddedPODArray<AggregateDataPtr> places;
places.reserve(data.size());
data.forEachValue([&](const auto & key, auto & mapped)
{
method.insertKeyIntoColumns(key, key_columns, key_sizes_ref);
insertAggregatesIntoColumns(mapped, final_aggregate_columns, arena);
places.emplace_back(mapped);
/// Mark the cell as destroyed so it will not be destroyed in destructor.
mapped = nullptr;
});
std::exception_ptr exception;
size_t aggregate_functions_destroy_index = 0;
try
{
#if USE_EMBEDDED_COMPILER
if constexpr (use_compiled_functions)
{
/** For JIT-compiled functions we need to resize the columns before passing them into the compiled code.
* The insert_aggregates_into_columns_function does not throw exceptions.
*/
std::vector<ColumnData> columns_data;
auto compiled_functions = compiled_aggregate_functions_holder->compiled_aggregate_functions;
for (size_t i = 0; i < params.aggregates_size; ++i)
{
if (!is_aggregate_function_compiled[i])
continue;
auto & final_aggregate_column = final_aggregate_columns[i];
final_aggregate_column = final_aggregate_column->cloneResized(places.size());
columns_data.emplace_back(getColumnData(final_aggregate_column.get()));
}
auto insert_aggregates_into_columns_function = compiled_functions.insert_aggregates_into_columns_function;
insert_aggregates_into_columns_function(places.size(), columns_data.data(), places.data());
}
#endif
for (; aggregate_functions_destroy_index < params.aggregates_size;)
{
if constexpr (use_compiled_functions)
{
if (is_aggregate_function_compiled[aggregate_functions_destroy_index])
{
++aggregate_functions_destroy_index;
continue;
}
}
auto & final_aggregate_column = final_aggregate_columns[aggregate_functions_destroy_index];
size_t offset = offsets_of_aggregate_states[aggregate_functions_destroy_index];
/** We increase aggregate_functions_destroy_index because, by the function contract, if insertResultIntoBatch
* throws an exception, it must also destroy all the states it is responsible for.
* The code then needs to continue destroying the states of the remaining aggregate functions starting from the next index.
*/
size_t destroy_index = aggregate_functions_destroy_index;
++aggregate_functions_destroy_index;
/// For State AggregateFunction ownership of aggregate place is passed to result column after insert
bool is_state = aggregate_functions[destroy_index]->isState();
bool destroy_place_after_insert = !is_state;
aggregate_functions[destroy_index]->insertResultIntoBatch(places.size(), places.data(), offset, *final_aggregate_column, arena, destroy_place_after_insert);
}
}
catch (...)
{
exception = std::current_exception();
}
for (; aggregate_functions_destroy_index < params.aggregates_size; ++aggregate_functions_destroy_index)
{
if constexpr (use_compiled_functions)
{
if (is_aggregate_function_compiled[aggregate_functions_destroy_index])
continue;
}
size_t offset = offsets_of_aggregate_states[aggregate_functions_destroy_index];
aggregate_functions[aggregate_functions_destroy_index]->destroyBatch(places.size(), places.data(), offset);
}
if (exception)
std::rethrow_exception(exception);
}
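
The finalization loop above relies on the contract spelled out in the comment: a throwing insert destroys its own states, and the surrounding code then destroys the remaining states before rethrowing. A standalone sketch of that exception-safety pattern, with a hypothetical Agg type in place of IAggregateFunction:

#include <exception>
#include <iostream>
#include <stdexcept>
#include <vector>

/// Hypothetical aggregate function: inserting its result may throw, but by contract
/// it then cleans up its own state.
struct Agg
{
    bool throws = false;
    void insertResult() const
    {
        if (throws)
            throw std::runtime_error("insert failed");
        std::cout << "inserted\n";
    }
    void destroyState() const { std::cout << "destroyed\n"; }
};

/// Insert results of all functions; if one throws, the remaining states still
/// have to be destroyed before the exception is propagated.
void finalize(const std::vector<Agg> & functions)
{
    std::exception_ptr exception;
    size_t destroy_index = 0;

    try
    {
        for (; destroy_index < functions.size();)
        {
            size_t current = destroy_index;
            ++destroy_index; /// By contract the throwing function destroys its own state.
            functions[current].insertResult();
        }
    }
    catch (...)
    {
        exception = std::current_exception();
    }

    for (; destroy_index < functions.size(); ++destroy_index)
        functions[destroy_index].destroyState();

    if (exception)
        std::rethrow_exception(exception);
}

int main()
{
    try
    {
        finalize({Agg{false}, Agg{true}, Agg{false}});
    }
    catch (const std::exception & e)
    {
        std::cout << "rethrown: " << e.what() << '\n';
    }
}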
template <typename Method, typename Table>
@ -1545,7 +1840,7 @@ void NO_INLINE Aggregator::mergeDataNullKey(
}
template <typename Method, typename Table>
template <typename Method, bool use_compiled_functions, typename Table>
void NO_INLINE Aggregator::mergeDataImpl(
Table & table_dst,
Table & table_src,
@ -1554,19 +1849,40 @@ void NO_INLINE Aggregator::mergeDataImpl(
if constexpr (Method::low_cardinality_optimization)
mergeDataNullKey<Method, Table>(table_dst, table_src, arena);
table_src.mergeToViaEmplace(table_dst,
[&](AggregateDataPtr & __restrict dst, AggregateDataPtr & __restrict src, bool inserted)
table_src.mergeToViaEmplace(table_dst, [&](AggregateDataPtr & __restrict dst, AggregateDataPtr & __restrict src, bool inserted)
{
if (!inserted)
{
for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->merge(
dst + offsets_of_aggregate_states[i],
src + offsets_of_aggregate_states[i],
arena);
#if USE_EMBEDDED_COMPILER
if constexpr (use_compiled_functions)
{
const auto & compiled_functions = compiled_aggregate_functions_holder->compiled_aggregate_functions;
compiled_functions.merge_aggregate_states_function(dst, src);
for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->destroy(src + offsets_of_aggregate_states[i]);
if (compiled_aggregate_functions_holder->compiled_aggregate_functions.functions_count != params.aggregates_size)
{
for (size_t i = 0; i < params.aggregates_size; ++i)
{
if (!is_aggregate_function_compiled[i])
aggregate_functions[i]->merge(dst + offsets_of_aggregate_states[i], src + offsets_of_aggregate_states[i], arena);
}
for (size_t i = 0; i < params.aggregates_size; ++i)
{
if (!is_aggregate_function_compiled[i])
aggregate_functions[i]->destroy(src + offsets_of_aggregate_states[i]);
}
}
}
else
#endif
{
for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->merge(dst + offsets_of_aggregate_states[i], src + offsets_of_aggregate_states[i], arena);
for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->destroy(src + offsets_of_aggregate_states[i]);
}
}
else
{
@ -1575,6 +1891,7 @@ void NO_INLINE Aggregator::mergeDataImpl(
src = nullptr;
});
table_src.clearAndShrink();
}
@ -1677,21 +1994,39 @@ void NO_INLINE Aggregator::mergeSingleLevelDataImpl(
AggregatedDataVariants & current = *non_empty_data[result_num];
if (!no_more_keys)
mergeDataImpl<Method>(
getDataVariant<Method>(*res).data,
getDataVariant<Method>(current).data,
res->aggregates_pool);
{
#if USE_EMBEDDED_COMPILER
if (compiled_aggregate_functions_holder)
{
mergeDataImpl<Method, true>(
getDataVariant<Method>(*res).data,
getDataVariant<Method>(current).data,
res->aggregates_pool);
}
else
#endif
{
mergeDataImpl<Method, false>(
getDataVariant<Method>(*res).data,
getDataVariant<Method>(current).data,
res->aggregates_pool);
}
}
else if (res->without_key)
{
mergeDataNoMoreKeysImpl<Method>(
getDataVariant<Method>(*res).data,
res->without_key,
getDataVariant<Method>(current).data,
res->aggregates_pool);
}
else
{
mergeDataOnlyExistingKeysImpl<Method>(
getDataVariant<Method>(*res).data,
getDataVariant<Method>(current).data,
res->aggregates_pool);
}
/// `current` will not destroy the states of aggregate functions in the destructor
current.aggregator = nullptr;
@ -1716,11 +2051,22 @@ void NO_INLINE Aggregator::mergeBucketImpl(
return;
AggregatedDataVariants & current = *data[result_num];
mergeDataImpl<Method>(
getDataVariant<Method>(*res).data.impls[bucket],
getDataVariant<Method>(current).data.impls[bucket],
arena);
#if USE_EMBEDDED_COMPILER
if (compiled_aggregate_functions_holder)
{
mergeDataImpl<Method, true>(
getDataVariant<Method>(*res).data.impls[bucket],
getDataVariant<Method>(current).data.impls[bucket],
arena);
}
else
#endif
{
mergeDataImpl<Method, false>(
getDataVariant<Method>(*res).data.impls[bucket],
getDataVariant<Method>(current).data.impls[bucket],
arena);
}
}
}
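
These call sites all follow the same dispatch shape: a runtime check for the presence of a compiled module selects a template instantiation, and `if constexpr` removes the unused branch inside the kernel. A minimal standalone sketch of that pattern:

#include <iostream>

// Kernel templated on the compile-time flag; only one branch survives per instantiation.
template <bool use_compiled_functions>
void mergeKernel(int value)
{
    if constexpr (use_compiled_functions)
        std::cout << "compiled path: " << value << '\n';
    else
        std::cout << "interpreted path: " << value << '\n';
}

// Runtime dispatch: the flag is known only at runtime, so pick the instantiation here.
void merge(bool has_compiled_module, int value)
{
    if (has_compiled_module)
        mergeKernel<true>(value);
    else
        mergeKernel<false>(value);
}

int main()
{
    merge(true, 1);
    merge(false, 2);
}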

View File

@ -26,6 +26,7 @@
#include <Interpreters/AggregateDescription.h>
#include <Interpreters/AggregationCommon.h>
#include <Interpreters/JIT/compileFunction.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h>
@ -851,6 +852,8 @@ using AggregatedDataVariantsPtr = std::shared_ptr<AggregatedDataVariants>;
using ManyAggregatedDataVariants = std::vector<AggregatedDataVariantsPtr>;
using ManyAggregatedDataVariantsPtr = std::shared_ptr<ManyAggregatedDataVariants>;
class CompiledAggregateFunctionsHolder;
/** How are "total" values calculated with WITH TOTALS?
* (For more details, see TotalsHavingTransform.)
*
@ -907,6 +910,10 @@ public:
size_t max_threads;
const size_t min_free_disk_space;
bool compile_aggregate_expressions;
size_t min_count_to_compile_aggregate_expression;
Params(
const Block & src_header_,
const ColumnNumbers & keys_, const AggregateDescriptions & aggregates_,
@ -916,6 +923,8 @@ public:
bool empty_result_for_aggregation_by_empty_set_,
VolumePtr tmp_volume_, size_t max_threads_,
size_t min_free_disk_space_,
bool compile_aggregate_expressions_,
size_t min_count_to_compile_aggregate_expression_,
const Block & intermediate_header_ = {})
: src_header(src_header_),
intermediate_header(intermediate_header_),
@ -925,14 +934,16 @@ public:
max_bytes_before_external_group_by(max_bytes_before_external_group_by_),
empty_result_for_aggregation_by_empty_set(empty_result_for_aggregation_by_empty_set_),
tmp_volume(tmp_volume_), max_threads(max_threads_),
min_free_disk_space(min_free_disk_space_)
min_free_disk_space(min_free_disk_space_),
compile_aggregate_expressions(compile_aggregate_expressions_),
min_count_to_compile_aggregate_expression(min_count_to_compile_aggregate_expression_)
{
}
/// Only parameters that matter during merge.
Params(const Block & intermediate_header_,
const ColumnNumbers & keys_, const AggregateDescriptions & aggregates_, bool overflow_row_, size_t max_threads_)
: Params(Block(), keys_, aggregates_, overflow_row_, 0, OverflowMode::THROW, 0, 0, 0, false, nullptr, max_threads_, 0)
: Params(Block(), keys_, aggregates_, overflow_row_, 0, OverflowMode::THROW, 0, 0, 0, false, nullptr, max_threads_, 0, false, 0)
{
intermediate_header = intermediate_header_;
}
@ -1074,11 +1085,22 @@ private:
/// For external aggregation.
TemporaryFiles temporary_files;
#if USE_EMBEDDED_COMPILER
std::shared_ptr<CompiledAggregateFunctionsHolder> compiled_aggregate_functions_holder;
#endif
std::vector<bool> is_aggregate_function_compiled;
/** Try to compile aggregate functions.
*/
void compileAggregateFunctions();
/** Select the aggregation method based on the number and types of keys. */
AggregatedDataVariants::Type chooseAggregationMethod();
/** Create states of aggregate functions for one key.
*/
template <bool skip_compiled_aggregate_functions = false>
void createAggregateStates(AggregateDataPtr & aggregate_data) const;
/** Call `destroy` methods for states of aggregate functions.
@ -1099,7 +1121,7 @@ private:
AggregateDataPtr overflow_row) const;
/// Specialization for a particular value no_more_keys.
template <bool no_more_keys, typename Method>
template <bool no_more_keys, bool use_compiled_expressions, typename Method>
void executeImplBatch(
Method & method,
typename Method::State & state,
@ -1136,7 +1158,7 @@ private:
Arena * arena) const;
/// Merge data from hash table `src` into `dst`.
template <typename Method, typename Table>
template <typename Method, bool use_compiled_functions, typename Table>
void mergeDataImpl(
Table & table_dst,
Table & table_src,
@ -1180,7 +1202,7 @@ private:
MutableColumns & final_aggregate_columns,
Arena * arena) const;
template <typename Method, typename Table>
template <typename Method, bool use_compiled_functions, typename Table>
void convertToBlockImplFinal(
Method & method,
Table & data,

View File

@ -1,6 +1,6 @@
#include <Interpreters/AsynchronousMetrics.h>
#include <Interpreters/AsynchronousMetricLog.h>
#include <Interpreters/ExpressionJIT.h>
#include <Interpreters/JIT/CompiledExpressionCache.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/Context.h>
#include <Common/Exception.h>

View File

@ -45,7 +45,6 @@
#include <Access/SettingsConstraints.h>
#include <Access/ExternalAuthenticators.h>
#include <Access/GSSAcceptor.h>
#include <Interpreters/ExpressionJIT.h>
#include <Dictionaries/Embedded/GeoDictionariesLoader.h>
#include <Interpreters/EmbeddedDictionaries.h>
#include <Interpreters/ExternalDictionariesLoader.h>
@ -74,6 +73,7 @@
#include <common/logger_useful.h>
#include <Common/RemoteHostFilter.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/JIT/CompiledExpressionCache.h>
#include <Storages/MergeTree/BackgroundJobsExecutor.h>
#include <Storages/MergeTree/MergeTreeDataPartUUID.h>
#include <filesystem>

View File

@ -267,6 +267,9 @@ private:
/// XXX: move this stuff to shared part instead.
ContextMutablePtr buffer_context; /// Buffer context. Could be equal to this.
/// A flag, used to distinguish between user query and internal query to a database engine (MaterializePostgreSQL).
bool is_internal_query = false;
public:
// Top-level OpenTelemetry trace context for the query. Makes sense only for a query context.
OpenTelemetryTraceContext query_trace_context;
@ -742,6 +745,9 @@ public:
void shutdown();
bool isInternalQuery() const { return is_internal_query; }
void setInternalQuery(bool internal) { is_internal_query = internal; }
ActionLocksManagerPtr getActionLocksManager();
enum class ApplicationType

View File

@ -377,8 +377,8 @@ void DDLWorker::scheduleTasks(bool reinitialized)
/// The following message is too verbose, but it can be useful to debug mysterious test failures in CI
LOG_TRACE(log, "scheduleTasks: initialized={}, size_before_filtering={}, queue_size={}, "
"entries={}..{}, "
"first_failed_task_name={}, current_tasks_size={},"
"last_current_task={},"
"first_failed_task_name={}, current_tasks_size={}, "
"last_current_task={}, "
"last_skipped_entry_name={}",
initialized, size_before_filtering, queue_nodes.size(),
queue_nodes.empty() ? "none" : queue_nodes.front(), queue_nodes.empty() ? "none" : queue_nodes.back(),

View File

@ -28,6 +28,10 @@
# include <Storages/StorageMaterializeMySQL.h>
#endif
#if USE_LIBPQXX
# include <Storages/PostgreSQL/StorageMaterializedPostgreSQL.h>
#endif
namespace fs = std::filesystem;
namespace CurrentMetrics
@ -234,6 +238,13 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
return {};
}
#if USE_LIBPQXX
if (!context_->isInternalQuery() && (db_and_table.first->getEngineName() == "MaterializedPostgreSQL"))
{
db_and_table.second = std::make_shared<StorageMaterializedPostgreSQL>(std::move(db_and_table.second), getContext());
}
#endif
#if USE_MYSQL
/// It's definitely not the best place for this logic, but behaviour must be consistent with DatabaseMaterializeMySQL::tryGetTable(...)
if (db_and_table.first->getEngineName() == "MaterializeMySQL")
@ -245,6 +256,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
return db_and_table;
}
if (table_id.database_name == TEMPORARY_DATABASE)
{
/// For temporary tables UUIDs are set in Context::resolveStorageID(...).

View File

@ -231,7 +231,6 @@ void ExpressionAnalyzer::analyzeAggregation()
if (has_aggregation)
{
/// Find out aggregation keys.
if (select_query)
{
@ -252,6 +251,8 @@ void ExpressionAnalyzer::analyzeAggregation()
/// Constant expressions have non-null column pointer at this stage.
if (node->column && isColumnConst(*node->column))
{
select_query->group_by_with_constant_keys = true;
/// But don't remove last key column if no aggregate functions, otherwise aggregation will not work.
if (!aggregate_descriptions.empty() || size > 1)
{
@ -288,6 +289,11 @@ void ExpressionAnalyzer::analyzeAggregation()
else
aggregated_columns = temp_actions->getNamesAndTypesList();
/// Constant expressions are already removed during first 'analyze' run.
/// So for the second `analyze` run the information is taken from select_query.
if (select_query)
has_const_aggregation_keys = select_query->group_by_with_constant_keys;
for (const auto & desc : aggregate_descriptions)
aggregated_columns.emplace_back(desc.column_name, desc.function->getReturnType());
}

View File

@ -65,6 +65,7 @@ struct ExpressionAnalyzerData
bool has_aggregation = false;
NamesAndTypesList aggregation_keys;
bool has_const_aggregation_keys = false;
AggregateDescriptions aggregate_descriptions;
WindowDescriptions window_descriptions;
@ -309,6 +310,7 @@ public:
bool hasTableJoin() const { return syntax->ast_join; }
const NamesAndTypesList & aggregationKeys() const { return aggregation_keys; }
bool hasConstAggregationKeys() const { return has_const_aggregation_keys; }
const AggregateDescriptions & aggregates() const { return aggregate_descriptions; }
const PreparedSets & getPreparedSets() const { return prepared_sets; }

View File

@ -1,4 +1,6 @@
#include <Interpreters/ExpressionJIT.h>
#if !defined(ARCADIA_BUILD)
# include "config_core.h"
#endif
#if USE_EMBEDDED_COMPILER
@ -20,6 +22,7 @@
#include <Interpreters/JIT/CHJIT.h>
#include <Interpreters/JIT/CompileDAG.h>
#include <Interpreters/JIT/compileFunction.h>
#include <Interpreters/JIT/CompiledExpressionCache.h>
#include <Interpreters/ActionsDAG.h>
namespace DB
@ -42,36 +45,30 @@ static Poco::Logger * getLogger()
return &logger;
}
class CompiledFunction
class CompiledFunctionHolder : public CompiledExpressionCacheEntry
{
public:
CompiledFunction(void * compiled_function_, CHJIT::CompiledModuleInfo module_info_)
: compiled_function(compiled_function_)
, module_info(std::move(module_info_))
explicit CompiledFunctionHolder(CompiledFunction compiled_function_)
: CompiledExpressionCacheEntry(compiled_function_.compiled_module.size)
, compiled_function(compiled_function_)
{}
void * getCompiledFunction() const { return compiled_function; }
~CompiledFunction()
~CompiledFunctionHolder() override
{
getJITInstance().deleteCompiledModule(module_info);
getJITInstance().deleteCompiledModule(compiled_function.compiled_module);
}
private:
void * compiled_function;
CHJIT::CompiledModuleInfo module_info;
CompiledFunction compiled_function;
};
class LLVMExecutableFunction : public IExecutableFunction
{
public:
explicit LLVMExecutableFunction(const std::string & name_, std::shared_ptr<CompiledFunction> compiled_function_)
explicit LLVMExecutableFunction(const std::string & name_, std::shared_ptr<CompiledFunctionHolder> compiled_function_holder_)
: name(name_)
, compiled_function(compiled_function_)
, compiled_function_holder(compiled_function_holder_)
{
}
@ -104,8 +101,8 @@ public:
columns[arguments.size()] = getColumnData(result_column.get());
JITCompiledFunction jit_compiled_function_typed = reinterpret_cast<JITCompiledFunction>(compiled_function->getCompiledFunction());
jit_compiled_function_typed(input_rows_count, columns.data());
auto jit_compiled_function = compiled_function_holder->compiled_function.compiled_function;
jit_compiled_function(input_rows_count, columns.data());
#if defined(MEMORY_SANITIZER)
/// Memory sanitizer don't know about stores from JIT-ed code.
@ -135,7 +132,7 @@ public:
private:
std::string name;
std::shared_ptr<CompiledFunction> compiled_function;
std::shared_ptr<CompiledFunctionHolder> compiled_function_holder;
};
class LLVMFunction : public IFunctionBase
@ -157,9 +154,9 @@ public:
}
}
void setCompiledFunction(std::shared_ptr<CompiledFunction> compiled_function_)
void setCompiledFunction(std::shared_ptr<CompiledFunctionHolder> compiled_function_holder_)
{
compiled_function = compiled_function_;
compiled_function_holder = compiled_function_holder_;
}
bool isCompilable() const override { return true; }
@ -177,10 +174,10 @@ public:
ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override
{
if (!compiled_function)
if (!compiled_function_holder)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Compiled function was not initialized {}", name);
return std::make_unique<LLVMExecutableFunction>(name, compiled_function);
return std::make_unique<LLVMExecutableFunction>(name, compiled_function_holder);
}
bool isDeterministic() const override
@ -269,7 +266,7 @@ private:
CompileDAG dag;
DataTypes argument_types;
std::vector<FunctionBasePtr> nested_functions;
std::shared_ptr<CompiledFunction> compiled_function;
std::shared_ptr<CompiledFunctionHolder> compiled_function_holder;
};
static FunctionBasePtr compile(
@ -293,22 +290,19 @@ static FunctionBasePtr compile(
auto [compiled_function_cache_entry, _] = compilation_cache->getOrSet(hash_key, [&] ()
{
LOG_TRACE(getLogger(), "Compile expression {}", llvm_function->getName());
CHJIT::CompiledModuleInfo compiled_module_info = compileFunction(getJITInstance(), *llvm_function);
auto * compiled_jit_function = getJITInstance().findCompiledFunction(compiled_module_info, llvm_function->getName());
auto compiled_function = std::make_shared<CompiledFunction>(compiled_jit_function, compiled_module_info);
return std::make_shared<CompiledFunctionCacheEntry>(std::move(compiled_function), compiled_module_info.size);
auto compiled_function = compileFunction(getJITInstance(), *llvm_function);
return std::make_shared<CompiledFunctionHolder>(compiled_function);
});
llvm_function->setCompiledFunction(compiled_function_cache_entry->getCompiledFunction());
std::shared_ptr<CompiledFunctionHolder> compiled_function_holder = std::static_pointer_cast<CompiledFunctionHolder>(compiled_function_cache_entry);
llvm_function->setCompiledFunction(std::move(compiled_function_holder));
}
else
{
LOG_TRACE(getLogger(), "Compile expression {}", llvm_function->getName());
CHJIT::CompiledModuleInfo compiled_module_info = compileFunction(getJITInstance(), *llvm_function);
auto * compiled_jit_function = getJITInstance().findCompiledFunction(compiled_module_info, llvm_function->getName());
auto compiled_function = std::make_shared<CompiledFunction>(compiled_jit_function, compiled_module_info);
llvm_function->setCompiledFunction(compiled_function);
auto compiled_function = compileFunction(getJITInstance(), *llvm_function);
auto compiled_function_holder = std::make_shared<CompiledFunctionHolder>(compiled_function);
llvm_function->setCompiledFunction(std::move(compiled_function_holder));
}
return llvm_function;
@ -577,25 +571,6 @@ void ActionsDAG::compileFunctions(size_t min_count_to_compile_expression)
}
}
CompiledExpressionCacheFactory & CompiledExpressionCacheFactory::instance()
{
static CompiledExpressionCacheFactory factory;
return factory;
}
void CompiledExpressionCacheFactory::init(size_t cache_size)
{
if (cache)
throw Exception(ErrorCodes::LOGICAL_ERROR, "CompiledExpressionCache was already initialized");
cache = std::make_unique<CompiledExpressionCache>(cache_size);
}
CompiledExpressionCache * CompiledExpressionCacheFactory::tryGetCache()
{
return cache.get();
}
}
#endif

View File

@ -1,66 +0,0 @@
#pragma once
#if !defined(ARCADIA_BUILD)
# include "config_core.h"
#endif
#if USE_EMBEDDED_COMPILER
# include <Common/LRUCache.h>
# include <Common/HashTable/Hash.h>
namespace DB
{
class CompiledFunction;
class CompiledFunctionCacheEntry
{
public:
CompiledFunctionCacheEntry(std::shared_ptr<CompiledFunction> compiled_function_, size_t compiled_function_size_)
: compiled_function(std::move(compiled_function_))
, compiled_function_size(compiled_function_size_)
{}
std::shared_ptr<CompiledFunction> getCompiledFunction() const { return compiled_function; }
size_t getCompiledFunctionSize() const { return compiled_function_size; }
private:
std::shared_ptr<CompiledFunction> compiled_function;
size_t compiled_function_size;
};
struct CompiledFunctionWeightFunction
{
size_t operator()(const CompiledFunctionCacheEntry & compiled_function) const
{
return compiled_function.getCompiledFunctionSize();
}
};
/** This child of LRUCache breaks one of its invariants: total weight may be changed after insertion.
* We have to do so because we don't know the real memory consumption of the generated LLVM code for every function.
*/
class CompiledExpressionCache : public LRUCache<UInt128, CompiledFunctionCacheEntry, UInt128Hash, CompiledFunctionWeightFunction>
{
public:
using Base = LRUCache<UInt128, CompiledFunctionCacheEntry, UInt128Hash, CompiledFunctionWeightFunction>;
using Base::Base;
};
class CompiledExpressionCacheFactory
{
private:
std::unique_ptr<CompiledExpressionCache> cache;
public:
static CompiledExpressionCacheFactory & instance();
void init(size_t cache_size);
CompiledExpressionCache * tryGetCache();
};
}
#endif

View File

@ -151,7 +151,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, "Unknown database engine: {}", serializeAST(*create.storage));
}
if (create.storage->engine->name == "Atomic" || create.storage->engine->name == "Replicated")
if (create.storage->engine->name == "Atomic" || create.storage->engine->name == "Replicated" || create.storage->engine->name == "MaterializedPostgreSQL")
{
if (create.attach && create.uuid == UUIDHelpers::Nil)
throw Exception(ErrorCodes::INCORRECT_QUERY, "UUID must be specified for ATTACH. "
@ -217,6 +217,12 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
"Enable allow_experimental_database_replicated to use it.", ErrorCodes::UNKNOWN_DATABASE_ENGINE);
}
if (create.storage->engine->name == "MaterializedPostgreSQL" && !getContext()->getSettingsRef().allow_experimental_database_materialized_postgresql && !internal)
{
throw Exception("MaterializedPostgreSQL is an experimental database engine. "
"Enable allow_experimental_database_postgresql_replica to use it.", ErrorCodes::UNKNOWN_DATABASE_ENGINE);
}
DatabasePtr database = DatabaseFactory::get(create, metadata_path / "", getContext());
if (create.uuid != UUIDHelpers::Nil)

View File

@ -20,6 +20,9 @@
# include <Databases/MySQL/DatabaseMaterializeMySQL.h>
#endif
#if USE_LIBPQXX
# include <Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h>
#endif
namespace DB
{
@ -317,6 +320,10 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query,
#endif
if (auto * replicated = typeid_cast<DatabaseReplicated *>(database.get()))
replicated->stopReplication();
#if USE_LIBPQXX
if (auto * materialize_postgresql = typeid_cast<DatabaseMaterializedPostgreSQL *>(database.get()))
materialize_postgresql->stopReplication();
#endif
if (database->shouldBeEmptyOnDetach())
{
@ -398,4 +405,33 @@ void InterpreterDropQuery::extendQueryLogElemImpl(QueryLogElement & elem, const
elem.query_kind = "Drop";
}
void InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind kind, ContextPtr global_context, ContextPtr current_context, const StorageID & target_table_id, bool no_delay)
{
if (DatabaseCatalog::instance().tryGetTable(target_table_id, current_context))
{
/// We create and execute `drop` query for internal table.
auto drop_query = std::make_shared<ASTDropQuery>();
drop_query->database = target_table_id.database_name;
drop_query->table = target_table_id.table_name;
drop_query->kind = kind;
drop_query->no_delay = no_delay;
drop_query->if_exists = true;
ASTPtr ast_drop_query = drop_query;
/// FIXME We have to use global context to execute DROP query for inner table
/// to avoid "Not enough privileges" error if current user has only DROP VIEW ON mat_view_name privilege
/// and not allowed to drop inner table explicitly. Allowing to drop inner table without explicit grant
/// looks like expected behaviour and we have tests for it.
auto drop_context = Context::createCopy(global_context);
drop_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
if (auto txn = current_context->getZooKeeperMetadataTransaction())
{
/// For Replicated database
drop_context->setQueryContext(std::const_pointer_cast<Context>(current_context));
drop_context->initZooKeeperMetadataTransaction(txn, true);
}
InterpreterDropQuery drop_interpreter(ast_drop_query, drop_context);
drop_interpreter.execute();
}
}
}

View File

@ -26,6 +26,8 @@ public:
void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr) const override;
static void executeDropQuery(ASTDropQuery::Kind kind, ContextPtr global_context, ContextPtr current_context, const StorageID & target_table_id, bool no_delay);
private:
AccessRightsElements getRequiredAccessForDDLOnCluster() const;
ASTPtr query_ptr;

View File

@ -506,7 +506,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
result_header = getSampleBlockImpl();
};
analyze(settings.optimize_move_to_prewhere);
analyze(shouldMoveToPrewhere());
bool need_analyze_again = false;
if (analysis_result.prewhere_constant_filter_description.always_false || analysis_result.prewhere_constant_filter_description.always_true)
@ -1532,16 +1532,22 @@ void InterpreterSelectQuery::addEmptySourceToQueryPlan(
}
}
void InterpreterSelectQuery::addPrewhereAliasActions()
bool InterpreterSelectQuery::shouldMoveToPrewhere()
{
const Settings & settings = context->getSettingsRef();
const ASTSelectQuery & query = getSelectQuery();
return settings.optimize_move_to_prewhere && (!query.final() || settings.optimize_move_to_prewhere_if_final);
}
void InterpreterSelectQuery::addPrewhereAliasActions()
{
auto & expressions = analysis_result;
if (expressions.filter_info)
{
if (!expressions.prewhere_info)
{
const bool does_storage_support_prewhere = !input && !input_pipe && storage && storage->supportsPrewhere();
if (does_storage_support_prewhere && settings.optimize_move_to_prewhere)
if (does_storage_support_prewhere && shouldMoveToPrewhere())
{
/// Execute row level filter in prewhere as a part of "move to prewhere" optimization.
expressions.prewhere_info = std::make_shared<PrewhereInfo>(
@ -2035,10 +2041,12 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac
settings.group_by_two_level_threshold,
settings.group_by_two_level_threshold_bytes,
settings.max_bytes_before_external_group_by,
settings.empty_result_for_aggregation_by_empty_set,
settings.empty_result_for_aggregation_by_empty_set || (keys.empty() && query_analyzer->hasConstAggregationKeys()),
context->getTemporaryVolume(),
settings.max_threads,
settings.min_free_disk_space_for_temporary_data);
settings.min_free_disk_space_for_temporary_data,
settings.compile_aggregate_expressions,
settings.min_count_to_compile_aggregate_expression);
SortDescription group_by_sort_description;
@ -2140,7 +2148,9 @@ void InterpreterSelectQuery::executeRollupOrCube(QueryPlan & query_plan, Modific
settings.empty_result_for_aggregation_by_empty_set,
context->getTemporaryVolume(),
settings.max_threads,
settings.min_free_disk_space_for_temporary_data);
settings.min_free_disk_space_for_temporary_data,
settings.compile_aggregate_expressions,
settings.min_count_to_compile_aggregate_expression);
auto transform_params = std::make_shared<AggregatingTransformParams>(params, true);

View File

@ -118,6 +118,7 @@ private:
ASTSelectQuery & getSelectQuery() { return query_ptr->as<ASTSelectQuery &>(); }
void addPrewhereAliasActions();
bool shouldMoveToPrewhere();
Block getSampleBlockImpl();

View File

@ -25,7 +25,7 @@
#include <Interpreters/MetricLog.h>
#include <Interpreters/AsynchronousMetricLog.h>
#include <Interpreters/OpenTelemetrySpanLog.h>
#include <Interpreters/ExpressionJIT.h>
#include <Interpreters/JIT/CompiledExpressionCache.h>
#include <Access/ContextAccess.h>
#include <Access/AllowedClientHosts.h>
#include <Databases/IDatabase.h>

View File

@ -80,6 +80,28 @@ private:
llvm::TargetMachine & target_machine;
};
// class AssemblyPrinter
// {
// public:
// explicit AssemblyPrinter(llvm::TargetMachine &target_machine_)
// : target_machine(target_machine_)
// {
// }
// void print(llvm::Module & module)
// {
// llvm::legacy::PassManager pass_manager;
// target_machine.Options.MCOptions.AsmVerbose = true;
// if (target_machine.addPassesToEmitFile(pass_manager, llvm::errs(), nullptr, llvm::CodeGenFileType::CGFT_AssemblyFile))
// throw Exception(ErrorCodes::CANNOT_COMPILE_CODE, "MachineCode cannot be printed");
// pass_manager.run(module);
// }
// private:
// llvm::TargetMachine & target_machine;
// };
/** MemoryManager for module.
* Keep total allocated size during RuntimeDyld linker execution.
* Actual compiled code memory is stored in llvm::SectionMemoryManager member, we cannot use ZeroBase optimization here
@ -189,7 +211,7 @@ CHJIT::CHJIT()
CHJIT::~CHJIT() = default;
CHJIT::CompiledModuleInfo CHJIT::compileModule(std::function<void (llvm::Module &)> compile_function)
CHJIT::CompiledModule CHJIT::compileModule(std::function<void (llvm::Module &)> compile_function)
{
std::lock_guard<std::mutex> lock(jit_lock);
@ -210,7 +232,7 @@ std::unique_ptr<llvm::Module> CHJIT::createModuleForCompilation()
return module;
}
CHJIT::CompiledModuleInfo CHJIT::compileModule(std::unique_ptr<llvm::Module> module)
CHJIT::CompiledModule CHJIT::compileModule(std::unique_ptr<llvm::Module> module)
{
runOptimizationPassesOnModule(*module);
@ -234,7 +256,7 @@ CHJIT::CompiledModuleInfo CHJIT::compileModule(std::unique_ptr<llvm::Module> mod
dynamic_linker.resolveRelocations();
module_memory_manager->getManager().finalizeMemory();
CompiledModuleInfo module_info;
CompiledModule compiled_module;
for (const auto & function : *module)
{
@ -250,47 +272,29 @@ CHJIT::CompiledModuleInfo CHJIT::compileModule(std::unique_ptr<llvm::Module> mod
throw Exception(ErrorCodes::CANNOT_COMPILE_CODE, "DynamicLinker could not found symbol {} after compilation", function_name);
auto * jit_symbol_address = reinterpret_cast<void *>(jit_symbol.getAddress());
std::string symbol_name = std::to_string(current_module_key) + '_' + function_name;
name_to_symbol[symbol_name] = jit_symbol_address;
module_info.compiled_functions.emplace_back(std::move(function_name));
compiled_module.function_name_to_symbol.emplace(std::move(function_name), jit_symbol_address);
}
module_info.size = module_memory_manager->getAllocatedSize();
module_info.identifier = current_module_key;
compiled_module.size = module_memory_manager->getAllocatedSize();
compiled_module.identifier = current_module_key;
module_identifier_to_memory_manager[current_module_key] = std::move(module_memory_manager);
compiled_code_size.fetch_add(module_info.size, std::memory_order_relaxed);
compiled_code_size.fetch_add(compiled_module.size, std::memory_order_relaxed);
return module_info;
return compiled_module;
}
void CHJIT::deleteCompiledModule(const CHJIT::CompiledModuleInfo & module_info)
void CHJIT::deleteCompiledModule(const CHJIT::CompiledModule & module)
{
std::lock_guard<std::mutex> lock(jit_lock);
auto module_it = module_identifier_to_memory_manager.find(module_info.identifier);
auto module_it = module_identifier_to_memory_manager.find(module.identifier);
if (module_it == module_identifier_to_memory_manager.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no compiled module with identifier {}", module_info.identifier);
for (const auto & function : module_info.compiled_functions)
name_to_symbol.erase(function);
throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no compiled module with identifier {}", module.identifier);
module_identifier_to_memory_manager.erase(module_it);
compiled_code_size.fetch_sub(module_info.size, std::memory_order_relaxed);
}
void * CHJIT::findCompiledFunction(const CompiledModuleInfo & module_info, const std::string & function_name) const
{
std::lock_guard<std::mutex> lock(jit_lock);
std::string symbol_name = std::to_string(module_info.identifier) + '_' + function_name;
auto it = name_to_symbol.find(symbol_name);
if (it != name_to_symbol.end())
return it->second;
return nullptr;
compiled_code_size.fetch_sub(module.size, std::memory_order_relaxed);
}
void CHJIT::registerExternalSymbol(const std::string & symbol_name, void * address)

View File

@ -9,9 +9,9 @@
#include <unordered_map>
#include <atomic>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Module.h>
#include <llvm/Target/TargetMachine.h>
#include <llvm/IR/LLVMContext.h> // Y_IGNORE
#include <llvm/IR/Module.h> // Y_IGNORE
#include <llvm/Target/TargetMachine.h> // Y_IGNORE
namespace DB
{
@ -52,32 +52,31 @@ public:
~CHJIT();
struct CompiledModuleInfo
struct CompiledModule
{
/// Size of compiled module code in bytes
size_t size;
/// Module identifier. Should not be changed by client
uint64_t identifier;
/// Vector of compiled function nameds. Should not be changed by client
std::vector<std::string> compiled_functions;
/// Map from compiled function name to symbol. Should not be changed by client.
/// It is client responsibility to cast result function to right signature.
/// After call to deleteCompiledModule compiled functions from module become invalid.
std::unordered_map<std::string, void *> function_name_to_symbol;
};
/** Compile a module. Inside compile_function it is the client's responsibility to fill the module with the necessary
* IR code; it will then be compiled by the CHJIT instance.
* Return compiled module info.
* Return compiled module.
*/
CompiledModuleInfo compileModule(std::function<void (llvm::Module &)> compile_function);
CompiledModule compileModule(std::function<void (llvm::Module &)> compile_function);
/** Delete compiled module. Pointers to functions from module become invalid after this call.
* It is client responsibility to be sure that there are no pointers to compiled module code.
*/
void deleteCompiledModule(const CompiledModuleInfo & module_info);
/** Find compiled function using module_info, and function_name.
* It is client responsibility to case result function to right signature.
* After call to deleteCompiledModule compiled functions from module become invalid.
*/
void * findCompiledFunction(const CompiledModuleInfo & module_info, const std::string & function_name) const;
void deleteCompiledModule(const CompiledModule & module_info);
/** Register an external symbol for the CHJIT instance to use during linking.
* It can be a function or a global constant.
@ -93,7 +92,7 @@ private:
std::unique_ptr<llvm::Module> createModuleForCompilation();
CompiledModuleInfo compileModule(std::unique_ptr<llvm::Module> module);
CompiledModule compileModule(std::unique_ptr<llvm::Module> module);
std::string getMangledName(const std::string & name_to_mangle) const;
@ -107,7 +106,6 @@ private:
std::unique_ptr<JITCompiler> compiler;
std::unique_ptr<JITSymbolResolver> symbol_resolver;
std::unordered_map<std::string, void *> name_to_symbol;
std::unordered_map<uint64_t, std::unique_ptr<JITModuleMemoryManager>> module_identifier_to_memory_manager;
uint64_t current_module_key = 0;
std::atomic<size_t> compiled_code_size = 0;
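
Based on the signatures visible in this header, a client of the new interface fills a module with IR inside the callback, looks up symbols in function_name_to_symbol, casts them to the expected signature, and finally deletes the module. A sketch of that flow, assuming the ClickHouse tree and LLVM headers are available; error handling is omitted:

#include <Interpreters/JIT/CHJIT.h>
#include <llvm/IR/IRBuilder.h>

using namespace DB;

int main()
{
    CHJIT jit;

    /// Fill the module with IR inside the callback; CHJIT compiles it and
    /// returns the symbols it found, keyed by function name.
    auto module = jit.compileModule([](llvm::Module & m)
    {
        llvm::IRBuilder<> b(m.getContext());
        auto * func_type = llvm::FunctionType::get(b.getVoidTy(), /*isVarArg=*/false);
        auto * func = llvm::Function::Create(func_type, llvm::Function::ExternalLinkage, "noop", m);
        b.SetInsertPoint(llvm::BasicBlock::Create(m.getContext(), "entry", func));
        b.CreateRetVoid();
    });

    /// The caller is responsible for casting the symbol to the right signature.
    auto noop = reinterpret_cast<void (*)()>(module.function_name_to_symbol.at("noop"));
    noop();

    /// After this call every pointer obtained from the module is invalid.
    jit.deleteCompiledModule(module);
    return 0;
}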

View File

@ -0,0 +1,34 @@
#include "CompiledExpressionCache.h"
#if USE_EMBEDDED_COMPILER
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
CompiledExpressionCacheFactory & CompiledExpressionCacheFactory::instance()
{
static CompiledExpressionCacheFactory factory;
return factory;
}
void CompiledExpressionCacheFactory::init(size_t cache_size)
{
if (cache)
throw Exception(ErrorCodes::LOGICAL_ERROR, "CompiledExpressionCache was already initialized");
cache = std::make_unique<CompiledExpressionCache>(cache_size);
}
CompiledExpressionCache * CompiledExpressionCacheFactory::tryGetCache()
{
return cache.get();
}
}
#endif

View File

@ -0,0 +1,61 @@
#pragma once
#if !defined(ARCADIA_BUILD)
# include "config_core.h"
#endif
#if USE_EMBEDDED_COMPILER
# include <Common/LRUCache.h>
# include <Common/HashTable/Hash.h>
# include <Interpreters/JIT/CHJIT.h>
namespace DB
{
class CompiledExpressionCacheEntry
{
public:
explicit CompiledExpressionCacheEntry(size_t compiled_expression_size_)
: compiled_expression_size(compiled_expression_size_)
{}
size_t getCompiledExpressionSize() const { return compiled_expression_size; }
virtual ~CompiledExpressionCacheEntry() {}
private:
size_t compiled_expression_size = 0;
};
struct CompiledFunctionWeightFunction
{
size_t operator()(const CompiledExpressionCacheEntry & compiled_function) const
{
return compiled_function.getCompiledExpressionSize();
}
};
class CompiledExpressionCache : public LRUCache<UInt128, CompiledExpressionCacheEntry, UInt128Hash, CompiledFunctionWeightFunction>
{
public:
using Base = LRUCache<UInt128, CompiledExpressionCacheEntry, UInt128Hash, CompiledFunctionWeightFunction>;
using Base::Base;
};
class CompiledExpressionCacheFactory
{
private:
std::unique_ptr<CompiledExpressionCache> cache;
public:
static CompiledExpressionCacheFactory & instance();
void init(size_t cache_size);
CompiledExpressionCache * tryGetCache();
};
}
#endif
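
A sketch of how this factory is intended to be used, assuming a build with USE_EMBEDDED_COMPILER and the ClickHouse tree available; DummyEntry is a hypothetical payload that only reports its size:

#include <Common/SipHash.h>
#include <Interpreters/JIT/CompiledExpressionCache.h>

using namespace DB;

/// Hypothetical cache payload that only reports its compiled size.
class DummyEntry : public CompiledExpressionCacheEntry
{
public:
    explicit DummyEntry(size_t compiled_size) : CompiledExpressionCacheEntry(compiled_size) {}
};

void example()
{
    /// Done once at startup, from the server configuration (size is the code budget in bytes).
    CompiledExpressionCacheFactory::instance().init(128 * 1024 * 1024);

    /// Later, anywhere in the code: the cache may be absent if it was never initialized.
    auto * cache = CompiledExpressionCacheFactory::instance().tryGetCache();
    if (!cache)
        return;

    /// Keys are 128-bit hashes of the expression description, as in the diff above.
    SipHash hash;
    hash.update(std::string("sum(UInt64) 0"));
    UInt128 key;
    hash.get128(key);

    auto [entry, inserted] = cache->getOrSet(key, [] { return std::make_shared<DummyEntry>(4096); });
    (void)entry;
    (void)inserted;
}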

View File

@ -250,20 +250,356 @@ static void compileFunction(llvm::Module & module, const IFunctionBase & functio
b.CreateRetVoid();
}
CHJIT::CompiledModuleInfo compileFunction(CHJIT & jit, const IFunctionBase & function)
CompiledFunction compileFunction(CHJIT & jit, const IFunctionBase & function)
{
Stopwatch watch;
auto compiled_module_info = jit.compileModule([&](llvm::Module & module)
auto compiled_module = jit.compileModule([&](llvm::Module & module)
{
compileFunction(module, function);
});
ProfileEvents::increment(ProfileEvents::CompileExpressionsMicroseconds, watch.elapsedMicroseconds());
ProfileEvents::increment(ProfileEvents::CompileExpressionsBytes, compiled_module_info.size);
ProfileEvents::increment(ProfileEvents::CompileExpressionsBytes, compiled_module.size);
ProfileEvents::increment(ProfileEvents::CompileFunction);
return compiled_module_info;
auto compiled_function_ptr = reinterpret_cast<JITCompiledFunction>(compiled_module.function_name_to_symbol[function.getName()]);
assert(compiled_function_ptr);
CompiledFunction result_compiled_function
{
.compiled_function = compiled_function_ptr,
.compiled_module = compiled_module
};
return result_compiled_function;
}
static void compileCreateAggregateStatesFunctions(llvm::Module & module, const std::vector<AggregateFunctionWithOffset> & functions, const std::string & name)
{
auto & context = module.getContext();
llvm::IRBuilder<> b(context);
auto * aggregate_data_places_type = b.getInt8Ty()->getPointerTo();
auto * create_aggregate_states_function_type = llvm::FunctionType::get(b.getVoidTy(), { aggregate_data_places_type }, false);
auto * create_aggregate_states_function = llvm::Function::Create(create_aggregate_states_function_type, llvm::Function::ExternalLinkage, name, module);
auto * arguments = create_aggregate_states_function->args().begin();
llvm::Value * aggregate_data_place_arg = arguments++;
auto * entry = llvm::BasicBlock::Create(b.getContext(), "entry", create_aggregate_states_function);
b.SetInsertPoint(entry);
std::vector<ColumnDataPlaceholder> columns(functions.size());
for (const auto & function_to_compile : functions)
{
size_t aggregate_function_offset = function_to_compile.aggregate_data_offset;
const auto * aggregate_function = function_to_compile.function;
auto * aggregation_place_with_offset = b.CreateConstInBoundsGEP1_32(nullptr, aggregate_data_place_arg, aggregate_function_offset);
aggregate_function->compileCreate(b, aggregation_place_with_offset);
}
b.CreateRetVoid();
}
static void compileAddIntoAggregateStatesFunctions(llvm::Module & module, const std::vector<AggregateFunctionWithOffset> & functions, const std::string & name)
{
auto & context = module.getContext();
llvm::IRBuilder<> b(context);
auto * size_type = b.getIntNTy(sizeof(size_t) * 8);
auto * places_type = b.getInt8Ty()->getPointerTo()->getPointerTo();
auto * column_data_type = llvm::StructType::get(b.getInt8PtrTy(), b.getInt8PtrTy());
auto * aggregate_loop_func_declaration = llvm::FunctionType::get(b.getVoidTy(), { size_type, column_data_type->getPointerTo(), places_type }, false);
auto * aggregate_loop_func_definition = llvm::Function::Create(aggregate_loop_func_declaration, llvm::Function::ExternalLinkage, name, module);
auto * arguments = aggregate_loop_func_definition->args().begin();
llvm::Value * rows_count_arg = arguments++;
llvm::Value * columns_arg = arguments++;
llvm::Value * places_arg = arguments++;
/// Initialize ColumnDataPlaceholder llvm representation of ColumnData
auto * entry = llvm::BasicBlock::Create(b.getContext(), "entry", aggregate_loop_func_definition);
b.SetInsertPoint(entry);
std::vector<ColumnDataPlaceholder> columns;
size_t previous_columns_size = 0;
for (const auto & function : functions)
{
auto argument_types = function.function->getArgumentTypes();
ColumnDataPlaceholder data_placeholder;
size_t function_arguments_size = argument_types.size();
for (size_t column_argument_index = 0; column_argument_index < function_arguments_size; ++column_argument_index)
{
const auto & argument_type = argument_types[column_argument_index];
auto * data = b.CreateLoad(column_data_type, b.CreateConstInBoundsGEP1_32(column_data_type, columns_arg, previous_columns_size + column_argument_index));
data_placeholder.data_init = b.CreatePointerCast(b.CreateExtractValue(data, {0}), toNativeType(b, removeNullable(argument_type))->getPointerTo());
data_placeholder.null_init = argument_type->isNullable() ? b.CreateExtractValue(data, {1}) : nullptr;
columns.emplace_back(data_placeholder);
}
previous_columns_size += function_arguments_size;
}
/// Initialize loop
auto * end = llvm::BasicBlock::Create(b.getContext(), "end", aggregate_loop_func_definition);
auto * loop = llvm::BasicBlock::Create(b.getContext(), "loop", aggregate_loop_func_definition);
b.CreateCondBr(b.CreateICmpEQ(rows_count_arg, llvm::ConstantInt::get(size_type, 0)), end, loop);
b.SetInsertPoint(loop);
auto * counter_phi = b.CreatePHI(rows_count_arg->getType(), 2);
counter_phi->addIncoming(llvm::ConstantInt::get(size_type, 0), entry);
auto * places_phi = b.CreatePHI(places_arg->getType(), 2);
places_phi->addIncoming(places_arg, entry);
for (auto & col : columns)
{
col.data = b.CreatePHI(col.data_init->getType(), 2);
col.data->addIncoming(col.data_init, entry);
if (col.null_init)
{
col.null = b.CreatePHI(col.null_init->getType(), 2);
col.null->addIncoming(col.null_init, entry);
}
}
auto * aggregation_place = b.CreateLoad(b.getInt8Ty()->getPointerTo(), places_phi);
previous_columns_size = 0;
for (const auto & function : functions)
{
size_t aggregate_function_offset = function.aggregate_data_offset;
const auto * aggregate_function_ptr = function.function;
auto arguments_types = function.function->getArgumentTypes();
std::vector<llvm::Value *> arguments_values;
size_t function_arguments_size = arguments_types.size();
arguments_values.resize(function_arguments_size);
for (size_t column_argument_index = 0; column_argument_index < function_arguments_size; ++column_argument_index)
{
auto * column_argument_data = columns[previous_columns_size + column_argument_index].data;
auto * column_argument_null_data = columns[previous_columns_size + column_argument_index].null;
auto & argument_type = arguments_types[column_argument_index];
auto * value = b.CreateLoad(toNativeType(b, removeNullable(argument_type)), column_argument_data);
if (!argument_type->isNullable())
{
arguments_values[column_argument_index] = value;
continue;
}
auto * is_null = b.CreateICmpNE(b.CreateLoad(b.getInt8Ty(), column_argument_null_data), b.getInt8(0));
auto * nullable_uninitialized = llvm::Constant::getNullValue(toNativeType(b, argument_type));
auto * nullable_value = b.CreateInsertValue(b.CreateInsertValue(nullable_uninitialized, value, {0}), is_null, {1});
arguments_values[column_argument_index] = nullable_value;
}
auto * aggregation_place_with_offset = b.CreateConstInBoundsGEP1_32(nullptr, aggregation_place, aggregate_function_offset);
aggregate_function_ptr->compileAdd(b, aggregation_place_with_offset, arguments_types, arguments_values);
previous_columns_size += function_arguments_size;
}
/// End of loop
auto * cur_block = b.GetInsertBlock();
for (auto & col : columns)
{
col.data->addIncoming(b.CreateConstInBoundsGEP1_32(nullptr, col.data, 1), cur_block);
if (col.null)
col.null->addIncoming(b.CreateConstInBoundsGEP1_32(nullptr, col.null, 1), cur_block);
}
places_phi->addIncoming(b.CreateConstInBoundsGEP1_32(nullptr, places_phi, 1), cur_block);
auto * value = b.CreateAdd(counter_phi, llvm::ConstantInt::get(size_type, 1));
counter_phi->addIncoming(value, cur_block);
b.CreateCondBr(b.CreateICmpEQ(value, rows_count_arg), end, loop);
b.SetInsertPoint(end);
b.CreateRetVoid();
}
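/// A hedged sketch of the per-row loop this IR corresponds to (illustrative only, not part of this patch;
/// `places`, `columns` and `rows_count` mirror the generated function's arguments):
///
///     for (size_t row = 0; row < rows_count; ++row)
///     {
///         AggregateDataPtr place = places[row];
///         for (const auto & f : functions)
///         {
///             /// compileAdd inlines the state update at place + f.aggregate_data_offset,
///             /// reading this row's argument values from the corresponding columns;
///             /// nullable arguments are passed as (value, is_null) pairs.
///         }
///     }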
static void compileMergeAggregatesStates(llvm::Module & module, const std::vector<AggregateFunctionWithOffset> & functions, const std::string & name)
{
auto & context = module.getContext();
llvm::IRBuilder<> b(context);
auto * aggregate_data_places_type = b.getInt8Ty()->getPointerTo();
auto * aggregate_loop_func_declaration = llvm::FunctionType::get(b.getVoidTy(), { aggregate_data_places_type, aggregate_data_places_type }, false);
auto * aggregate_loop_func = llvm::Function::Create(aggregate_loop_func_declaration, llvm::Function::ExternalLinkage, name, module);
auto * arguments = aggregate_loop_func->args().begin();
llvm::Value * aggregate_data_place_dst_arg = arguments++;
llvm::Value * aggregate_data_place_src_arg = arguments++;
auto * entry = llvm::BasicBlock::Create(b.getContext(), "entry", aggregate_loop_func);
b.SetInsertPoint(entry);
for (const auto & function_to_compile : functions)
{
size_t aggregate_function_offset = function_to_compile.aggregate_data_offset;
const auto * aggregate_function_ptr = function_to_compile.function;
auto * aggregate_data_place_merge_dst_with_offset = b.CreateConstInBoundsGEP1_32(nullptr, aggregate_data_place_dst_arg, aggregate_function_offset);
auto * aggregate_data_place_merge_src_with_offset = b.CreateConstInBoundsGEP1_32(nullptr, aggregate_data_place_src_arg, aggregate_function_offset);
aggregate_function_ptr->compileMerge(b, aggregate_data_place_merge_dst_with_offset, aggregate_data_place_merge_src_with_offset);
}
b.CreateRetVoid();
}
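/// A hedged sketch of the emitted straight-line code (illustrative only, not part of this patch):
///
///     void merge_aggregate_states(AggregateDataPtr dst, AggregateDataPtr src)
///     {
///         for (const auto & f : functions)
///             /// compileMerge inlines merging the state at src + f.aggregate_data_offset
///             /// into the state at dst + f.aggregate_data_offset
///     }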
static void compileInsertAggregatesIntoResultColumns(llvm::Module & module, const std::vector<AggregateFunctionWithOffset> & functions, const std::string & name)
{
auto & context = module.getContext();
llvm::IRBuilder<> b(context);
auto * size_type = b.getIntNTy(sizeof(size_t) * 8);
auto * column_data_type = llvm::StructType::get(b.getInt8PtrTy(), b.getInt8PtrTy());
auto * aggregate_data_places_type = b.getInt8Ty()->getPointerTo()->getPointerTo();
auto * aggregate_loop_func_declaration = llvm::FunctionType::get(b.getVoidTy(), { size_type, column_data_type->getPointerTo(), aggregate_data_places_type }, false);
auto * aggregate_loop_func = llvm::Function::Create(aggregate_loop_func_declaration, llvm::Function::ExternalLinkage, name, module);
auto * arguments = aggregate_loop_func->args().begin();
llvm::Value * rows_count_arg = &*arguments++;
llvm::Value * columns_arg = &*arguments++;
llvm::Value * aggregate_data_places_arg = &*arguments++;
auto * entry = llvm::BasicBlock::Create(b.getContext(), "entry", aggregate_loop_func);
b.SetInsertPoint(entry);
std::vector<ColumnDataPlaceholder> columns(functions.size());
for (size_t i = 0; i < functions.size(); ++i)
{
auto return_type = functions[i].function->getReturnType();
auto * data = b.CreateLoad(column_data_type, b.CreateConstInBoundsGEP1_32(column_data_type, columns_arg, i));
columns[i].data_init = b.CreatePointerCast(b.CreateExtractValue(data, {0}), toNativeType(b, removeNullable(return_type))->getPointerTo());
columns[i].null_init = return_type->isNullable() ? b.CreateExtractValue(data, {1}) : nullptr;
}
auto * end = llvm::BasicBlock::Create(b.getContext(), "end", aggregate_loop_func);
auto * loop = llvm::BasicBlock::Create(b.getContext(), "loop", aggregate_loop_func);
b.CreateCondBr(b.CreateICmpEQ(rows_count_arg, llvm::ConstantInt::get(size_type, 0)), end, loop);
b.SetInsertPoint(loop);
auto * counter_phi = b.CreatePHI(rows_count_arg->getType(), 2);
counter_phi->addIncoming(llvm::ConstantInt::get(size_type, 0), entry);
auto * aggregate_data_place_phi = b.CreatePHI(aggregate_data_places_type, 2);
aggregate_data_place_phi->addIncoming(aggregate_data_places_arg, entry);
for (auto & col : columns)
{
col.data = b.CreatePHI(col.data_init->getType(), 2);
col.data->addIncoming(col.data_init, entry);
if (col.null_init)
{
col.null = b.CreatePHI(col.null_init->getType(), 2);
col.null->addIncoming(col.null_init, entry);
}
}
for (size_t i = 0; i < functions.size(); ++i)
{
size_t aggregate_function_offset = functions[i].aggregate_data_offset;
const auto * aggregate_function_ptr = functions[i].function;
auto * aggregate_data_place = b.CreateLoad(b.getInt8Ty()->getPointerTo(), aggregate_data_place_phi);
auto * aggregation_place_with_offset = b.CreateConstInBoundsGEP1_32(nullptr, aggregate_data_place, aggregate_function_offset);
auto * final_value = aggregate_function_ptr->compileGetResult(b, aggregation_place_with_offset);
if (columns[i].null_init)
{
b.CreateStore(b.CreateExtractValue(final_value, {0}), columns[i].data);
b.CreateStore(b.CreateSelect(b.CreateExtractValue(final_value, {1}), b.getInt8(1), b.getInt8(0)), columns[i].null);
}
else
{
b.CreateStore(final_value, columns[i].data);
}
}
/// End of loop
auto * cur_block = b.GetInsertBlock();
for (auto & col : columns)
{
col.data->addIncoming(b.CreateConstInBoundsGEP1_32(nullptr, col.data, 1), cur_block);
if (col.null)
col.null->addIncoming(b.CreateConstInBoundsGEP1_32(nullptr, col.null, 1), cur_block);
}
auto * value = b.CreateAdd(counter_phi, llvm::ConstantInt::get(size_type, 1), "", true, true);
counter_phi->addIncoming(value, cur_block);
aggregate_data_place_phi->addIncoming(b.CreateConstInBoundsGEP1_32(nullptr, aggregate_data_place_phi, 1), cur_block);
b.CreateCondBr(b.CreateICmpEQ(value, rows_count_arg), end, loop);
b.SetInsertPoint(end);
b.CreateRetVoid();
}
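/// A hedged sketch of the per-row logic this IR corresponds to (illustrative only, not part of this patch):
///
///     for (size_t row = 0; row < rows_count; ++row)
///         for (size_t i = 0; i < functions.size(); ++i)
///             /// compileGetResult inlines reading the final value from
///             /// places[row] + functions[i].aggregate_data_offset and storing it into
///             /// result column i (data and, for nullable results, the null map).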
CompiledAggregateFunctions compileAggregateFunctions(CHJIT & jit, const std::vector<AggregateFunctionWithOffset> & functions, std::string functions_dump_name)
{
std::string create_aggregate_states_functions_name = functions_dump_name + "_create";
std::string add_aggregate_states_functions_name = functions_dump_name + "_add";
std::string merge_aggregate_states_functions_name = functions_dump_name + "_merge";
std::string insert_aggregate_states_functions_name = functions_dump_name + "_insert";
auto compiled_module = jit.compileModule([&](llvm::Module & module)
{
compileCreateAggregateStatesFunctions(module, functions, create_aggregate_states_functions_name);
compileAddIntoAggregateStatesFunctions(module, functions, add_aggregate_states_functions_name);
compileMergeAggregatesStates(module, functions, merge_aggregate_states_functions_name);
compileInsertAggregatesIntoResultColumns(module, functions, insert_aggregate_states_functions_name);
});
auto create_aggregate_states_function = reinterpret_cast<JITCreateAggregateStatesFunction>(compiled_module.function_name_to_symbol[create_aggregate_states_functions_name]);
auto add_into_aggregate_states_function = reinterpret_cast<JITAddIntoAggregateStatesFunction>(compiled_module.function_name_to_symbol[add_aggregate_states_functions_name]);
auto merge_aggregate_states_function = reinterpret_cast<JITMergeAggregateStatesFunction>(compiled_module.function_name_to_symbol[merge_aggregate_states_functions_name]);
auto insert_aggregate_states_function = reinterpret_cast<JITInsertAggregateStatesIntoColumnsFunction>(compiled_module.function_name_to_symbol[insert_aggregate_states_functions_name]);
assert(create_aggregate_states_function);
assert(add_into_aggregate_states_function);
assert(merge_aggregate_states_function);
assert(insert_aggregate_states_function);
CompiledAggregateFunctions compiled_aggregate_functions
{
.create_aggregate_states_function = create_aggregate_states_function,
.add_into_aggregate_states_function = add_into_aggregate_states_function,
.merge_aggregate_states_function = merge_aggregate_states_function,
.insert_aggregates_into_columns_function = insert_aggregate_states_function,
.functions_count = functions.size(),
.compiled_module = std::move(compiled_module)
};
return compiled_aggregate_functions;
}
}

View File

@ -7,6 +7,7 @@
#if USE_EMBEDDED_COMPILER
#include <Functions/IFunction.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Interpreters/JIT/CHJIT.h>
namespace DB
@ -28,18 +29,56 @@ struct ColumnData
ColumnData getColumnData(const IColumn * column);
using ColumnDataRowsSize = size_t;
using JITCompiledFunction = void (*)(ColumnDataRowsSize, ColumnData *);
struct CompiledFunction
{
JITCompiledFunction compiled_function;
CHJIT::CompiledModule compiled_module;
};
/** Compile a function to native JIT code using a CHJIT instance.
* The function is compiled as a single module.
* After this call the code for the function is compiled and can be queried using
* findCompiledFunction with the function name.
* The compiled function can be safely cast to the JITCompiledFunction type and must be called with
* valid ColumnData and ColumnDataRowsSize.
* It is important that one of the ColumnData parameters of JITCompiledFunction is the result column,
* which will be filled by the compiled function.
* It is the client's responsibility to match the number of ColumnData arguments to the
* number of function arguments, plus one additional ColumnData for the result.
*/
CHJIT::CompiledModuleInfo compileFunction(CHJIT & jit, const IFunctionBase & function);
CompiledFunction compileFunction(CHJIT & jit, const IFunctionBase & function);
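/// A hedged usage sketch (not part of this header): `function`, `arguments_columns`,
/// `result_column` and `rows` are placeholders supplied by the caller.
///
///     CHJIT jit;
///     CompiledFunction compiled = compileFunction(jit, function);
///
///     std::vector<ColumnData> columns;
///     for (const auto * argument_column : arguments_columns)
///         columns.emplace_back(getColumnData(argument_column));
///     columns.emplace_back(getColumnData(result_column));   /// the result column is filled by the call
///
///     compiled.compiled_function(rows, columns.data());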
struct AggregateFunctionWithOffset
{
const IAggregateFunction * function;
size_t aggregate_data_offset;
};
using JITCreateAggregateStatesFunction = void (*)(AggregateDataPtr);
using JITAddIntoAggregateStatesFunction = void (*)(ColumnDataRowsSize, ColumnData *, AggregateDataPtr *);
using JITMergeAggregateStatesFunction = void (*)(AggregateDataPtr, AggregateDataPtr);
using JITInsertAggregateStatesIntoColumnsFunction = void (*)(ColumnDataRowsSize, ColumnData *, AggregateDataPtr *);
struct CompiledAggregateFunctions
{
JITCreateAggregateStatesFunction create_aggregate_states_function;
JITAddIntoAggregateStatesFunction add_into_aggregate_states_function;
JITMergeAggregateStatesFunction merge_aggregate_states_function;
JITInsertAggregateStatesIntoColumnsFunction insert_aggregates_into_columns_function;
/// Count of functions that were compiled
size_t functions_count;
/// Compiled module. It is the client's responsibility to destroy it once the functions are no longer required.
CHJIT::CompiledModule compiled_module;
};
/** Compile aggregate functions to native JIT code using a CHJIT instance.
*
* JITCreateAggregateStatesFunction initializes the aggregate data pointer with the initial aggregate state values.
* JITAddIntoAggregateStatesFunction updates the aggregate states of the aggregate functions from the specified ColumnData.
* JITMergeAggregateStatesFunction merges the aggregate states of the aggregate functions.
* JITInsertAggregateStatesIntoColumnsFunction inserts the aggregate states of the aggregate functions into the result columns.
*/
CompiledAggregateFunctions compileAggregateFunctions(CHJIT & jit, const std::vector<AggregateFunctionWithOffset> & functions, std::string functions_dump_name);
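/// A hedged usage sketch of the returned entry points (not part of this header; `jit`,
/// `functions_with_offsets`, `place`, `dst_place`, `src_place`, `places`, `argument_columns`,
/// `result_columns` and `rows` are placeholders supplied by the caller):
///
///     auto compiled = compileAggregateFunctions(jit, functions_with_offsets, "aggregate_functions");
///     compiled.create_aggregate_states_function(place);
///     compiled.add_into_aggregate_states_function(rows, argument_columns.data(), places.data());
///     compiled.merge_aggregate_states_function(dst_place, src_place);
///     compiled.insert_aggregates_into_columns_function(rows, result_columns.data(), places.data());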
}

View File

@ -5,6 +5,8 @@
#include <Interpreters/InJoinSubqueriesPreprocessor.h>
#include <Interpreters/TableJoin.h>
#include <Interpreters/getTableExpressions.h>
#include <Parsers/ASTAsterisk.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTQualifiedAsterisk.h>
@ -12,8 +14,7 @@
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ParserTablesInSelectQuery.h>
#include <Parsers/parseQuery.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/IStorage.h>
#include <Storages/StorageDictionary.h>
@ -33,6 +34,23 @@ namespace ErrorCodes
namespace
{
template <typename T, typename ... Args>
std::shared_ptr<T> addASTChildrenTo(IAST & node, ASTPtr & children, Args && ... args)
{
auto new_children = std::make_shared<T>(std::forward<Args>(args)...);
children = new_children;
node.children.push_back(children);
return new_children;
}
template <typename T>
std::shared_ptr<T> addASTChildren(IAST & node)
{
auto children = std::make_shared<T>();
node.children.push_back(children);
return children;
}
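/// For example, addASTChildrenTo<ASTSubquery>(table_expr, table_expr.subquery) creates the node,
/// assigns it to table_expr.subquery and registers it in table_expr.children, while
/// addASTChildren<ASTAsterisk>(*select_expression_list) only appends the new node to the children list
/// and returns it.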
void replaceJoinedTable(const ASTSelectQuery & select_query)
{
const ASTTablesInSelectQueryElement * join = select_query.join();
@ -48,15 +66,30 @@ void replaceJoinedTable(const ASTSelectQuery & select_query)
if (table_expr.database_and_table_name)
{
const auto & table_id = table_expr.database_and_table_name->as<ASTTableIdentifier &>();
String expr = "(SELECT * FROM " + backQuote(table_id.name()) + ") AS " + backQuote(table_id.shortName());
String table_name = table_id.name();
String table_short_name = table_id.shortName();
// FIXME: the expression "a as b" exposes both the "a" and "b" names, which is not equivalent to "(select * from a) as b",
// so we can't replace aliased tables.
// FIXME: long table names include the database name, which we can't preserve within an alias.
if (table_id.alias.empty() && table_id.isShort())
{
ParserTableExpression parser;
table_expr = parseQuery(parser, expr, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH)->as<ASTTableExpression &>();
/// Build a query of the form '(SELECT * FROM table_name) AS table_short_name'
table_expr = ASTTableExpression();
auto subquery = addASTChildrenTo<ASTSubquery>(table_expr, table_expr.subquery);
subquery->setAlias(table_short_name);
auto sub_select_with_union = addASTChildren<ASTSelectWithUnionQuery>(*subquery);
auto list_of_selects = addASTChildrenTo<ASTExpressionList>(*sub_select_with_union, sub_select_with_union->list_of_selects);
auto new_select = addASTChildren<ASTSelectQuery>(*list_of_selects);
new_select->setExpression(ASTSelectQuery::Expression::SELECT, std::make_shared<ASTExpressionList>());
addASTChildren<ASTAsterisk>(*new_select->select());
new_select->setExpression(ASTSelectQuery::Expression::TABLES, std::make_shared<ASTTablesInSelectQuery>());
auto tables_elem = addASTChildren<ASTTablesInSelectQueryElement>(*new_select->tables());
auto sub_table_expr = addASTChildrenTo<ASTTableExpression>(*tables_elem, tables_elem->table_expression);
addASTChildrenTo<ASTTableIdentifier>(*sub_table_expr, sub_table_expr->database_and_table_name, table_name);
}
}
}

Some files were not shown because too many files have changed in this diff.