Merge remote-tracking branch 'upstream/master' into fix25

proller 2019-08-15 12:02:07 +00:00
commit 4b8564efa0
35 changed files with 435 additions and 201 deletions

View File

@@ -2,7 +2,7 @@
 #include <Poco/Logger.h>
 #include <DataStreams/IBlockInputStream.h>
-#include <Interpreters/ExpressionAnalyzer.h> /// SubqueriesForSets
+#include <Interpreters/SubqueryForSet.h>
 namespace Poco { class Logger; }

View File

@@ -1,5 +1,6 @@
 #include "LibraryDictionarySource.h"
 #include <DataStreams/OneBlockInputStream.h>
+#include <Core/Defines.h>
 #include <Interpreters/Context.h>
 #include <Poco/File.h>
 #include <common/logger_useful.h>
@@ -134,7 +135,7 @@ LibraryDictionarySource::LibraryDictionarySource(
         ErrorCodes::FILE_DOESNT_EXIST);
     description.init(sample_block);
     library = std::make_shared<SharedLibrary>(path, RTLD_LAZY
-#if defined(RTLD_DEEPBIND) // Does not exists in freebsd
+#if defined(RTLD_DEEPBIND) && !defined(ADDRESS_SANITIZER) // Does not exists in FreeBSD. Cannot work with Address Sanitizer.
         | RTLD_DEEPBIND
 #endif
     );

View File

@@ -83,7 +83,17 @@ BlockInputStreamPtr FormatFactory::getInput(
         return std::make_shared<NativeBlockInputStream>(buf, sample, 0);

     if (!getCreators(name).input_processor_creator)
-        return getInput(name, buf, sample, context, max_block_size, rows_portion_size, std::move(callback));
+    {
+        const auto & input_getter = getCreators(name).input_creator;
+        if (!input_getter)
+            throw Exception("Format " + name + " is not suitable for input", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_INPUT);
+
+        const Settings & settings = context.getSettingsRef();
+        FormatSettings format_settings = getInputFormatSetting(settings);
+
+        return input_getter(
+            buf, sample, context, max_block_size, rows_portion_size, callback ? callback : ReadCallback(), format_settings);
+    }

     auto format = getInputFormat(name, buf, sample, context, max_block_size, rows_portion_size, std::move(callback));
     return std::make_shared<InputStreamFromInputFormat>(std::move(format));
@@ -106,13 +116,22 @@ BlockOutputStreamPtr FormatFactory::getOutput(const String & name, WriteBuffer &
     }

     if (!getCreators(name).output_processor_creator)
-        return getOutput(name, buf, sample, context);
+    {
+        const auto & output_getter = getCreators(name).output_creator;
+        if (!output_getter)
+            throw Exception("Format " + name + " is not suitable for output", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT);
+
+        const Settings & settings = context.getSettingsRef();
+        FormatSettings format_settings = getOutputFormatSetting(settings);
+
+        return std::make_shared<MaterializingBlockOutputStream>(
+            output_getter(buf, sample, context, format_settings), sample);
+    }

     auto format = getOutputFormat(name, buf, sample, context);

+    /** Materialization is needed, because formats can use the functions `IDataType`,
+      * which only work with full columns.
+      */
     return std::make_shared<MaterializingBlockOutputStream>(std::make_shared<OutputStreamToOutputFormat>(format), sample);
 }

View File

@@ -0,0 +1,54 @@
+#include <Functions/IFunction.h>
+#include <Functions/FunctionFactory.h>
+#include <Interpreters/Context.h>
+#include <DataTypes/DataTypeString.h>
+
+namespace DB
+{
+
+class FunctionCurrentUser : public IFunction
+{
+    const String user_name;
+
+public:
+    static constexpr auto name = "currentUser";
+    static FunctionPtr create(const Context & context)
+    {
+        return std::make_shared<FunctionCurrentUser>(context.getClientInfo().initial_user);
+    }
+
+    explicit FunctionCurrentUser(const String & user_name_) : user_name{user_name_}
+    {
+    }
+
+    String getName() const override
+    {
+        return name;
+    }
+
+    size_t getNumberOfArguments() const override
+    {
+        return 0;
+    }
+
+    DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
+    {
+        return std::make_shared<DataTypeString>();
+    }
+
+    bool isDeterministic() const override { return false; }
+
+    void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override
+    {
+        block.getByPosition(result).column = DataTypeString().createColumnConst(input_rows_count, user_name);
+    }
+};
+
+void registerFunctionCurrentUser(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctionCurrentUser>();
+    factory.registerAlias("user", FunctionCurrentUser::name, FunctionFactory::CaseInsensitive);
+}
+
+}

View File

@@ -6,6 +6,7 @@ namespace DB
 class FunctionFactory;

 void registerFunctionCurrentDatabase(FunctionFactory &);
+void registerFunctionCurrentUser(FunctionFactory &);
 void registerFunctionHostName(FunctionFactory &);
 void registerFunctionVisibleWidth(FunctionFactory &);
 void registerFunctionToTypeName(FunctionFactory &);
@@ -56,6 +57,7 @@ void registerFunctionConvertCharset(FunctionFactory &);
 void registerFunctionsMiscellaneous(FunctionFactory & factory)
 {
     registerFunctionCurrentDatabase(factory);
+    registerFunctionCurrentUser(factory);
     registerFunctionHostName(factory);
     registerFunctionVisibleWidth(factory);
     registerFunctionToTypeName(factory);

View File

@@ -54,7 +54,6 @@ struct ScopeStack
 /// Collect ExpressionAction from AST. Returns PreparedSets and SubqueriesForSets too.
-/// After AST is visited source ExpressionActions should be updated with popActionsLevel() method.
 class ActionsVisitor
 {
 public:
@@ -63,9 +62,11 @@ public:
             PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_,
             bool no_subqueries_, bool only_consts_, bool no_storage_or_local_, std::ostream * ostr_ = nullptr);

-    void visit(const ASTPtr & ast);
-
-    ExpressionActionsPtr popActionsLevel() { return actions_stack.popLevel(); }
+    void visit(const ASTPtr & ast, ExpressionActionsPtr & actions)
+    {
+        visit(ast);
+        actions = actions_stack.popLevel();
+    }

 private:
     const Context & context;
@@ -81,6 +82,7 @@ private:
     std::ostream * ostr;
     ScopeStack actions_stack;

+    void visit(const ASTPtr & ast);
     SetPtr makeSet(const ASTFunction * node, const Block & sample_block);
 };

View File

@@ -320,31 +320,10 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_subqueries,
     ActionsVisitor actions_visitor(context, settings.size_limits_for_set, subquery_depth,
                                    sourceColumns(), actions, prepared_sets, subqueries_for_sets,
                                    no_subqueries, only_consts, !isRemoteStorage(), log.stream());
-    actions_visitor.visit(ast);
-    actions = actions_visitor.popActionsLevel();
+    actions_visitor.visit(ast, actions);
 }

-void ExpressionAnalyzer::getActionsFromJoinKeys(const ASTTableJoin & table_join, bool no_subqueries, ExpressionActionsPtr & actions)
-{
-    bool only_consts = false;
-
-    LogAST log;
-    ActionsVisitor actions_visitor(context, settings.size_limits_for_set, subquery_depth,
-                                   sourceColumns(), actions, prepared_sets, subqueries_for_sets,
-                                   no_subqueries, only_consts, !isRemoteStorage(), log.stream());
-
-    if (table_join.using_expression_list)
-        actions_visitor.visit(table_join.using_expression_list);
-    else if (table_join.on_expression)
-    {
-        for (const auto & ast : analyzedJoin().key_asts_left)
-            actions_visitor.visit(ast);
-    }
-
-    actions = actions_visitor.popActionsLevel();
-}
-
 bool ExpressionAnalyzer::makeAggregateDescriptions(ExpressionActionsPtr & actions)
 {
     for (const ASTFunction * node : aggregates())
@@ -479,9 +458,18 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty
         throw Exception("Expected ANY or ALL in JOIN section, because setting (join_default_strictness) is empty", DB::ErrorCodes::EXPECTED_ALL_OR_ANY);
     }

-    const auto & table_to_join = join_element.table_expression->as<ASTTableExpression &>();
+    if (join_params.using_expression_list)
+    {
+        getRootActions(join_params.using_expression_list, only_types, step.actions);
+    }
+    else if (join_params.on_expression)
+    {
+        auto list = std::make_shared<ASTExpressionList>();
+        list->children = analyzedJoin().key_asts_left;
+        getRootActions(list, only_types, step.actions);
+    }

-    getActionsFromJoinKeys(join_params, only_types, step.actions);
+    const auto & table_to_join = join_element.table_expression->as<ASTTableExpression &>();

     /// Two JOINs are not supported with the same subquery, but different USINGs.
     auto join_hash = join_element.getTreeHash();

View File

@@ -2,9 +2,9 @@
 #include <Core/Settings.h>
 #include <DataStreams/IBlockStream_fwd.h>
-#include <Interpreters/ActionsVisitor.h>
 #include <Interpreters/AggregateDescription.h>
 #include <Interpreters/SyntaxAnalyzer.h>
+#include <Interpreters/SubqueryForSet.h>
 #include <Parsers/IAST_fwd.h>
 #include <Storages/IStorage_fwd.h>
@@ -183,9 +183,6 @@ private:
     void addJoinAction(ExpressionActionsPtr & actions, bool only_types) const;

-    /// If ast is ASTSelectQuery with JOIN, add actions for JOIN key columns.
-    void getActionsFromJoinKeys(const ASTTableJoin & table_join, bool no_subqueries, ExpressionActionsPtr & actions);
-
     void getRootActions(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts = false);

     /** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions,

View File

@@ -7,7 +7,6 @@
 #include <Parsers/ASTSelectQuery.h>
 #include <Parsers/ASTIdentifier.h>
 #include <Interpreters/Context.h>
-#include <Interpreters/ActionsVisitor.h>
 #include <Interpreters/interpretSubquery.h>
 #include <Common/typeid_cast.h>
 #include <Core/Block.h>

View File

@@ -1,7 +1,6 @@
 #pragma once

 #include "DatabaseAndTableWithAlias.h"
-#include "ExpressionAnalyzer.h"

 #include <Parsers/ASTSelectQuery.h>

 #include <map>

View File

@@ -12,7 +12,6 @@ class Join;
 using JoinPtr = std::shared_ptr<Join>;

 class InterpreterSelectWithUnionQuery;
-struct AnalyzedJoin;

 /// Information on what to do when executing a subquery in the [GLOBAL] IN/JOIN section.

View File

@@ -15,6 +15,7 @@
 #include <Interpreters/OptimizeIfWithConstantConditionVisitor.h>
 #include <Interpreters/RequiredSourceColumnsVisitor.h>
 #include <Interpreters/GetAggregatesVisitor.h>
+#include <Interpreters/ExpressionActions.h> /// getSmallestColumn()
 #include <Parsers/ASTExpressionList.h>
 #include <Parsers/ASTFunction.h>

View File

@@ -177,7 +177,7 @@ private:
     /// Prepare processor with pid number.
     /// Check parents and children of current processor and push them to stacks if they also need to be prepared.
     /// If processor wants to be expanded, ExpandPipelineTask from thread_number's execution context will be used.
-    bool prepareProcessor(size_t pid, Stack & children, Stack & parents, size_t thread_number, bool async);
+    bool prepareProcessor(UInt64 pid, Stack & children, Stack & parents, size_t thread_number, bool async);
     void doExpandPipeline(ExpandPipelineTask * task, bool processing);

     void executeImpl(size_t num_threads);

View File

@@ -16,6 +16,7 @@
 #include <Parsers/ASTLiteral.h>
 #include <Parsers/ASTFunction.h>
 #include <Parsers/ASTSampleRatio.h>
+#include <Interpreters/ExpressionAnalyzer.h>

 /// Allow to use __uint128_t as a template parameter for boost::rational.
 // https://stackoverflow.com/questions/41198673/uint128-t-not-working-with-clang-and-libstdc

View File

@@ -4,7 +4,6 @@
 #include <Core/Block.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <Interpreters/Context.h>
-#include <Interpreters/ExpressionAnalyzer.h>
 #include <Parsers/ASTExpressionList.h>
 #include <Parsers/ASTSelectQuery.h>
 #include <Parsers/ASTFunction.h>

View File

@@ -0,0 +1,4 @@
+1
+1
+1	1
+1

View File

@@ -0,0 +1,5 @@
+-- since actual user name is unknown, have to perform just smoke tests
+select currentUser() IS NOT NULL;
+select length(currentUser()) > 0;
+select currentUser() = user(), currentUser() = USER();
+select currentUser() = initial_user from system.processes where query like '%$!@#%';

View File

@@ -0,0 +1 @@
+SELECT * FROM url('http://127.0.0.1:1337/? HTTP/1.1\r\nTest: test', CSV, 'column1 String'); -- { serverError 1000 }
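For contrast with the negative test above, a well-formed call to the `url` table function follows the same `url(URL, format, structure)` shape (the endpoint here is hypothetical):

```sql
SELECT * FROM url('http://127.0.0.1:8123/data.csv', CSV, 'column1 String');
```

The test asserts that a URL whose query string smuggles an ` HTTP/1.1` fragment and CR-LF characters is rejected with error 1000 instead of being forwarded to the server.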

View File

@@ -1,9 +1,11 @@
 # Database Engines

-Database engines provide working with tables.
+Database engines allow you to work with tables.

-By default, ClickHouse uses its native database engine which provides configurable [table engines](../operations/table_engines/index.md) and [SQL dialect](../query_language/syntax.md).
+By default, ClickHouse uses its native database engine, which provides configurable [table engines](../operations/table_engines/index.md) and an [SQL dialect](../query_language/syntax.md).

-Also you can use the following database engines:
+You can also use the following database engines:

 - [MySQL](mysql.md)
+
+[Original article](https://clickhouse.yandex/docs/en/database_engines/) <!--hide-->

View File

@@ -1,10 +1,10 @@
 # MySQL

-Allows to connect to some database on remote MySQL server and perform `INSERT` and `SELECT` queries with tables to exchange data between ClickHouse and MySQL.
+Allows to connect to databases on a remote MySQL server and perform `INSERT` and `SELECT` queries with tables to exchange data between ClickHouse and MySQL.

-The `MySQL` database engine translate queries to the MySQL server, so you can perform operations such as `SHOW TABLES` or `SHOW CREATE TABLE`.
+The `MySQL` database engine translate queries to the MySQL server so you can perform operations such as `SHOW TABLES` or `SHOW CREATE TABLE`.

-You cannot perform with tables the following queries:
+You cannot perform the following queries:

 - `ATTACH`/`DETACH`
 - `DROP`
@@ -48,7 +48,7 @@ BINARY | [FixedString](../data_types/fixedstring.md)

 All other MySQL data types are converted into [String](../data_types/string.md).

-[Nullable](../data_types/nullable.md) data type is supported.
+[Nullable](../data_types/nullable.md) is supported.

 ## Examples of Use
@@ -120,3 +120,5 @@ SELECT * FROM mysql_db.mysql_table
 │ 3 │ 4 │
 └────────┴───────┘
 ```
+
+[Original article](https://clickhouse.yandex/docs/en/database_engines/mysql/) <!--hide-->

View File

@@ -65,6 +65,31 @@ You can cancel a long query by pressing Ctrl+C. However, you will still need to

 The command-line client allows passing external data (external temporary tables) for querying. For more information, see the section "External data for query processing".

+### Queries with Parameters {#cli-queries-with-parameters}
+
+You can create a query with parameters, and pass values for these parameters with the parameters of the client app. For example:
+
+```bash
+clickhouse-client --param_parName="[1, 2]" -q "SELECT * FROM table WHERE a = {parName:Array(UInt16)}"
+```
+
+#### Syntax of a Query {#cli-queries-with-parameters-syntax}
+
+Format a query by the standard method. Values that you want to put into the query from the app parameters place in braces and format as follows:
+
+```
+{<name>:<data type>}
+```
+
+- `name` — Identifier of a placeholder that should be used in app parameter as `--param_name = value`.
+- `data type` — A data type of app parameter value. For example, data structure like `(integer, ('string', integer))` can have a data type `Tuple(UInt8, Tuple(String, UInt8))` (also you can use another [integer](../data_types/int_uint.md) types).
+
+#### Example
+
+```bash
+clickhouse-client --param_tuple_in_tuple="(10, ('dt', 10))" -q "SELECT * FROM table WHERE val = {tuple_in_tuple:Tuple(UInt8, Tuple(String, UInt8))}"
+```
+
 ## Configuring {#interfaces_cli_configuration}

 You can pass parameters to `clickhouse-client` (all parameters have a default value) using:

View File

@@ -244,5 +244,14 @@ curl -sS 'http://localhost:8123/?max_result_bytes=4000000&buffer_size=3000000&wa

 Use buffering to avoid situations where a query processing error occurred after the response code and HTTP headers were sent to the client. In this situation, an error message is written at the end of the response body, and on the client side, the error can only be detected at the parsing stage.

+### Queries with Parameters {#cli-queries-with-parameters}
+
+You can create a query with parameters, and pass values for these parameters with the parameters of the HTTP request. For more information, see [CLI Formatted Queries](cli.md#cli-queries-with-parameters).
+
+### Example
+
+```bash
+curl -sS "<address>?param_id=2&param_phrase=test" -d "SELECT * FROM table WHERE int_column = {id:UInt8} and string_column = {phrase:String}"
+```
+
 [Original article](https://clickhouse.yandex/docs/en/interfaces/http_interface/) <!--hide-->

View File

@@ -28,7 +28,7 @@ Main features:

 ## Creating a Table {#table_engine-mergetree-creating-a-table}

-```
+```sql
 CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 (
     name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
@@ -47,7 +47,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]

 For descriptions of request parameters, see the [request description](../../query_language/create.md).

-**Query clauses**
+### Query Clauses

 - `ENGINE` — Name and parameters of the engine. `ENGINE = MergeTree()`. The `MergeTree` engine does not have parameters.
@@ -81,7 +81,7 @@ For descriptions of request parameters, see the [request description](../../query
     <a name="mergetree_setting-merge_with_ttl_timeout"></a>
     - `merge_with_ttl_timeout` — Minimum delay in seconds before repeating a merge with TTL. Default value: 86400 (1 day).

-**Example of setting the sections**
+**Example of Sections Setting**

 ```sql
 ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity=8192
@@ -107,7 +107,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 ) ENGINE [=] MergeTree(date-column [, sampling_expression], (primary, key), index_granularity)
 ```

-**MergeTree() parameters**
+**MergeTree() Parameters**

 - `date-column` — The name of a column of the [Date](../../data_types/date.md) type. ClickHouse automatically creates partitions by month based on this column. The partition names are in the `"YYYYMM"` format.
 - `sampling_expression` — An expression for sampling.
@@ -312,16 +312,16 @@ Reading from a table is automatically parallelized.

 Determines the lifetime of values.

-The `TTL` clause can be set for the whole table and for each individual column. If `TTL` is set for the whole table, individual `TTL` for columns are ignored.
+The `TTL` clause can be set for the whole table and for each individual column. If both `TTL` are set, ClickHouse uses that `TTL` which expires earlier.

-The table must have the column of the [Date](../../data_types/date.md) or [DateTime](../../data_types/datetime.md) data type. This date column should be used in the `TTL` clause. You can only set lifetime of the data as an interval from the date column value.
+The table must have the column in the [Date](../../data_types/date.md) or [DateTime](../../data_types/datetime.md) data type. To define the lifetime of data, use operations on this time column, for example:

 ```
-TTL date_time + interval
+TTL time_column
+TTL time_column + interval
 ```

-You can set the `interval` by any expression, returning the value of the `DateTime` data type. For example, you can use [time interval](../../query_language/operators.md#operators-datetime) operators.
+To define `interval`, use [time interval](../../query_language/operators.md#operators-datetime) operators.

 ```
 TTL date_time + INTERVAL 1 MONTH
@@ -330,20 +330,20 @@ TTL date_time + INTERVAL 15 HOUR

 **Column TTL**

-When the values in the column expire, ClickHouse replace them with the default values for the column data type. If all the column values in the data part become expired, ClickHouse deletes this column from the data part in a filesystem.
+When the values in the column expire, ClickHouse replaces them with the default values for the column data type. If all the column values in the data part expire, ClickHouse deletes this column from the data part in a filesystem.

-The `TTL` clause cannot be used for key columns.
+The `TTL` clause can't be used for key columns.

 **Table TTL**

-When some data in table expires, ClickHouse deletes all the corresponding rows.
+When data in a table expires, ClickHouse deletes all corresponding rows.

-**Cleaning up of Data**
+**Removing Data**

-Data with expired TTL is removed, when ClickHouse merges data parts.
+Data with an expired TTL is removed when ClickHouse merges data parts.

-When ClickHouse see that some data is expired, it performs off-schedule merge. To control frequency of such merges, you can set [merge_with_ttl_timeout](#mergetree_setting-merge_with_ttl_timeout). If it is too low, many off-schedule merges consume much resources.
+When ClickHouse see that data is expired, it performs an off-schedule merge. To control the frequency of such merges, you can set [merge_with_ttl_timeout](#mergetree_setting-merge_with_ttl_timeout). If the value is too low, it will perform many off-schedule merges that may consume a lot of resources.

-If you perform the `SELECT` query between merges you can get the expired data. To avoid it, use the [OPTIMIZE](../../query_language/misc.md#misc_operations-optimize) query before `SELECT`.
+If you perform the `SELECT` query between merges, you may get expired data. To avoid it, use the [OPTIMIZE](../../query_language/misc.md#misc_operations-optimize) query before `SELECT`.

 [Original article](https://clickhouse.yandex/docs/en/operations/table_engines/mergetree/) <!--hide-->
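Putting the TTL rules above together, a minimal sketch of a table that combines a column-level and a table-level `TTL` might look as follows (the table and column names are hypothetical; clause placement follows the `CREATE TABLE` template at the top of this page):

```sql
CREATE TABLE ttl_example
(
    d DateTime,
    a Int32 TTL d + INTERVAL 1 MONTH  -- column TTL: `a` reverts to its default one month after `d`
)
ENGINE = MergeTree()
ORDER BY d
TTL d + INTERVAL 3 MONTH;             -- table TTL: whole rows are deleted three months after `d`
```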

View File

@@ -44,5 +44,71 @@ Merges the intermediate aggregation states in the same way as the -Merge combina

 Converts an aggregate function for tables into an aggregate function for arrays that aggregates the corresponding array items and returns an array of results. For example, `sumForEach` for the arrays `[1, 2]`, `[3, 4, 5]`and`[6, 7]`returns the result `[10, 13, 5]` after adding together the corresponding array items.

+## -Resample
+
+Allows to divide data by groups, and then separately aggregates the data in those groups. Groups are created by splitting the values of one of the columns into intervals.
+
+```
+<aggFunction>Resample(start, end, step)(<aggFunction_params>, resampling_key)
+```
+
+**Parameters**
+
+- `start` — Starting value of the whole required interval for the values of `resampling_key`.
+- `stop` — Ending value of the whole required interval for the values of `resampling_key`. The whole interval doesn't include the `stop` value `[start, stop)`.
+- `step` — Step for separating the whole interval by subintervals. The `aggFunction` is executed over each of those subintervals independently.
+- `resampling_key` — Column, which values are used for separating data by intervals.
+- `aggFunction_params` — Parameters of `aggFunction`.
+
+**Returned values**
+
+- Array of `aggFunction` results for each of subintervals.
+
+**Example**
+
+Consider the `people` table with the following data:
+
+```text
+┌─name───┬─age─┬─wage─┐
+│ John   │  16 │   10 │
+│ Alice  │  30 │   15 │
+│ Mary   │  35 │    8 │
+│ Evelyn │  48 │ 11.5 │
+│ David  │  62 │  9.9 │
+│ Brian  │  60 │   16 │
+└────────┴─────┴──────┘
+```
+
+Let's get the names of the persons which age lies in the intervals of `[30,60)` and `[60,75)`. As we use integer representation of age, then there are ages of `[30, 59]` and `[60,74]`.
+
+For aggregating names into the array, we use the aggregate function [groupArray](reference.md#agg_function-grouparray). It takes a single argument. For our case, it is the `name` column. The `groupArrayResample` function should use the `age` column to aggregate names by age. To define required intervals, we pass the `(30, 75, 30)` arguments into the `groupArrayResample` function.
+
+```sql
+SELECT groupArrayResample(30, 75, 30)(name, age) from people
+```
+
+```text
+┌─groupArrayResample(30, 75, 30)(name, age)─────┐
+│ [['Alice','Mary','Evelyn'],['David','Brian']] │
+└───────────────────────────────────────────────┘
+```
+
+Consider the results.
+
+`Jonh` is out of the sample because he is too young. Other people are distributed according to the specified age intervals.
+
+Now, let's count the total number of people and their average wage in the specified age intervals.
+
+```sql
+SELECT
+    countResample(30, 75, 30)(name, age) AS amount,
+    avgResample(30, 75, 30)(wage, age) AS avg_wage
+FROM people
+```
+
+```text
+┌─amount─┬─avg_wage──────────────────┐
+│ [3,2]  │ [11.5,12.949999809265137] │
+└────────┴───────────────────────────┘
+```

 [Original article](https://clickhouse.yandex/docs/en/query_language/agg_functions/combinators/) <!--hide-->
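As one more worked example against the same `people` table, summing wages per age interval with `sumResample` should produce one sum per subinterval: with the data above, `[30,60)` gives 15 + 8 + 11.5 = 34.5 and `[60,75)` gives 9.9 + 16 = 25.9 (exact float formatting may differ):

```sql
SELECT sumResample(30, 75, 30)(wage, age) AS wage_sum
FROM people
```

```text
┌─wage_sum────┐
│ [34.5,25.9] │
└─────────────┘
```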

View File

@@ -650,7 +650,7 @@ The function takes a variable number of parameters. Parameters can be `Tuple`, `

 - [uniqHLL12](#agg_function-uniqhll12)

-## groupArray(x), groupArray(max_size)(x)
+## groupArray(x), groupArray(max_size)(x) {#agg_function-grouparray}

 Creates an array of argument values.
 Values can be added to the array in any (indeterminate) order.

View File

@@ -10,10 +10,10 @@ CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] [ENGINE = engine(..

 - `IF NOT EXISTS`

-    If the `db_name` database already exists then:
+    If the `db_name` database already exists, then ClickHouse doesn't create a new database and:

-    - If clause is specified, ClickHouse doesn't create a new database and doesn't throw an exception.
-    - If clause is not specified, then ClickHouse doesn't create a new database and throw and exception.
+    - Doesn't throw an exception if clause is specified.
+    - Throws an exception if clause isn't specified.

 - `ON CLUSTER`
@@ -23,7 +23,7 @@ CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] [ENGINE = engine(..

     - [MySQL](../database_engines/mysql.md)

-        Allows to retrieve data from the remote MySQL server.
+        Allows you to retrieve data from the remote MySQL server.

     By default, ClickHouse uses its own [database engine](../database_engines/index.md).
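A short illustration of the `IF NOT EXISTS` behaviour described above (the database name is hypothetical):

```sql
CREATE DATABASE db1;                -- creates the database
CREATE DATABASE IF NOT EXISTS db1;  -- db1 already exists: no-op, no exception
CREATE DATABASE db1;                -- db1 already exists: throws an exception
```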

View File

@@ -102,6 +102,9 @@ Sleeps 'seconds' seconds on each row. You can specify an integer or a floating-p

 Returns the name of the current database.
 You can use this function in table engine parameters in a CREATE TABLE query where you need to specify the database.

+## currentUser()
+Returns the login of authorized user (initiator of query execution).
+
 ## isFinite(x)

 Accepts Float32 and Float64 and returns UInt8 equal to 1 if the argument is not infinite and not a NaN, otherwise 0.
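The smoke tests added elsewhere in this commit suggest usage of the new `currentUser()` entry along these lines (the returned login depends on how the client authenticated; `user()` works because it is registered as a case-insensitive alias of `currentUser`):

```sql
SELECT currentUser();             -- e.g. 'default'
SELECT currentUser() = user();    -- 1, since 'user' is an alias
SELECT length(currentUser()) > 0; -- 1, the login is never empty
```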

View File

@ -1,9 +1,10 @@
# Database Engines # Движки баз данных
Database engines provide working with tables. Движки баз данных обеспечивают работу с таблицами.
By default, ClickHouse uses its native database engine which provides configurable [table engines](../operations/table_engines/index.md) and [SQL dialect](../query_language/syntax.md). По умолчанию ClickHouse использует собственный движок баз данных, который поддерживает конфигурируемые [движки таблиц](../operations/table_engines/index.md) и [диалект SQL](../query_language/syntax.md).
Also you can use the following database engines: Также можно использовать следующие движки баз данных:
- [MySQL](mysql.md) - [MySQL](mysql.md)

View File

@@ -1,10 +1,10 @@
 # MySQL

-Allows to connect to some database on remote MySQL server and perform `INSERT` and `SELECT` queries with tables to exchange data between ClickHouse and MySQL.
+Позволяет подключаться к базам данных на удалённом MySQL сервере и выполнять запросы `INSERT` и `SELECT` для обмена данными между ClickHouse и MySQL.

-The `MySQL` database engine translate queries to the MySQL server, so you can perform operations such as `SHOW TABLES` or `SHOW CREATE TABLE`.
+Движок баз данных `MySQL` транслирует запросы при передаче на сервер MySQL, что позволяет выполнять и другие виды запросов, например `SHOW TABLES` или `SHOW CREATE TABLE`.

-You cannot perform with tables the following queries:
+Не поддерживаемые виды запросов:

 - `ATTACH`/`DETACH`
 - `DROP`
@@ -12,48 +12,45 @@ You cannot perform with tables the following queries:
 - `CREATE TABLE`
 - `ALTER`

-## Creating a Database
+## Создание базы данных

-``` sql
+```sql
 CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster]
 ENGINE = MySQL('host:port', 'database', 'user', 'password')
 ```

-**Engine Parameters**
+**Параметры движка**

-- `host:port` — MySQL server address.
-- `database` — Remote database name.
-- `user` — MySQL user.
-- `password` — User password.
+- `host:port` — адрес сервера MySQL.
+- `database` — имя базы данных на удалённом сервере.
+- `user` — пользователь MySQL.
+- `password` — пароль пользователя.

-## Data Types Support
+## Поддержка типов данных

-MySQL | ClickHouse
-------|------------
-UNSIGNED TINYINT | [UInt8](../data_types/int_uint.md)
-TINYINT | [Int8](../data_types/int_uint.md)
-UNSIGNED SMALLINT | [UInt16](../data_types/int_uint.md)
-SMALLINT | [Int16](../data_types/int_uint.md)
-UNSIGNED INT, UNSIGNED MEDIUMINT | [UInt32](../data_types/int_uint.md)
-INT, MEDIUMINT | [Int32](../data_types/int_uint.md)
-UNSIGNED BIGINT | [UInt64](../data_types/int_uint.md)
-BIGINT | [Int64](../data_types/int_uint.md)
-FLOAT | [Float32](../data_types/float.md)
-DOUBLE | [Float64](../data_types/float.md)
-DATE | [Date](../data_types/date.md)
-DATETIME, TIMESTAMP | [DateTime](../data_types/datetime.md)
-BINARY | [FixedString](../data_types/fixedstring.md)
+| MySQL | ClickHouse |
+| ------ | ------------ |
+| UNSIGNED TINYINT | [UInt8](../data_types/int_uint.md) |
+| TINYINT | [Int8](../data_types/int_uint.md) |
+| UNSIGNED SMALLINT | [UInt16](../data_types/int_uint.md) |
+| SMALLINT | [Int16](../data_types/int_uint.md) |
+| UNSIGNED INT, UNSIGNED MEDIUMINT | [UInt32](../data_types/int_uint.md) |
+| INT, MEDIUMINT | [Int32](../data_types/int_uint.md) |
+| UNSIGNED BIGINT | [UInt64](../data_types/int_uint.md) |
+| BIGINT | [Int64](../data_types/int_uint.md) |
+| FLOAT | [Float32](../data_types/float.md) |
+| DOUBLE | [Float64](../data_types/float.md) |
+| DATE | [Date](../data_types/date.md) |
+| DATETIME, TIMESTAMP | [DateTime](../data_types/datetime.md) |
+| BINARY | [FixedString](../data_types/fixedstring.md) |

-All other MySQL data types are converted into [String](../data_types/string.md).
+Все прочие типы данных преобразуются в [String](../data_types/string.md).

-[Nullable](../data_types/nullable.md) data type is supported.
+[Nullable](../data_types/nullable.md) поддержан.

-## Examples of Use
+## Примеры использования

-Table in MySQL:
+Таблица в MySQL:

 ```
 mysql> USE test;
@@ -77,14 +74,16 @@ mysql> select * from mysql_table;
 1 row in set (0,00 sec)
 ```

-Database in ClickHouse, exchanging data with the MySQL server:
+База данных в ClickHouse, позволяющая обмениваться данными с сервером MySQL:

 ```sql
 CREATE DATABASE mysql_db ENGINE = MySQL('localhost:3306', 'test', 'my_user', 'user_password')
 ```
+
 ```sql
 SHOW DATABASES
 ```
+
 ```text
 ┌─name─────┐
 │ default │
@@ -92,31 +91,39 @@ SHOW DATABASES
 │ system │
 └──────────┘
 ```
+
 ```sql
 SHOW TABLES FROM mysql_db
 ```
+
 ```text
 ┌─name─────────┐
 │ mysql_table │
 └──────────────┘
 ```
+
 ```sql
 SELECT * FROM mysql_db.mysql_table
 ```
+
 ```text
 ┌─int_id─┬─value─┐
 │ 1 │ 2 │
 └────────┴───────┘
 ```
+
 ```sql
 INSERT INTO mysql_db.mysql_table VALUES (3,4)
 ```
+
 ```sql
 SELECT * FROM mysql_db.mysql_table
 ```
+
 ```text
 ┌─int_id─┬─value─┐
 │ 1 │ 2 │
 │ 3 │ 4 │
 └────────┴───────┘
 ```

View File

@@ -40,14 +40,15 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
     [ORDER BY expr]
     [PRIMARY KEY expr]
     [SAMPLE BY expr]
+    [TTL expr]
     [SETTINGS name=value, ...]
 ```

 Описание параметров запроса смотрите в [описании запроса](../../query_language/create.md).

-**Секции запроса**
+### Секции запроса

-- `ENGINE` — Имя и параметры движка. `ENGINE = MergeTree()`. `MergeTree` не имеет параметров.
+- `ENGINE` — имя и параметры движка. `ENGINE = MergeTree()`. `MergeTree` не имеет параметров.

 - `PARTITION BY` — [ключ партиционирования](custom_partitioning_key.md).
@@ -66,22 +67,22 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
     Если используется выражение для сэмплирования, то первичный ключ должен содержать его. Пример:
     `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`.

-- `TTL` - выражение для задания времени хранения строк.
+- `TTL` — выражение, определяющее длительность хранения строк.

-    Оно должно зависеть от стобца типа `Date` или `DateTime` и в качестве результата вычислять столбец типа `Date` или `DateTime`. Пример:
-    `TTL date + INTERVAL 1 DAY`
+    Должно зависеть от столбца `Date` или `DateTime` и возвращать столбец `Date` или `DateTime`. Пример: `TTL date + INTERVAL 1 DAY`

-    Подробнее смотрите в [TTL для стоблцов и таблиц](mergetree.md)
+    Дополнительные сведения смотрите в разделе [TTL для столбцов и таблиц](mergetree.md)

 - `SETTINGS` — дополнительные параметры, регулирующие поведение `MergeTree`:

     - `index_granularity` — гранулярность индекса. Число строк данных между «засечками» индекса. По умолчанию — 8192. Список всех доступных параметров можно посмотреть в [MergeTreeSettings.h](https://github.com/yandex/ClickHouse/blob/master/dbms/src/Storages/MergeTree/MergeTreeSettings.h).
     - `min_merge_bytes_to_use_direct_io` — минимальный объем данных, необходимый для прямого (небуферизованного) чтения/записи (direct I/O) на диск. При слиянии частей данных ClickHouse вычисляет общий объем хранения всех данных, подлежащих слиянию. Если общий объем хранения всех данных для чтения превышает `min_bytes_to_use_direct_io` байт, тогда ClickHouse использует флаг `O_DIRECT` при чтении данных с диска. Если `min_merge_bytes_to_use_direct_io = 0`, тогда прямой ввод-вывод отключен. Значение по умолчанию: `10 * 1024 * 1024 * 1024` байт.
+    <a name="mergetree_setting-merge_with_ttl_timeout"></a>
     - `merge_with_ttl_timeout` - Минимальное время в секундах для повторного выполнения слияний с TTL. По умолчанию - 86400 (1 день).

 **Пример задания секций**

-```
+```sql
 ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity=8192
 ```
@@ -109,7 +110,8 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 - `date-column` — имя столбца с типом [Date](../../data_types/date.md). На основе этого столбца ClickHouse автоматически создаёт партиции по месяцам. Имена партиций имеют формат `"YYYYMM"`.
 - `sampling_expression` — выражение для сэмплирования.
-- `(primary, key)` — первичный ключ. Тип — [Tuple()](../../data_types/tuple.md- `index_granularity` — гранулярность индекса. Число строк данных между «засечками» индекса. Для большинства задач подходит значение 8192.
+- `(primary, key)` — первичный ключ. Тип — [Tuple()](../../data_types/tuple.md)
+- `index_granularity` — гранулярность индекса. Число строк данных между «засечками» индекса. Для большинства задач подходит значение 8192.

 **Пример**
@@ -118,6 +120,7 @@ MergeTree(EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID)
 ```

 Движок `MergeTree` сконфигурирован таким же образом, как и в примере выше для основного способа конфигурирования движка.
+
 </details>

 ## Хранение данных
@@ -298,13 +301,42 @@ INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARIT

 Чтения из таблицы автоматически распараллеливаются.

-## TTL для столбцов и таблиц
+## TTL для столбцов и таблиц {#table_engine-mergetree-ttl}

-Данные с истекшим TTL удаляются во время слияний.
+Определяет время жизни значений.

-Если TTL указан для столбца, то когда он истекает, значение заменяется на значение по умолчанию. Если все значения столбца обнулены в куске, то данные этого столбца удаляются с диска в куске. Если TTL указан для таблицы, то когда он истекает, удаляется строка.
+Секция `TTL` может быть установлена как для всей таблицы, так и для каждого отдельного столбца. Если установлены оба `TTL`, то ClickHouse использует тот, что истекает раньше.

-Когда истекает TTL для какого-нибудь значения или строки в куске, назначается внеочередное слияние. Чтобы контролировать частоту слияний с TTL, вы можете задать настройку `merge_with_ttl_timeout`. Если ее значение слишком мало, то будет происходить слишком много внеочередных слияний и мало обычных, вследствие чего может ухудшиться производительность.
+Таблица должна иметь столбец типа [Date](../../data_types/date.md) или [DateTime](../../data_types/datetime.md). Для установки времени жизни данных, следует использовать операцию со столбцом с временем, например:
+
+```
+TTL time_column
+TTL time_column + interval
+```
+
+Чтобы задать `interval`, используйте операторы [интервала времени](../../query_language/operators.md#operators-datetime).
+
+```
+TTL date_time + INTERVAL 1 MONTH
+TTL date_time + INTERVAL 15 HOUR
+```
+
+**TTL столбца**
+
+Когда срок действия значений в столбце истечет, ClickHouse заменит их значениями по умолчанию для типа данных столбца. Если срок действия всех значений столбцов в части данных истек, ClickHouse удаляет столбец из куска данных в файловой системе.
+
+Секцию `TTL` нельзя использовать для ключевых столбцов.
+
+**TTL таблицы**
+
+Когда некоторые данные в таблице устаревают, ClickHouse удаляет все соответствующие строки.
+
+**Удаление данных**
+
+Данные с истекшим TTL удаляются, когда ClickHouse мёржит куски данных.
+
+Когда ClickHouse видит, что некоторые данные устарели, он выполняет внеплановые мёржи. Для управление частотой подобных мёржей, можно задать настройку [merge_with_ttl_timeout](#mergetree_setting-merge_with_ttl_timeout). Если её значение слишком низкое, придется выполнять много внеплановых мёржей, которые могут начать потреблять значительную долю ресурсов сервера.
+
+Если вы выполните запрос `SELECT` между слияниями вы можете получить устаревшие данные. Чтобы избежать этого используйте запрос [OPTIMIZE](../../query_language/misc.md#misc_operations-optimize) перед `SELECT`.

 [Оригинальная статья](https://clickhouse.yandex/docs/ru/operations/table_engines/mergetree/) <!--hide-->

View File

@@ -1,13 +1,30 @@
-## CREATE DATABASE
+## CREATE DATABASE {#query_language-create-database}

-Создание базы данных db\_name.
+Создает базу данных.

 ```sql
-CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster]
+CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] [ENGINE = engine(...)]
 ```

-`База данных` - это просто директория для таблиц.
-Если написано `IF NOT EXISTS`, то запрос не будет возвращать ошибку, если база данных уже существует.
+### Секции
+
+- `IF NOT EXISTS`
+
+    Если база данных с именем `db_name` уже существует, то ClickHouse не создаёт базу данных и:
+
+    - Не генерирует исключение, если секция указана.
+    - Генерирует исключение, если секция не указана.
+
+- `ON CLUSTER`
+
+    ClickHouse создаёт базу данных `db_name` на всех серверах указанного кластера.
+
+- `ENGINE`
+
+    - [MySQL](../database_engines/mysql.md)
+
+        Позволяет получать данные с удаленного сервера MySQL.
+
+    По умолчанию ClickHouse использует собственный [движок баз данных](../database_engines/index.md).

 ## CREATE TABLE {#create-table-query}
@@ -48,7 +65,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ...

 Во всех случаях, если указано `IF NOT EXISTS`, то запрос не будет возвращать ошибку, если таблица уже существует. В этом случае, запрос будет ничего не делать.

-После секции `ENGINE` в запросе могут использоваться и другие секции в зависимости от движка. Подробную документацию по созданию таблиц смотрите в описаниях [движков](../operations/table_engines/index.md#table_engines).
+После секции `ENGINE` в запросе могут использоваться и другие секции в зависимости от движка. Подробную документацию по созданию таблиц смотрите в описаниях [движков таблиц](../operations/table_engines/index.md#table_engines).

 ### Значения по умолчанию {#create-default-values}
@@ -88,11 +105,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ...

 ### Выражение для TTL

-Может быть указано только для таблиц семейства MergeTree. Выражение для указания времени хранения значений. Оно должно зависеть от стобца типа `Date` или `DateTime` и в качестве результата вычислять столбец типа `Date` или `DateTime`. Пример:
-`TTL date + INTERVAL 1 DAY`
-
-Нельзя указывать TTL для ключевых столбцов. Подробнее смотрите в [TTL для стоблцов и таблиц](../operations/table_engines/mergetree.md)
+Определяет время хранения значений. Может быть указано только для таблиц семейства MergeTree. Подробнее смотрите в [TTL для столбцов и таблиц](../operations/table_engines/mergetree.md#table_engine-mergetree-ttl).

 ## Форматы сжатия для колонок

View File

@@ -97,6 +97,9 @@ SELECT visibleWidth(NULL)

 Возвращает имя текущей базы данных.
 Эта функция может использоваться в параметрах движка таблицы в запросе CREATE TABLE там, где нужно указать базу данных.

+## currentUser()
+Возвращает логин пользователя, от имени которого исполняется запрос. В случае распределённого запроса, возвращается имя пользователя - инициатора запроса.
+
 ## isFinite(x)

 Принимает Float32 или Float64 и возвращает UInt8, равный 1, если аргумент не бесконечный и не NaN, иначе 0.

View File

@@ -65,13 +65,13 @@

 `a GLOBAL NOT IN ...` - функция `globalNotIn(a, b)`

-## Оператор для работы с датами и временем
+## Оператор для работы с датами и временем {#operators-datetime}

 ``` sql
 EXTRACT(part FROM date);
 ```

 Позволяет извлечь отдельные части из переданной даты. Например, можно получить месяц из даты, или минуты из времени.

 В параметре `part` указывается, какой фрагмент даты нужно получить. Доступные значения:
@@ -99,8 +99,8 @@ SELECT EXTRACT(YEAR FROM toDate('2017-06-15'));
 ``` sql
 CREATE TABLE test.Orders
 (
     OrderId UInt64,
     OrderName String,
     OrderDate DateTime
 )
 ENGINE = Log;
@@ -110,11 +110,11 @@ ENGINE = Log;
 INSERT INTO test.Orders VALUES (1, 'Jarlsberg Cheese', toDateTime('2008-10-11 13:23:44'));
 ```

 ``` sql
 SELECT
     toYear(OrderDate) AS OrderYear,
     toMonth(OrderDate) AS OrderMonth,
     toDayOfMonth(OrderDate) AS OrderDay,
     toHour(OrderDate) AS OrderHour,
     toMinute(OrderDate) AS OrderMinute,
     toSecond(OrderDate) AS OrderSecond
 FROM test.Orders;

View File

@@ -47,26 +47,26 @@ nav:
 - 'AggregateFunction(name, types_of_arguments...)': 'data_types/nested_data_structures/aggregatefunction.md'
 - 'Tuple(T1, T2, ...)': 'data_types/tuple.md'
 - 'Nullable': 'data_types/nullable.md'
-- 'Nested data structures':
+- '嵌套数据结构':
 - 'hidden': 'data_types/nested_data_structures/index.md'
 - 'Nested(Name1 Type1, Name2 Type2, ...)': 'data_types/nested_data_structures/nested.md'
-- 'Special data types':
+- '特殊数据类型':
 - 'hidden': 'data_types/special_data_types/index.md'
 - 'Expression': 'data_types/special_data_types/expression.md'
 - 'Set': 'data_types/special_data_types/set.md'
 - 'Nothing': 'data_types/special_data_types/nothing.md'
-- 'Domains':
-- 'Overview': 'data_types/domains/overview.md'
+- 'Domain类型':
+- '介绍': 'data_types/domains/overview.md'
 - 'IPv4': 'data_types/domains/ipv4.md'
 - 'IPv6': 'data_types/domains/ipv6.md'
-- 'Database Engines':
-- 'Introduction': 'database_engines/index.md'
+- '数据库引擎':
+- '介绍': 'database_engines/index.md'
 - 'MySQL': 'database_engines/mysql.md'
-- 'Table Engines':
-- 'Introduction': 'operations/table_engines/index.md'
-- 'MergeTree Family':
+- '表引擎':
+- '介绍': 'operations/table_engines/index.md'
+- 'MergeTree':
 - 'MergeTree': 'operations/table_engines/mergetree.md'
 - 'Data Replication': 'operations/table_engines/replication.md'
 - 'Custom Partitioning Key': 'operations/table_engines/custom_partitioning_key.md'
@@ -76,17 +76,17 @@ nav:
 - 'CollapsingMergeTree': 'operations/table_engines/collapsingmergetree.md'
 - 'VersionedCollapsingMergeTree': 'operations/table_engines/versionedcollapsingmergetree.md'
 - 'GraphiteMergeTree': 'operations/table_engines/graphitemergetree.md'
-- 'Log Family':
-- 'Introduction': 'operations/table_engines/log_family.md'
+- 'Log':
+- '介绍': 'operations/table_engines/log_family.md'
 - 'StripeLog': 'operations/table_engines/stripelog.md'
 - 'Log': 'operations/table_engines/log.md'
 - 'TinyLog': 'operations/table_engines/tinylog.md'
-- 'Integrations':
+- '外部表引擎':
 - 'Kafka': 'operations/table_engines/kafka.md'
 - 'MySQL': 'operations/table_engines/mysql.md'
 - 'JDBC': 'operations/table_engines/jdbc.md'
 - 'ODBC': 'operations/table_engines/odbc.md'
-- 'Special':
+- '其他表引擎':
 - 'Distributed': 'operations/table_engines/distributed.md'
 - 'External data': 'operations/table_engines/external_data.md'
 - 'Dictionary': 'operations/table_engines/dictionary.md'
@@ -108,8 +108,8 @@ nav:
 - 'CREATE': 'query_language/create.md'
 - 'ALTER': 'query_language/alter.md'
 - 'SYSTEM': 'query_language/system.md'
-- 'Other kinds of queries': 'query_language/misc.md'
-- 'Functions':
+- '其他类型的查询': 'query_language/misc.md'
+- '函数':
 - '介绍': 'query_language/functions/index.md'
 - '算术函数': 'query_language/functions/arithmetic_functions.md'
 - '比较函数': 'query_language/functions/comparison_functions.md'
@@ -142,13 +142,13 @@ nav:
 - 'Nullable处理函数': 'query_language/functions/functions_for_nulls.md'
 - '机器学习函数': 'query_language/functions/machine_learning_functions.md'
 - '其他函数': 'query_language/functions/other_functions.md'
-- 'Aggregate functions':
-- 'Introduction': 'query_language/agg_functions/index.md'
-- 'Function reference': 'query_language/agg_functions/reference.md'
-- 'Aggregate function combinators': 'query_language/agg_functions/combinators.md'
-- 'Parametric aggregate functions': 'query_language/agg_functions/parametric_functions.md'
-- 'Table functions':
-- 'Introduction': 'query_language/table_functions/index.md'
+- '聚合函数':
+- '介绍': 'query_language/agg_functions/index.md'
+- '函数列表': 'query_language/agg_functions/reference.md'
+- '聚合函数组合子': 'query_language/agg_functions/combinators.md'
+- '参数化聚合函数': 'query_language/agg_functions/parametric_functions.md'
+- '表引擎函数':
+- '介绍': 'query_language/table_functions/index.md'
 - 'file': 'query_language/table_functions/file.md'
 - 'merge': 'query_language/table_functions/merge.md'
 - 'numbers': 'query_language/table_functions/numbers.md'
@@ -157,44 +157,44 @@ nav:
 - 'mysql': 'query_language/table_functions/mysql.md'
 - 'jdbc': 'query_language/table_functions/jdbc.md'
 - 'odbc': 'query_language/table_functions/odbc.md'
-- 'Dictionaries':
-- 'Introduction': 'query_language/dicts/index.md'
-- 'External dictionaries':
-- 'General description': 'query_language/dicts/external_dicts.md'
-- 'Configuring an external dictionary': 'query_language/dicts/external_dicts_dict.md'
-- 'Storing dictionaries in memory': 'query_language/dicts/external_dicts_dict_layout.md'
-- 'Dictionary updates': 'query_language/dicts/external_dicts_dict_lifetime.md'
-- 'Sources of external dictionaries': 'query_language/dicts/external_dicts_dict_sources.md'
-- 'Dictionary key and fields': 'query_language/dicts/external_dicts_dict_structure.md'
-- 'Internal dictionaries': 'query_language/dicts/internal_dicts.md'
-- 'Operators': 'query_language/operators.md'
-- 'General syntax': 'query_language/syntax.md'
+- '字典':
+- '介绍': 'query_language/dicts/index.md'
+- '外部字典':
+- '介绍': 'query_language/dicts/external_dicts.md'
+- '配置外部字典': 'query_language/dicts/external_dicts_dict.md'
+- '字典的内存布局': 'query_language/dicts/external_dicts_dict_layout.md'
+- '字典的刷新策略': 'query_language/dicts/external_dicts_dict_lifetime.md'
+- '字典的外部数据源': 'query_language/dicts/external_dicts_dict_sources.md'
+- '字典的键和字段值': 'query_language/dicts/external_dicts_dict_structure.md'
+- '内部字典': 'query_language/dicts/internal_dicts.md'
+- '操作符': 'query_language/operators.md'
+- '语法说明': 'query_language/syntax.md'
 - '运维':
-- 'Introduction': 'operations/index.md'
-- 'Requirements': 'operations/requirements.md'
-- 'Monitoring': 'operations/monitoring.md'
-- 'Troubleshooting': 'operations/troubleshooting.md'
-- 'Usage recommendations': 'operations/tips.md'
-- 'ClickHouse Update': 'operations/update.md'
-- 'Access rights': 'operations/access_rights.md'
-- 'Data backup': 'operations/backup.md'
-- 'Configuration files': 'operations/configuration_files.md'
-- 'Quotas': 'operations/quotas.md'
-- 'System tables': 'operations/system_tables.md'
-- 'Server configuration parameters':
-- 'Introduction': 'operations/server_settings/index.md'
-- 'Server settings': 'operations/server_settings/settings.md'
-- 'Settings':
-- 'Introduction': 'operations/settings/index.md'
-- 'Permissions for queries': 'operations/settings/permissions_for_queries.md'
-- 'Restrictions on query complexity': 'operations/settings/query_complexity.md'
-- 'Settings': 'operations/settings/settings.md'
-- 'Settings profiles': 'operations/settings/settings_profiles.md'
-- 'User Settings': 'operations/settings/settings_users.md'
-- 'Constraints on Settings': 'operations/settings/constraints_on_settings.md'
-- 'Utilities':
-- 'Overview': 'operations/utils/index.md'
+- '介绍': 'operations/index.md'
+- '环境要求': 'operations/requirements.md'
+- '监控': 'operations/monitoring.md'
+- '故障排查': 'operations/troubleshooting.md'
+- '使用建议': 'operations/tips.md'
+- '版本升级': 'operations/update.md'
+- '访问权限控制': 'operations/access_rights.md'
+- '数据备份': 'operations/backup.md'
+- '配置文件': 'operations/configuration_files.md'
+- '配额': 'operations/quotas.md'
+- '系统表': 'operations/system_tables.md'
+- 'Server参数配置':
+- '介绍': 'operations/server_settings/index.md'
+- 'Server参数说明': 'operations/server_settings/settings.md'
+- 'Settings配置':
+- '介绍': 'operations/settings/index.md'
+- '查询权限管理': 'operations/settings/permissions_for_queries.md'
+- '查询复杂性的限制': 'operations/settings/query_complexity.md'
+- 'Setting列表': 'operations/settings/settings.md'
+- 'Setting配置组': 'operations/settings/settings_profiles.md'
+- '用户配置': 'operations/settings/settings_users.md'
+- 'Settings的约束': 'operations/settings/constraints_on_settings.md'
+- '常用工具':
+- '介绍': 'operations/utils/index.md'
 - 'clickhouse-copier': 'operations/utils/clickhouse-copier.md'
 - 'clickhouse-local': 'operations/utils/clickhouse-local.md'