Merge branch 'master' into nulls

This commit is contained in:
chertus 2019-03-26 23:04:39 +03:00
commit 274916d944
63 changed files with 619 additions and 521 deletions

View File

@ -1,7 +1,33 @@
# Locates hyperscan: either the bundled copy under contrib/hyperscan or a system installation.
# On success sets USE_HYPERSCAN, HYPERSCAN_INCLUDE_DIR and HYPERSCAN_LIBRARY;
# USE_INTERNAL_HYPERSCAN_LIBRARY tells whether the bundled copy was chosen.

# The ENABLE_HYPERSCAN option is only offered when HAVE_SSSE3 is set.
if (HAVE_SSSE3)
    option (ENABLE_HYPERSCAN "Enable hyperscan" ON)
endif ()

if (ENABLE_HYPERSCAN)

    option (USE_INTERNAL_HYPERSCAN_LIBRARY "Set to FALSE to use system hyperscan instead of the bundled" ${NOT_UNBUNDLED})

    # Without the submodule checkout the bundled copy cannot be used; warn only if it was requested.
    if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/hyperscan/CMakeLists.txt")
        if (USE_INTERNAL_HYPERSCAN_LIBRARY)
            message (WARNING "submodule contrib/hyperscan is missing. to fix try run: \n git submodule update --init --recursive")
        endif ()
        set (MISSING_INTERNAL_HYPERSCAN_LIBRARY 1)
        set (USE_INTERNAL_HYPERSCAN_LIBRARY 0)
    endif ()

    # Look for a system installation when the bundled copy is not going to be used.
    if (NOT USE_INTERNAL_HYPERSCAN_LIBRARY)
        find_library (HYPERSCAN_LIBRARY hs)
        find_path (HYPERSCAN_INCLUDE_DIR NAMES hs/hs.h hs.h PATHS ${HYPERSCAN_INCLUDE_PATHS})
    endif ()

    if (HYPERSCAN_LIBRARY AND HYPERSCAN_INCLUDE_DIR)
        set (USE_HYPERSCAN 1)
    elseif (NOT MISSING_INTERNAL_HYPERSCAN_LIBRARY)
        # Fall back to the bundled copy.
        set (HYPERSCAN_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/hyperscan/src)
        set (HYPERSCAN_LIBRARY hs)
        set (USE_HYPERSCAN 1)
        set (USE_INTERNAL_HYPERSCAN_LIBRARY 1)
    endif()

    # Single status report for every outcome (the per-branch message that used to sit in the
    # fallback branch had a misplaced closing quote and duplicated this line).
    message (STATUS "Using hyperscan=${USE_HYPERSCAN}: ${HYPERSCAN_INCLUDE_DIR} : ${HYPERSCAN_LIBRARY}")

endif ()

View File

@ -305,6 +305,6 @@ if (USE_BASE64)
add_subdirectory (base64-cmake)
endif()
# Build the bundled hyperscan only when hyperscan is in use and the internal copy was selected.
# (Two separate `if` openers shared a single `endif()`, leaving the conditional unbalanced.)
if (USE_HYPERSCAN AND USE_INTERNAL_HYPERSCAN_LIBRARY)
    add_subdirectory (hyperscan)
endif()

2
contrib/boost vendored

@ -1 +1 @@
Subproject commit 32abf16beb7bb8b243a4d100ccdd6acb271738c4
Subproject commit 471ea208abb92a5cba7d3a08a819bb728f27e95f

2
contrib/librdkafka vendored

@ -1 +1 @@
Subproject commit 73295a702cd1c85c11749ade500d713db7099cca
Subproject commit 8695b9d63ac0fe1b891b511d5b36302ffc84d4e2

View File

@ -42,6 +42,7 @@
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>
#include <IO/UseSSL.h>
#include <DataStreams/AsynchronousBlockInputStream.h>
#include <DataStreams/AddingDefaultsBlockInputStream.h>
@ -1314,6 +1315,9 @@ private:
/// Received data block is immediately displayed to the user.
block_out_stream->flush();
/// Restore progress bar after data block.
writeProgress();
}
@ -1353,8 +1357,8 @@ private:
/// Removes the progress line from the terminal: resets the count of progress characters
/// on screen and writes the RESTORE_CURSOR_POSITION and CLEAR_TO_END_OF_LINE sequences to stderr.
void clearProgress()
{
    written_progress_chars = 0;
    /// Emitted once; the sequence was previously written both before and after the reset.
    std::cerr << RESTORE_CURSOR_POSITION CLEAR_TO_END_OF_LINE;
}
@ -1363,6 +1367,9 @@ private:
if (!need_render_progress)
return;
/// Output all progress bar commands to stderr at once to avoid flicker.
WriteBufferFromFileDescriptor message(STDERR_FILENO, 1024);
static size_t increment = 0;
static const char * indicators[8] =
{
@ -1377,13 +1384,15 @@ private:
};
if (written_progress_chars)
clearProgress();
message << RESTORE_CURSOR_POSITION CLEAR_TO_END_OF_LINE;
else
std::cerr << SAVE_CURSOR_POSITION;
message << SAVE_CURSOR_POSITION;
message << DISABLE_LINE_WRAPPING;
size_t prefix_size = message.count();
std::stringstream message;
message << indicators[increment % 8]
<< std::fixed << std::setprecision(3)
<< " Progress: ";
message
@ -1398,8 +1407,7 @@ private:
else
message << ". ";
written_progress_chars = message.str().size() - (increment % 8 == 7 ? 10 : 13);
std::cerr << DISABLE_LINE_WRAPPING << message.rdbuf();
written_progress_chars = message.count() - prefix_size - (increment % 8 == 7 ? 10 : 13); /// Don't count invisible output (escape sequences).
/// If the approximate number of rows to process is known, we can display a progress bar and percentage.
if (progress.total_rows > 0)
@ -1421,19 +1429,21 @@ private:
if (width_of_progress_bar > 0)
{
std::string bar = UnicodeBar::render(UnicodeBar::getWidth(progress.rows, 0, total_rows_corrected, width_of_progress_bar));
std::cerr << "\033[0;32m" << bar << "\033[0m";
message << "\033[0;32m" << bar << "\033[0m";
if (width_of_progress_bar > static_cast<ssize_t>(bar.size() / UNICODE_BAR_CHAR_SIZE))
std::cerr << std::string(width_of_progress_bar - bar.size() / UNICODE_BAR_CHAR_SIZE, ' ');
message << std::string(width_of_progress_bar - bar.size() / UNICODE_BAR_CHAR_SIZE, ' ');
}
}
}
/// Underestimate percentage a bit to avoid displaying 100%.
std::cerr << ' ' << (99 * progress.rows / total_rows_corrected) << '%';
message << ' ' << (99 * progress.rows / total_rows_corrected) << '%';
}
std::cerr << ENABLE_LINE_WRAPPING;
message << ENABLE_LINE_WRAPPING;
++increment;
message.next();
}

View File

@ -516,7 +516,7 @@ public:
template <typename ResultType, typename CountCharsCallback>
void searchFirstPosition(const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets, const CountCharsCallback & count_chars_callback, ResultType & ans)
{
auto callback = [this, &count_chars_callback](const UInt8 * haystack, const UInt8 * haystack_end) -> size_t
auto callback = [this, &count_chars_callback](const UInt8 * haystack, const UInt8 * haystack_end) -> UInt64
{
return this->searchOneFirstPosition(haystack, haystack_end, count_chars_callback);
};
@ -676,11 +676,11 @@ private:
}
template <typename CountCharsCallback>
inline size_t searchOneFirstPosition(const UInt8 * haystack, const UInt8 * haystack_end, const CountCharsCallback & callback) const
inline UInt64 searchOneFirstPosition(const UInt8 * haystack, const UInt8 * haystack_end, const CountCharsCallback & callback) const
{
const size_t fallback_size = fallback_needles.size();
size_t ans = std::numeric_limits<size_t>::max();
UInt64 ans = std::numeric_limits<UInt64>::max();
for (size_t i = 0; i < fallback_size; ++i)
if (auto pos = fallback_searchers[fallback_needles[i]].search(haystack, haystack_end); pos != haystack_end)
@ -705,7 +705,7 @@ private:
}
}
}
if (ans == std::numeric_limits<size_t>::max())
if (ans == std::numeric_limits<UInt64>::max())
return 0;
return ans;
}

View File

@ -24,6 +24,7 @@
#cmakedefine01 USE_CPUINFO
#cmakedefine01 USE_BROTLI
#cmakedefine01 USE_SSL
#cmakedefine01 USE_HYPERSCAN
#cmakedefine01 CLICKHOUSE_SPLIT_BINARY
#cmakedefine01 LLVM_HAS_RTTI

View File

@ -206,28 +206,42 @@ CapnProtoRowInputStream::CapnProtoRowInputStream(ReadBuffer & istr_, const Block
createActions(list, root);
}
/// Reads one whole message from `istr` (the segment table followed by the segment data)
/// into a newly allocated contiguous array of words and returns that array.
kj::Array<capnp::word> CapnProtoRowInputStream::readMessage()
{
    /// The first 32-bit field of the stream is the segment count minus one.
    uint32_t segment_count;
    istr.readStrict(reinterpret_cast<char*>(&segment_count), sizeof(uint32_t));

    /// Do the size arithmetic in size_t. In uint32_t, `segment_count + 1` and `2 + segment_count`
    /// wrap around for values close to UINT32_MAX, which made the prefix buffer smaller than
    /// the range the loop below writes into.
    /// NOTE(review): this relies on size_t being wider than 32 bits; rejecting implausibly large
    /// segment counts up front would additionally bound the allocation - confirm the error code to use.
    const size_t segments = static_cast<size_t>(segment_count) + 1;

    // one field for segmentCount plus one size field per segment
    const size_t prefix_size = (1 + segments) * sizeof(uint32_t);
    // the prefix rounded up to whole words
    const size_t words_prefix_size = segments / 2 + 1;
    auto prefix = kj::heapArray<capnp::word>(words_prefix_size);
    auto prefix_chars = prefix.asChars();
    ::memcpy(prefix_chars.begin(), &segment_count, sizeof(uint32_t));

    // read size of each segment
    for (size_t i = 0; i < segments; ++i)
        istr.readStrict(prefix_chars.begin() + ((i + 1) * sizeof(uint32_t)), sizeof(uint32_t));

    // calculate size of message
    const auto expected_words = capnp::expectedSizeInWordsFromPrefix(prefix);
    const auto expected_bytes = expected_words * sizeof(capnp::word);
    const auto data_size = expected_bytes - prefix_size;
    auto msg = kj::heapArray<capnp::word>(expected_words);
    auto msg_chars = msg.asChars();

    // read full message: the prefix that was already consumed, then the rest from the stream
    ::memcpy(msg_chars.begin(), prefix_chars.begin(), prefix_size);
    istr.readStrict(msg_chars.begin() + prefix_size, data_size);

    return msg;
}
bool CapnProtoRowInputStream::read(MutableColumns & columns, RowReadExtension &)
{
if (istr.eof())
return false;
// Read from underlying buffer directly
auto buf = istr.buffer();
auto base = reinterpret_cast<const capnp::word *>(istr.position());
// Check if there's enough bytes in the buffer to read the full message
kj::Array<capnp::word> heap_array;
auto array = kj::arrayPtr(base, buf.size() - istr.offset());
auto expected_words = capnp::expectedSizeInWordsFromPrefix(array);
if (expected_words * sizeof(capnp::word) > array.size())
{
// We'll need to reassemble the message in a contiguous buffer
heap_array = kj::heapArray<capnp::word>(expected_words);
istr.readStrict(heap_array.asChars().begin(), heap_array.asChars().size());
array = heap_array.asPtr();
}
auto array = readMessage();
#if CAPNP_VERSION >= 8000
capnp::UnalignedFlatArrayMessageReader msg(array);
@ -281,13 +295,6 @@ bool CapnProtoRowInputStream::read(MutableColumns & columns, RowReadExtension &)
}
}
// Advance buffer position if used directly
if (heap_array.size() == 0)
{
auto parsed = (msg.getEnd() - base) * sizeof(capnp::word);
istr.position() += parsed;
}
return true;
}

View File

@ -38,6 +38,8 @@ public:
bool read(MutableColumns & columns, RowReadExtension &) override;
private:
kj::Array<capnp::word> readMessage();
// Build a traversal plan from a sorted list of fields
void createActions(const NestedFieldList & sortedFields, capnp::StructSchema reader);

View File

@ -1,8 +1,6 @@
#include <Functions/FunctionsStringSearch.h>
#include "FunctionsStringSearch.h"
#include <Columns/ColumnFixedString.h>
#include <Common/config.h>
#include <DataTypes/DataTypeFixedString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/Regexps.h>
@ -11,12 +9,16 @@
#include <re2/stringpiece.h>
#include <Poco/UTF8String.h>
#include <Common/Volnitsky.h>
#include <algorithm>
#include <memory>
#ifdef __SSSE3__
# include <hs.h>
#include <Common/config.h>
#if USE_HYPERSCAN
# if __has_include(<hs/hs.h>)
# include <hs/hs.h>
# else
# include <hs.h>
# endif
#endif
#if USE_RE2_ST
@ -617,7 +619,7 @@ struct MultiMatchAnyImpl
{
(void)FindAny;
(void)FindAnyIndex;
#ifdef __SSSE3__
#if USE_HYPERSCAN
using ScratchPtr = std::unique_ptr<hs_scratch_t, DB::MultiRegexps::HyperscanDeleter<decltype(&hs_free_scratch), &hs_free_scratch>>;
const auto & hyperscan_regex = MultiRegexps::get<FindAnyIndex>(needles);
@ -670,7 +672,7 @@ struct MultiMatchAnyImpl
res[i] = j + 1;
}
}
#endif // __SSSE3__
#endif // USE_HYPERSCAN
}
};

View File

@ -5,13 +5,17 @@
#include <Common/OptimizedRegularExpression.h>
#include <Common/ProfileEvents.h>
#include <common/StringRef.h>
#include <memory>
#include <string>
#include <vector>
#ifdef __SSSE3__
# include <hs.h>
#include <Common/config.h>
#if USE_HYPERSCAN
# if __has_include(<hs/hs.h>)
# include <hs/hs.h>
# else
# include <hs.h>
# endif
#endif
namespace ProfileEvents
@ -63,7 +67,7 @@ namespace Regexps
}
}
#ifdef __SSSE3__
#if USE_HYPERSCAN
namespace MultiRegexps
{
@ -139,6 +143,6 @@ namespace MultiRegexps
}
}
#endif // __SSSE3__
#endif // USE_HYPERSCAN
}

View File

@ -58,7 +58,7 @@ namespace
BlockInputStreamPtr createLocalStream(const ASTPtr & query_ast, const Context & context, QueryProcessingStage::Enum processed_stage)
{
InterpreterSelectQuery interpreter{query_ast, context, Names{}, processed_stage};
InterpreterSelectQuery interpreter{query_ast, context, SelectQueryOptions(processed_stage)};
BlockInputStreamPtr stream = interpreter.execute().in;
/** Materialization is needed, since from remote servers the constants come materialized.

View File

@ -76,7 +76,7 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr
ASTPtr subquery_select = subquery.children.at(0);
BlockIO res = InterpreterSelectWithUnionQuery(
subquery_select, subquery_context, {}, QueryProcessingStage::Complete, data.subquery_depth + 1).execute();
subquery_select, subquery_context, SelectQueryOptions(QueryProcessingStage::Complete, data.subquery_depth + 1)).execute();
Block block;
try

View File

@ -51,7 +51,8 @@ BlockInputStreamPtr InterpreterExplainQuery::executeImpl()
}
else if (ast.getKind() == ASTExplainQuery::AnalyzedSyntax)
{
InterpreterSelectWithUnionQuery interpreter(ast.children.at(0), context, {}, QueryProcessingStage::FetchColumns, 0, true, true);
InterpreterSelectWithUnionQuery interpreter(ast.children.at(0), context,
SelectQueryOptions(QueryProcessingStage::FetchColumns).analyze().modify());
interpreter.getQuery()->format(IAST::FormatSettings(ss, false));
}

View File

@ -84,12 +84,12 @@ std::unique_ptr<IInterpreter> InterpreterFactory::get(ASTPtr & query, Context &
{
/// This is internal part of ASTSelectWithUnionQuery.
/// Even if there is SELECT without union, it is represented by ASTSelectWithUnionQuery with single ASTSelectQuery as a child.
return std::make_unique<InterpreterSelectQuery>(query, context, Names{}, stage);
return std::make_unique<InterpreterSelectQuery>(query, context, SelectQueryOptions(stage));
}
else if (query->as<ASTSelectWithUnionQuery>())
{
ProfileEvents::increment(ProfileEvents::SelectQuery);
return std::make_unique<InterpreterSelectWithUnionQuery>(query, context, Names{}, stage);
return std::make_unique<InterpreterSelectWithUnionQuery>(query, context, SelectQueryOptions(stage));
}
else if (query->as<ASTInsertQuery>())
{

View File

@ -128,7 +128,7 @@ BlockIO InterpreterInsertQuery::execute()
if (query.select)
{
/// Passing 1 as subquery_depth will disable limiting size of intermediate result.
InterpreterSelectWithUnionQuery interpreter_select{query.select, context, {}, QueryProcessingStage::Complete, 1};
InterpreterSelectWithUnionQuery interpreter_select{query.select, context, SelectQueryOptions(QueryProcessingStage::Complete, 1)};
res.in = interpreter_select.execute().in;

View File

@ -78,13 +78,9 @@ namespace ErrorCodes
InterpreterSelectQuery::InterpreterSelectQuery(
const ASTPtr & query_ptr_,
const Context & context_,
const Names & required_result_column_names,
QueryProcessingStage::Enum to_stage_,
size_t subquery_depth_,
bool only_analyze_,
bool modify_inplace)
: InterpreterSelectQuery(
query_ptr_, context_, nullptr, nullptr, required_result_column_names, to_stage_, subquery_depth_, only_analyze_, modify_inplace)
const SelectQueryOptions & options,
const Names & required_result_column_names)
: InterpreterSelectQuery(query_ptr_, context_, nullptr, nullptr, options, required_result_column_names)
{
}
@ -92,23 +88,17 @@ InterpreterSelectQuery::InterpreterSelectQuery(
const ASTPtr & query_ptr_,
const Context & context_,
const BlockInputStreamPtr & input_,
QueryProcessingStage::Enum to_stage_,
bool only_analyze_,
bool modify_inplace)
: InterpreterSelectQuery(query_ptr_, context_, input_, nullptr, Names{}, to_stage_, 0, only_analyze_, modify_inplace)
{
}
const SelectQueryOptions & options)
: InterpreterSelectQuery(query_ptr_, context_, input_, nullptr, options.copy().noSubquery())
{}
InterpreterSelectQuery::InterpreterSelectQuery(
const ASTPtr & query_ptr_,
const Context & context_,
const StoragePtr & storage_,
QueryProcessingStage::Enum to_stage_,
bool only_analyze_,
bool modify_inplace)
: InterpreterSelectQuery(query_ptr_, context_, nullptr, storage_, Names{}, to_stage_, 0, only_analyze_, modify_inplace)
{
}
const SelectQueryOptions & options)
: InterpreterSelectQuery(query_ptr_, context_, nullptr, storage_, options.copy().noSubquery())
{}
InterpreterSelectQuery::~InterpreterSelectQuery() = default;
@ -133,17 +123,12 @@ InterpreterSelectQuery::InterpreterSelectQuery(
const Context & context_,
const BlockInputStreamPtr & input_,
const StoragePtr & storage_,
const Names & required_result_column_names,
QueryProcessingStage::Enum to_stage_,
size_t subquery_depth_,
bool only_analyze_,
bool modify_inplace)
const SelectQueryOptions & options_,
const Names & required_result_column_names)
: options(options_)
/// NOTE: the query almost always should be cloned because it will be modified during analysis.
: query_ptr(modify_inplace ? query_ptr_ : query_ptr_->clone())
, query_ptr(options.modify_inplace ? query_ptr_ : query_ptr_->clone())
, context(context_)
, to_stage(to_stage_)
, subquery_depth(subquery_depth_)
, only_analyze(only_analyze_)
, storage(storage_)
, input(input_)
, log(&Logger::get("InterpreterSelectQuery"))
@ -151,7 +136,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
initSettings();
const Settings & settings = context.getSettingsRef();
if (settings.max_subquery_depth && subquery_depth > settings.max_subquery_depth)
if (settings.max_subquery_depth && options.subquery_depth > settings.max_subquery_depth)
throw Exception("Too deep subqueries. Maximum: " + settings.max_subquery_depth.toString(),
ErrorCodes::TOO_DEEP_SUBQUERIES);
@ -189,7 +174,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
{
/// Read from subquery.
interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
table_expression, getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1, only_analyze, modify_inplace);
table_expression, getSubqueryContext(context), options.subquery(), required_columns);
source_header = interpreter_subquery->getSampleBlock();
}
@ -215,13 +200,14 @@ InterpreterSelectQuery::InterpreterSelectQuery(
if (storage)
table_lock = storage->lockStructureForShare(false, context.getCurrentQueryId());
syntax_analyzer_result = SyntaxAnalyzer(context, subquery_depth).analyze(
syntax_analyzer_result = SyntaxAnalyzer(context, options).analyze(
query_ptr, source_header.getNamesAndTypesList(), required_result_column_names, storage);
query_analyzer = std::make_unique<ExpressionAnalyzer>(
query_ptr, syntax_analyzer_result, context, NamesAndTypesList(),
NameSet(required_result_column_names.begin(), required_result_column_names.end()), subquery_depth, !only_analyze);
NameSet(required_result_column_names.begin(), required_result_column_names.end()),
options.subquery_depth, !options.only_analyze);
if (!only_analyze)
if (!options.only_analyze)
{
if (query.sample_size() && (input || !storage || !storage->supportsSampling()))
throw Exception("Illegal SAMPLE: table doesn't support sampling", ErrorCodes::SAMPLING_NOT_SUPPORTED);
@ -238,7 +224,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
context.addExternalTable(it.first, it.second);
}
if (!only_analyze || modify_inplace)
if (!options.only_analyze || options.modify_inplace)
{
if (query_analyzer->isRewriteSubqueriesPredicate())
{
@ -247,11 +233,8 @@ InterpreterSelectQuery::InterpreterSelectQuery(
interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
table_expression,
getSubqueryContext(context),
required_columns,
QueryProcessingStage::Complete,
subquery_depth + 1,
only_analyze,
modify_inplace);
options.subquery(),
required_columns);
}
}
@ -304,7 +287,7 @@ Block InterpreterSelectQuery::getSampleBlock()
BlockIO InterpreterSelectQuery::execute()
{
Pipeline pipeline;
executeImpl(pipeline, input, only_analyze);
executeImpl(pipeline, input, options.only_analyze);
executeUnion(pipeline);
BlockIO res;
@ -315,7 +298,7 @@ BlockIO InterpreterSelectQuery::execute()
/// Builds the query pipeline and returns its streams as they are, without the
/// executeUnion step that execute() applies.
BlockInputStreams InterpreterSelectQuery::executeWithMultipleStreams()
{
    Pipeline pipeline;
    /// Build the pipeline exactly once, driven by the analysis flag stored in `options`
    /// (the body previously invoked executeImpl twice on the same pipeline).
    executeImpl(pipeline, input, options.only_analyze);
    return pipeline.streams;
}
@ -325,10 +308,10 @@ InterpreterSelectQuery::AnalysisResult InterpreterSelectQuery::analyzeExpression
/// Do I need to perform the first part of the pipeline - running on remote servers during distributed processing.
res.first_stage = from_stage < QueryProcessingStage::WithMergeableState
&& to_stage >= QueryProcessingStage::WithMergeableState;
&& options.to_stage >= QueryProcessingStage::WithMergeableState;
/// Do I need to execute the second part of the pipeline - running on the initiating server during distributed processing.
res.second_stage = from_stage <= QueryProcessingStage::WithMergeableState
&& to_stage > QueryProcessingStage::WithMergeableState;
&& options.to_stage > QueryProcessingStage::WithMergeableState;
/** First we compose a chain of actions and remember the necessary steps from it.
* Regardless of from_stage and to_stage, we will compose a complete sequence of actions to perform optimization and
@ -553,16 +536,16 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt
expressions = analyzeExpressions(from_stage, false);
if (from_stage == QueryProcessingStage::WithMergeableState &&
to_stage == QueryProcessingStage::WithMergeableState)
options.to_stage == QueryProcessingStage::WithMergeableState)
throw Exception("Distributed on Distributed is not supported", ErrorCodes::NOT_IMPLEMENTED);
/** Read the data from Storage. from_stage - to what stage the request was completed in Storage. */
executeFetchColumns(from_stage, pipeline, expressions.prewhere_info, expressions.columns_to_remove_after_prewhere);
LOG_TRACE(log, QueryProcessingStage::toString(from_stage) << " -> " << QueryProcessingStage::toString(to_stage));
LOG_TRACE(log, QueryProcessingStage::toString(from_stage) << " -> " << QueryProcessingStage::toString(options.to_stage));
}
if (to_stage > QueryProcessingStage::FetchColumns)
if (options.to_stage > QueryProcessingStage::FetchColumns)
{
/// Do I need to aggregate in a separate row rows that have not passed max_rows_to_group_by.
bool aggregate_overflow_row =
@ -575,7 +558,7 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt
/// Do I need to immediately finalize the aggregate functions after the aggregation?
bool aggregate_final =
expressions.need_aggregate &&
to_stage > QueryProcessingStage::WithMergeableState &&
options.to_stage > QueryProcessingStage::WithMergeableState &&
!query.group_by_with_totals && !query.group_by_with_rollup && !query.group_by_with_cube;
if (expressions.first_stage)
@ -938,7 +921,7 @@ void InterpreterSelectQuery::executeFetchColumns(
/// Limitation on the number of columns to read.
/// It's not applied in 'only_analyze' mode, because the query could be analyzed without removal of unnecessary columns.
if (!only_analyze && settings.max_columns_to_read && required_columns.size() > settings.max_columns_to_read)
if (!options.only_analyze && settings.max_columns_to_read && required_columns.size() > settings.max_columns_to_read)
throw Exception("Limit for number of columns to read exceeded. "
"Requested: " + toString(required_columns.size())
+ ", maximum: " + settings.max_columns_to_read.toString(),
@ -1000,7 +983,8 @@ void InterpreterSelectQuery::executeFetchColumns(
throw Exception("Subquery expected", ErrorCodes::LOGICAL_ERROR);
interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
subquery, getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1, only_analyze);
subquery, getSubqueryContext(context),
options.copy().subquery().noModify(), required_columns);
if (query_analyzer->hasAggregation())
interpreter_subquery->ignoreWithTotals();
@ -1057,7 +1041,7 @@ void InterpreterSelectQuery::executeFetchColumns(
* additionally on each remote server, because these limits are checked per block of data processed,
* and remote servers may process way more blocks of data than are received by initiator.
*/
if (to_stage == QueryProcessingStage::Complete)
if (options.to_stage == QueryProcessingStage::Complete)
{
limits.min_execution_speed = settings.min_execution_speed;
limits.max_execution_speed = settings.max_execution_speed;
@ -1072,7 +1056,7 @@ void InterpreterSelectQuery::executeFetchColumns(
{
stream->setLimits(limits);
if (to_stage == QueryProcessingStage::Complete)
if (options.to_stage == QueryProcessingStage::Complete)
stream->setQuota(quota);
});
}

View File

@ -3,12 +3,13 @@
#include <memory>
#include <Core/QueryProcessingStage.h>
#include <Parsers/ASTSelectQuery.h>
#include <DataStreams/IBlockInputStream.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/IInterpreter.h>
#include <Parsers/ASTSelectQuery.h>
#include <Interpreters/SelectQueryOptions.h>
#include <Storages/SelectQueryInfo.h>
@ -23,6 +24,7 @@ class InterpreterSelectWithUnionQuery;
struct SyntaxAnalyzerResult;
using SyntaxAnalyzerResultPtr = std::shared_ptr<const SyntaxAnalyzerResult>;
/** Interprets the SELECT query. Returns the stream of blocks with the results of the query before `to_stage` stage.
*/
class InterpreterSelectQuery : public IInterpreter
@ -32,14 +34,6 @@ public:
* query_ptr
* - A query AST to interpret.
*
* to_stage
* - the stage to which the query is to be executed. By default - till to the end.
* You can perform till the intermediate aggregation state, which are combined from different servers for distributed query processing.
*
* subquery_depth
* - to control the limit on the depth of nesting of subqueries. For subqueries, a value that is incremented by one is passed;
* for INSERT SELECT, a value 1 is passed instead of 0.
*
* required_result_column_names
* - don't calculate all columns except the specified ones from the query
* - it is used to remove calculation (and reading) of unnecessary columns from subqueries.
@ -49,29 +43,22 @@ public:
InterpreterSelectQuery(
const ASTPtr & query_ptr_,
const Context & context_,
const Names & required_result_column_names = Names{},
QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
size_t subquery_depth_ = 0,
bool only_analyze_ = false,
bool modify_inplace = false);
const SelectQueryOptions &,
const Names & required_result_column_names = Names{});
/// Read data not from the table specified in the query, but from the prepared source `input`.
InterpreterSelectQuery(
const ASTPtr & query_ptr_,
const Context & context_,
const BlockInputStreamPtr & input_,
QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
bool only_analyze_ = false,
bool modify_inplace = false);
const SelectQueryOptions & = {});
/// Read data not from the table specified in the query, but from the specified `storage_`.
InterpreterSelectQuery(
const ASTPtr & query_ptr_,
const Context & context_,
const StoragePtr & storage_,
QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
bool only_analyze_ = false,
bool modify_inplace = false);
const SelectQueryOptions & = {});
~InterpreterSelectQuery() override;
@ -93,11 +80,8 @@ private:
const Context & context_,
const BlockInputStreamPtr & input_,
const StoragePtr & storage_,
const Names & required_result_column_names,
QueryProcessingStage::Enum to_stage_,
size_t subquery_depth_,
bool only_analyze_,
bool modify_inplace);
const SelectQueryOptions &,
const Names & required_result_column_names = {});
ASTSelectQuery & getSelectQuery() { return query_ptr->as<ASTSelectQuery &>(); }
@ -223,10 +207,9 @@ private:
*/
void initSettings();
const SelectQueryOptions options;
ASTPtr query_ptr;
Context context;
QueryProcessingStage::Enum to_stage;
size_t subquery_depth = 0;
NamesAndTypesList source_columns;
SyntaxAnalyzerResultPtr syntax_analyzer_result;
std::unique_ptr<ExpressionAnalyzer> query_analyzer;
@ -234,9 +217,6 @@ private:
/// How many streams we ask for storage to produce, and in how many threads we will do further processing.
size_t max_streams = 1;
/// The object was created only for query analysis.
bool only_analyze = false;
/// List of columns to read to execute the query.
Names required_columns;
/// Structure of query source (table, subquery, etc).

View File

@ -26,15 +26,11 @@ namespace ErrorCodes
InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
const ASTPtr & query_ptr_,
const Context & context_,
const Names & required_result_column_names,
QueryProcessingStage::Enum to_stage_,
size_t subquery_depth_,
bool only_analyze,
bool modify_inplace)
: query_ptr(query_ptr_),
context(context_),
to_stage(to_stage_),
subquery_depth(subquery_depth_)
const SelectQueryOptions & options_,
const Names & required_result_column_names)
: options(options_),
query_ptr(query_ptr_),
context(context_)
{
const auto & ast = query_ptr->as<ASTSelectWithUnionQuery &>();
@ -57,7 +53,7 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
/// We use it to determine positions of 'required_result_column_names' in SELECT clause.
Block full_result_header = InterpreterSelectQuery(
ast.list_of_selects->children.at(0), context, Names(), to_stage, subquery_depth, true).getSampleBlock();
ast.list_of_selects->children.at(0), context, options.copy().analyze().noModify()).getSampleBlock();
std::vector<size_t> positions_of_required_result_columns(required_result_column_names.size());
for (size_t required_result_num = 0, size = required_result_column_names.size(); required_result_num < size; ++required_result_num)
@ -66,7 +62,7 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
for (size_t query_num = 1; query_num < num_selects; ++query_num)
{
Block full_result_header_for_current_select = InterpreterSelectQuery(
ast.list_of_selects->children.at(query_num), context, Names(), to_stage, subquery_depth, true).getSampleBlock();
ast.list_of_selects->children.at(query_num), context, options.copy().analyze().noModify()).getSampleBlock();
if (full_result_header_for_current_select.columns() != full_result_header.columns())
throw Exception("Different number of columns in UNION ALL elements:\n"
@ -89,11 +85,8 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
nested_interpreters.emplace_back(std::make_unique<InterpreterSelectQuery>(
ast.list_of_selects->children.at(query_num),
context,
current_required_result_column_names,
to_stage,
subquery_depth,
only_analyze,
modify_inplace));
options,
current_required_result_column_names));
}
/// Determine structure of the result.
@ -179,7 +172,7 @@ Block InterpreterSelectWithUnionQuery::getSampleBlock(
return cache[key];
}
return cache[key] = InterpreterSelectWithUnionQuery(query_ptr, context, {}, QueryProcessingStage::Complete, 0, true).getSampleBlock();
return cache[key] = InterpreterSelectWithUnionQuery(query_ptr, context, SelectQueryOptions().analyze()).getSampleBlock();
}

View File

@ -3,6 +3,7 @@
#include <Core/QueryProcessingStage.h>
#include <Interpreters/Context.h>
#include <Interpreters/IInterpreter.h>
#include <Interpreters/SelectQueryOptions.h>
namespace DB
@ -19,11 +20,8 @@ public:
InterpreterSelectWithUnionQuery(
const ASTPtr & query_ptr_,
const Context & context_,
const Names & required_result_column_names = Names{},
QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
size_t subquery_depth_ = 0,
bool only_analyze = false,
bool modify_inplace = false);
const SelectQueryOptions &,
const Names & required_result_column_names = {});
~InterpreterSelectWithUnionQuery() override;
@ -43,10 +41,9 @@ public:
ASTPtr getQuery() const { return query_ptr; }
private:
const SelectQueryOptions options;
ASTPtr query_ptr;
Context context;
QueryProcessingStage::Enum to_stage;
size_t subquery_depth;
std::vector<std::unique_ptr<InterpreterSelectQuery>> nested_interpreters;

View File

@ -72,7 +72,7 @@ bool MutationsInterpreter::isStorageTouchedByMutations() const
context_copy.getSettingsRef().merge_tree_uniform_read_distribution = 0;
context_copy.getSettingsRef().max_threads = 1;
BlockInputStreamPtr in = InterpreterSelectQuery(select, context_copy, storage, QueryProcessingStage::Complete).execute().in;
BlockInputStreamPtr in = InterpreterSelectQuery(select, context_copy, storage).execute().in;
Block block = in->read();
if (!block.rows())
@ -367,7 +367,7 @@ void MutationsInterpreter::prepare(bool dry_run)
select->children.push_back(where_expression);
}
interpreter_select = std::make_unique<InterpreterSelectQuery>(select, context, storage, QueryProcessingStage::Complete, dry_run);
interpreter_select = std::make_unique<InterpreterSelectQuery>(select, context, storage, SelectQueryOptions().analyze(dry_run));
is_prepared = true;
}

View File

@ -0,0 +1,76 @@
#pragma once
#include <Core/QueryProcessingStage.h>
namespace DB
{
/** Options that control how a SELECT query is interpreted.
  *
  * to_stage
  * - the stage up to which the query is executed. By default - till the very end.
  *   Execution may stop at the intermediate aggregation state, which is what gets combined
  *   from different servers for distributed query processing.
  *
  * subquery_depth
  * - used to control the limit on the depth of nesting of subqueries. For subqueries, a value
  *   incremented by one is passed; for INSERT SELECT, a value 1 is passed instead of 0.
  *
  * only_analyze
  * - the object was created only for query analysis.
  *
  * modify_inplace
  * - set via modify() / noModify(); off by default.
  *
  * remove_duplicates
  * - set via removeDuplicates(); off by default.
  *   NOTE(review): the previous comment documented an `is_subquery` option here ("there could be
  *   some specific for subqueries, e.g. no need to pass duplicated columns in results, because the
  *   results are indirect"); presumably that text describes this flag - confirm.
  *
  * The setters return a reference to the object so that calls can be chained;
  * copy() and subquery() return a new object and leave the original untouched.
  */
struct SelectQueryOptions
{
    QueryProcessingStage::Enum to_stage;
    size_t subquery_depth;
    bool only_analyze = false;
    bool modify_inplace = false;
    bool remove_duplicates = false;

    SelectQueryOptions(QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, size_t depth = 0)
        : to_stage(stage), subquery_depth(depth)
    {}

    /// A modifiable copy; handy for chaining setters on top of a const object.
    SelectQueryOptions copy() const { return *this; }

    /// Options for a nested query: executed to completion, one nesting level deeper,
    /// all flags inherited.
    SelectQueryOptions subquery() const
    {
        SelectQueryOptions nested(*this);
        nested.subquery_depth += 1;
        nested.to_stage = QueryProcessingStage::Complete;
        return nested;
    }

    SelectQueryOptions & analyze(bool value = true)
    {
        only_analyze = value;
        return *this;
    }

    SelectQueryOptions & modify(bool value = true)
    {
        modify_inplace = value;
        return *this;
    }

    SelectQueryOptions & noModify()
    {
        modify_inplace = false;
        return *this;
    }

    SelectQueryOptions & removeDuplicates(bool value = true)
    {
        remove_duplicates = value;
        return *this;
    }

    /// Resets the nesting level to the top one.
    SelectQueryOptions & noSubquery()
    {
        subquery_depth = 0;
        return *this;
    }
};
}

View File

@ -123,24 +123,69 @@ bool hasArrayJoin(const ASTPtr & ast)
return false;
}
/// Keep number of columns for 'GLOBAL IN (SELECT 1 AS a, a)'
void renameDuplicatedColumns(const ASTSelectQuery * select_query)
{
ASTs & elements = select_query->select_expression_list->children;
std::set<String> all_column_names;
std::set<String> assigned_column_names;
for (auto & expr : elements)
all_column_names.insert(expr->getAliasOrColumnName());
for (auto & expr : elements)
{
auto name = expr->getAliasOrColumnName();
if (!assigned_column_names.insert(name).second)
{
size_t i = 1;
while (all_column_names.end() != all_column_names.find(name + "_" + toString(i)))
++i;
name = name + "_" + toString(i);
expr = expr->clone(); /// Cancels fuse of the same expressions in the tree.
expr->setAlias(name);
all_column_names.insert(name);
assigned_column_names.insert(name);
}
}
}
/// Sometimes we have to calculate more columns in SELECT clause than will be returned from query.
/// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result.
void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, const Names & required_result_columns)
/// Also we have to remove duplicates in case of GLOBAL subqueries. Their results are placed into tables so duplicates are impossible.
void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, const Names & required_result_columns, bool remove_dups)
{
if (required_result_columns.empty())
return;
ASTs & elements = select_query->select_expression_list->children;
std::map<String, size_t> required_columns_with_duplicate_count;
if (!required_result_columns.empty())
{
/// Some columns may be queried multiple times, like SELECT x, y, y FROM table.
for (const auto & name : required_result_columns)
{
if (remove_dups)
required_columns_with_duplicate_count[name] = 1;
else
++required_columns_with_duplicate_count[name];
}
}
else if (remove_dups)
{
/// Even if we have no requirements there could be duplicates cause of asterisks. SELECT *, t.*
for (const auto & elem : elements)
required_columns_with_duplicate_count.emplace(elem->getAliasOrColumnName(), 1);
}
else
return;
ASTs new_elements;
new_elements.reserve(elements.size());
/// Some columns may be queried multiple times, like SELECT x, y, y FROM table.
/// In that case we keep them exactly same number of times.
std::map<String, size_t> required_columns_with_duplicate_count;
for (const auto & name : required_result_columns)
++required_columns_with_duplicate_count[name];
for (const auto & elem : elements)
{
String name = elem->getAliasOrColumnName();
@ -645,6 +690,9 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
if (select_query)
{
if (remove_duplicates)
renameDuplicatedColumns(select_query);
if (const ASTTablesInSelectQueryElement * node = select_query->join())
{
if (settings.enable_optimize_predicate_expression)
@ -688,7 +736,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
/// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost)
/// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations.
if (select_query)
removeUnneededColumnsFromSelectClause(select_query, required_result_columns);
removeUnneededColumnsFromSelectClause(select_query, required_result_columns, remove_duplicates);
/// Executing scalar subqueries - replacing them with constant values.
executeScalarSubqueries(query, context, subquery_depth);

View File

@ -2,6 +2,7 @@
#include <Interpreters/AnalyzedJoin.h>
#include <Interpreters/Aliases.h>
#include <Interpreters/SelectQueryOptions.h>
namespace DB
{
@ -55,9 +56,10 @@ using SyntaxAnalyzerResultPtr = std::shared_ptr<const SyntaxAnalyzerResult>;
class SyntaxAnalyzer
{
public:
SyntaxAnalyzer(const Context & context_, size_t subquery_depth_ = 0)
SyntaxAnalyzer(const Context & context_, const SelectQueryOptions & select_options = {})
: context(context_)
, subquery_depth(subquery_depth_)
, subquery_depth(select_options.subquery_depth)
, remove_duplicates(select_options.remove_duplicates)
{}
SyntaxAnalyzerResultPtr analyze(
@ -69,6 +71,7 @@ public:
private:
const Context & context;
size_t subquery_depth;
bool remove_duplicates;
};
}

View File

@ -41,6 +41,8 @@ std::shared_ptr<InterpreterSelectWithUnionQuery> interpretSubquery(
subquery_settings.extremes = 0;
subquery_context.setSettings(subquery_settings);
auto subquery_options = SelectQueryOptions(QueryProcessingStage::Complete, subquery_depth).subquery();
ASTPtr query;
if (table || function)
{
@ -83,48 +85,10 @@ std::shared_ptr<InterpreterSelectWithUnionQuery> interpretSubquery(
else
{
query = subquery->children.at(0);
/** Columns with the same name can be specified in a subquery. For example, SELECT x, x FROM t
* This is bad, because the result of such a query can not be saved to the table, because the table can not have the same name columns.
* Saving to the table is required for GLOBAL subqueries.
*
* To avoid this situation, we will rename the same columns.
*/
std::set<std::string> all_column_names;
std::set<std::string> assigned_column_names;
if (const auto * select_with_union = query->as<ASTSelectWithUnionQuery>())
{
if (const auto * select = select_with_union->list_of_selects->children.at(0)->as<ASTSelectQuery>())
{
for (auto & expr : select->select_expression_list->children)
all_column_names.insert(expr->getAliasOrColumnName());
for (auto & expr : select->select_expression_list->children)
{
auto name = expr->getAliasOrColumnName();
if (!assigned_column_names.insert(name).second)
{
size_t i = 1;
while (all_column_names.end() != all_column_names.find(name + "_" + toString(i)))
++i;
name = name + "_" + toString(i);
expr = expr->clone(); /// Cancels fuse of the same expressions in the tree.
expr->setAlias(name);
all_column_names.insert(name);
assigned_column_names.insert(name);
}
}
}
}
subquery_options.removeDuplicates();
}
return std::make_shared<InterpreterSelectWithUnionQuery>(
query, subquery_context, required_source_columns, QueryProcessingStage::Complete, subquery_depth + 1);
return std::make_shared<InterpreterSelectWithUnionQuery>(query, subquery_context, subquery_options, required_source_columns);
}
}

View File

@ -205,7 +205,7 @@ void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescri
}
else if (type == MODIFY_ORDER_BY)
{
if (!primary_key_ast)
if (!primary_key_ast && order_by_ast)
{
/// Primary and sorting key become independent after this ALTER so we have to
/// save the old ORDER BY expression as the new primary key.

View File

@ -25,6 +25,8 @@
#include <Poco/Ext/ThreadNumber.h>
#include <ext/range.h>
#include <DataStreams/FilterBlockInputStream.h>
#include <DataStreams/ExpressionBlockInputStream.h>
namespace ProfileEvents
@ -221,7 +223,21 @@ BlockInputStreams StorageBuffer::read(
*/
if (processed_stage > QueryProcessingStage::FetchColumns)
for (auto & stream : streams_from_buffers)
stream = InterpreterSelectQuery(query_info.query, context, stream, processed_stage).execute().in;
stream = InterpreterSelectQuery(query_info.query, context, stream, SelectQueryOptions(processed_stage)).execute().in;
if (query_info.prewhere_info)
{
for (auto & stream : streams_from_buffers)
stream = std::make_shared<FilterBlockInputStream>(stream, query_info.prewhere_info->prewhere_actions,
query_info.prewhere_info->prewhere_column_name, query_info.prewhere_info->remove_prewhere_column);
if (query_info.prewhere_info->alias_actions)
{
for (auto & stream : streams_from_buffers)
stream = std::make_shared<ExpressionBlockInputStream>(stream, query_info.prewhere_info->alias_actions);
}
}
streams_from_dst.insert(streams_from_dst.end(), streams_from_buffers.begin(), streams_from_buffers.end());
return streams_from_dst;

View File

@ -74,7 +74,15 @@ public:
void rename(const String & /*new_path_to_db*/, const String & /*new_database_name*/, const String & new_table_name) override { name = new_table_name; }
bool supportsSampling() const override { return true; }
bool supportsPrewhere() const override { return false; }
bool supportsPrewhere() const override
{
if (no_destination)
return false;
auto dest = global_context.tryGetTable(destination_database, destination_table);
if (dest && dest.get() != this)
return dest->supportsPrewhere();
return false;
}
bool supportsFinal() const override { return true; }
bool supportsIndexForIn() const override { return true; }

View File

@ -286,7 +286,8 @@ BlockInputStreams StorageDistributed::read(
const auto & modified_query_ast = rewriteSelectQuery(
query_info.query, remote_database, remote_table, remote_table_function_ptr);
Block header = materializeBlock(InterpreterSelectQuery(query_info.query, context, Names{}, processed_stage).getSampleBlock());
Block header = materializeBlock(
InterpreterSelectQuery(query_info.query, context, SelectQueryOptions(processed_stage)).getSampleBlock());
ClusterProxy::SelectStreamFactory select_stream_factory = remote_table_function_ptr
? ClusterProxy::SelectStreamFactory(

View File

@ -274,7 +274,7 @@ BlockInputStreams StorageMerge::createSourceStreams(const SelectQueryInfo & quer
if (!storage)
return BlockInputStreams{
InterpreterSelectQuery(modified_query_info.query, modified_context, std::make_shared<OneBlockInputStream>(header),
processed_stage, true).execute().in};
SelectQueryOptions(processed_stage).analyze()).execute().in};
BlockInputStreams source_streams;
@ -295,7 +295,7 @@ BlockInputStreams StorageMerge::createSourceStreams(const SelectQueryInfo & quer
modified_context.getSettingsRef().max_threads = UInt64(streams_num);
modified_context.getSettingsRef().max_streams_to_max_threads_ratio = 1;
InterpreterSelectQuery interpreter{modified_query_info.query, modified_context, Names{}, processed_stage};
InterpreterSelectQuery interpreter{modified_query_info.query, modified_context, SelectQueryOptions(processed_stage)};
BlockInputStreamPtr interpreter_stream = interpreter.execute().in;
/** Materialization is needed, since from distributed storage the constants come materialized.
@ -429,7 +429,7 @@ Block StorageMerge::getQueryHeader(
case QueryProcessingStage::Complete:
return materializeBlock(InterpreterSelectQuery(
query_info.query, context, std::make_shared<OneBlockInputStream>(getSampleBlockForColumns(column_names)),
processed_stage, true).getSampleBlock());
SelectQueryOptions(processed_stage).analyze()).getSampleBlock());
}
throw Exception("Logical Error: unknown processed stage.", ErrorCodes::LOGICAL_ERROR);
}

View File

@ -63,7 +63,7 @@ BlockInputStreams StorageView::read(
current_inner_query = new_inner_query;
}
res = InterpreterSelectWithUnionQuery(current_inner_query, context, column_names).executeWithMultipleStreams();
res = InterpreterSelectWithUnionQuery(current_inner_query, context, {}, column_names).executeWithMultipleStreams();
/// It's expected that the columns read from storage are not constant.
/// Because method 'getSampleBlockForColumns' is used to obtain a structure of result in InterpreterSelectQuery.

View File

@ -56,6 +56,7 @@ const char * auto_config_build[]
"USE_PROTOBUF", "@USE_PROTOBUF@",
"USE_BROTLI", "@USE_BROTLI@",
"USE_SSL", "@USE_SSL@",
"USE_HYPERSCAN", "@USE_HYPERSCAN@",
nullptr, nullptr
};

View File

@ -404,6 +404,8 @@ def main(args):
def find_binary(name):
if os.path.exists(name) and os.access(name, os.X_OK):
return True
paths = os.environ.get("PATH").split(':')
for path in paths:
if os.access(os.path.join(path, name), os.X_OK):
@ -416,7 +418,7 @@ if __name__ == '__main__':
parser=ArgumentParser(description='ClickHouse functional tests')
parser.add_argument('-q', '--queries', help='Path to queries dir')
parser.add_argument('--tmp', help='Path to tmp dir')
parser.add_argument('-b', '--binary', default='clickhouse', help='Main clickhouse binary')
parser.add_argument('-b', '--binary', default='clickhouse', help='Path to clickhouse binary or name of binary in PATH')
parser.add_argument('-c', '--client', help='Client program')
parser.add_argument('--extract_from_config', help='extract-from-config program')
parser.add_argument('--configclient', help='Client config (if you use not default ports)')

View File

@ -9,18 +9,18 @@ ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && cd ../.. && pwd)
DATA_DIR=${DATA_DIR:=`mktemp -d /tmp/clickhouse.test..XXXXX`}
DATA_DIR_PATTERN=${DATA_DIR_PATTERN:=/tmp/clickhouse} # path from config file, will be replaced to temporary
LOG_DIR=${LOG_DIR:=$DATA_DIR/log}
export CLICKHOUSE_BINARY=${CLICKHOUSE_BINARY:="clickhouse"}
( [ -x "$ROOT_DIR/dbms/programs/${CLICKHOUSE_BINARY}-server" ] || [ -x "$ROOT_DIR/dbms/programs/${CLICKHOUSE_BINARY}" ] ) && BUILD_DIR=${BUILD_DIR:=$ROOT_DIR} # Build without separate build dir
export CLICKHOUSE_BINARY_NAME=${CLICKHOUSE_BINARY_NAME:="clickhouse"}
( [ -x "$ROOT_DIR/dbms/programs/${CLICKHOUSE_BINARY_NAME}-server" ] || [ -x "$ROOT_DIR/dbms/programs/${CLICKHOUSE_BINARY_NAME}" ] ) && BUILD_DIR=${BUILD_DIR:=$ROOT_DIR} # Build without separate build dir
[ -d "$ROOT_DIR/build${BUILD_TYPE}" ] && BUILD_DIR=${BUILD_DIR:=$ROOT_DIR/build${BUILD_TYPE}}
BUILD_DIR=${BUILD_DIR:=$ROOT_DIR}
[ -x ${CLICKHOUSE_BINARY}-server" ] && [ -x ${CLICKHOUSE_BINARY}-client" ] && BIN_DIR= # Allow run in /usr/bin
( [ -x "$BUILD_DIR/dbms/programs/${CLICKHOUSE_BINARY}" ] || [ -x "$BUILD_DIR/dbms/programs/${CLICKHOUSE_BINARY}-server" ] ) && BIN_DIR=${BIN_DIR:=$BUILD_DIR/dbms/programs/}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY}-server" ] && CLICKHOUSE_SERVER=${CLICKHOUSE_SERVER:=$BIN_DIR/${CLICKHOUSE_BINARY}-server}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY}" ] && CLICKHOUSE_SERVER=${CLICKHOUSE_SERVER:=$BIN_DIR/${CLICKHOUSE_BINARY} server}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY}-client" ] && CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:=$BIN_DIR/${CLICKHOUSE_BINARY}-client}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY}" ] && CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:=$BIN_DIR/${CLICKHOUSE_BINARY} client}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY}-extract-from-config" ] && CLICKHOUSE_EXTRACT=${CLICKHOUSE_EXTRACT:=$BIN_DIR/${CLICKHOUSE_BINARY}-extract-from-config}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY}" ] && CLICKHOUSE_EXTRACT=${CLICKHOUSE_EXTRACT:=$BIN_DIR/${CLICKHOUSE_BINARY} extract-from-config}
# If both the server and the client binaries are executable at the given (possibly relative) path,
# clear BIN_DIR. The opening quotes were missing before, which merged both tests into a single
# quoted word, so this check could never succeed.
[ -x "${CLICKHOUSE_BINARY_NAME}-server" ] && [ -x "${CLICKHOUSE_BINARY_NAME}-client" ] && BIN_DIR= # Allow run in /usr/bin
( [ -x "$BUILD_DIR/dbms/programs/${CLICKHOUSE_BINARY_NAME}" ] || [ -x "$BUILD_DIR/dbms/programs/${CLICKHOUSE_BINARY_NAME}-server" ] ) && BIN_DIR=${BIN_DIR:=$BUILD_DIR/dbms/programs/}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-server" ] && CLICKHOUSE_SERVER=${CLICKHOUSE_SERVER:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-server}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}" ] && CLICKHOUSE_SERVER=${CLICKHOUSE_SERVER:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME} server}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-client" ] && CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-client}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}" ] && CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME} client}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-extract-from-config" ] && CLICKHOUSE_EXTRACT=${CLICKHOUSE_EXTRACT:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-extract-from-config}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}" ] && CLICKHOUSE_EXTRACT=${CLICKHOUSE_EXTRACT:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME} extract-from-config}
[ -f "$CUR_DIR/server-test.xml" ] && CONFIG_DIR=${CONFIG_DIR=$CUR_DIR}/
CONFIG_CLIENT_DIR=${CONFIG_CLIENT_DIR=$CONFIG_DIR}
@ -131,7 +131,7 @@ else
TEST_DICT=${TEST_DICT=1}
CLICKHOUSE_CLIENT_QUERY="${CLICKHOUSE_CLIENT} --config ${CLICKHOUSE_CONFIG_CLIENT} --port $CLICKHOUSE_PORT_TCP -m -n -q"
$CLICKHOUSE_CLIENT_QUERY 'SELECT * from system.build_options; SELECT * FROM system.clusters;'
CLICKHOUSE_TEST="env ${TEST_DIR}clickhouse-test --binary ${BIN_DIR}${CLICKHOUSE_BINARY} --configclient $CLICKHOUSE_CONFIG_CLIENT --configserver $CLICKHOUSE_CONFIG --tmp $DATA_DIR/tmp --queries $QUERIES_DIR $TEST_OPT0 $TEST_OPT"
CLICKHOUSE_TEST="env ${TEST_DIR}clickhouse-test --binary ${BIN_DIR}${CLICKHOUSE_BINARY_NAME} --configclient $CLICKHOUSE_CONFIG_CLIENT --configserver $CLICKHOUSE_CONFIG --tmp $DATA_DIR/tmp --queries $QUERIES_DIR $TEST_OPT0 $TEST_OPT"
CLICKHOUSE_PERFORMANCE_TEST="${BIN_DIR}clickhouse-performance-test --port $CLICKHOUSE_PORT_TCP --recursive $CUR_DIR/performance --skip-tags=long"
if [ "${TEST_RUN_STRESS}" ]; then
# Running test in parallel will fail some results (tests can create/fill/drop same tables)

View File

@ -1,2 +1,7 @@
1
0
0
0 0
0
0 0
0 0

View File

@ -35,49 +35,49 @@ GLOBAL INNER JOIN
) USING dummy;
-- SET asterisk_left_columns_only = 0;
--
-- SELECT * FROM remote('127.0.0.2', system.one)
-- GLOBAL INNER JOIN
-- (
-- SELECT *, dummy
-- FROM ( SELECT dummy FROM remote('127.0.0.2', system.one) ) t1
-- GLOBAL INNER JOIN ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t2
-- USING dummy
-- ) USING dummy;
--
-- SELECT * FROM remote('127.0.0.2', system.one)
-- GLOBAL INNER JOIN
-- (
-- SELECT *, t1.*, t2.*
-- FROM ( SELECT toUInt8(1) AS dummy ) t1
-- INNER JOIN ( SELECT toUInt8(1) AS dummy ) t2
-- USING dummy
-- ) USING dummy;
--
-- SELECT * FROM remote('127.0.0.2', system.one)
-- GLOBAL INNER JOIN
-- (
-- SELECT *, dummy
-- FROM ( SELECT toUInt8(1) AS dummy ) t1
-- INNER JOIN ( SELECT toUInt8(1) AS dummy ) t2
-- USING dummy
-- ) USING dummy;
--
-- SELECT * FROM remote('127.0.0.2', system.one)
-- GLOBAL INNER JOIN
-- (
-- SELECT *
-- FROM ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t1
-- GLOBAL INNER JOIN ( SELECT toUInt8(1) AS dummy ) t2
-- USING dummy
-- ) USING dummy;
--
-- SELECT * FROM remote('127.0.0.2', system.one)
-- GLOBAL INNER JOIN
-- (
-- SELECT *
-- FROM ( SELECT toUInt8(1) AS dummy ) t1
-- GLOBAL INNER JOIN ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t2
-- USING dummy
-- ) USING dummy;
SET asterisk_left_columns_only = 0;
SELECT * FROM remote('127.0.0.2', system.one)
GLOBAL INNER JOIN
(
SELECT *, dummy
FROM ( SELECT dummy FROM remote('127.0.0.2', system.one) ) t1
GLOBAL INNER JOIN ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t2
USING dummy
) USING dummy;
SELECT * FROM remote('127.0.0.2', system.one)
GLOBAL INNER JOIN
(
SELECT *, t1.*, t2.*
FROM ( SELECT toUInt8(0) AS dummy ) t1
INNER JOIN ( SELECT toUInt8(0) AS dummy ) t2
USING dummy
) USING dummy;
SELECT * FROM remote('127.0.0.2', system.one)
GLOBAL INNER JOIN
(
SELECT *, dummy
FROM ( SELECT toUInt8(0) AS dummy ) t1
INNER JOIN ( SELECT toUInt8(0) AS dummy ) t2
USING dummy
) USING dummy;
SELECT * FROM remote('127.0.0.2', system.one)
GLOBAL INNER JOIN
(
SELECT *, dummy as other
FROM ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t1
GLOBAL INNER JOIN ( SELECT toUInt8(0) AS dummy ) t2
USING dummy
) USING dummy;
SELECT * FROM remote('127.0.0.2', system.one)
GLOBAL INNER JOIN
(
SELECT *, dummy, dummy as other
FROM ( SELECT toUInt8(0) AS dummy ) t1
GLOBAL INNER JOIN ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t2
USING dummy
) USING dummy;

View File

@ -0,0 +1 @@
1

View File

@ -0,0 +1,7 @@
DROP DATABASE IF EXISTS test_buffer;
CREATE DATABASE test_buffer;
CREATE TABLE test_buffer.mt (uid UInt64, ts DateTime, val Float64) ENGINE = MergeTree PARTITION BY toDate(ts) ORDER BY (uid, ts);
CREATE TABLE test_buffer.buf as test_buffer.mt ENGINE = Buffer(test_buffer, mt, 2, 10, 60, 10000, 100000, 1000000, 10000000);
INSERT INTO test_buffer.buf VALUES (1, '2019-03-01 10:00:00', 0.5), (2, '2019-03-02 10:00:00', 0.15), (1, '2019-03-03 10:00:00', 0.25);
SELECT count() from test_buffer.buf prewhere ts > toDateTime('2019-03-01 12:00:00') and ts < toDateTime('2019-03-02 12:00:00');
DROP DATABASE test_buffer;

View File

@ -0,0 +1,7 @@
DROP TABLE IF EXISTS test.union1;
DROP TABLE IF EXISTS test.union2;
CREATE TABLE test.union1 ( date Date, a Int32, b Int32, c Int32, d Int32) ENGINE = MergeTree(date, (a, date), 8192);
CREATE TABLE test.union2 ( date Date, a Int32, b Int32, c Int32, d Int32) ENGINE = Distributed(test_shard_localhost, 'test', 'union1');
ALTER TABLE test.union2 MODIFY ORDER BY a; -- { serverError 48 }
DROP TABLE test.union1;
DROP TABLE test.union2;

View File

@ -1,24 +0,0 @@
drop table if exists test.persons;
drop table if exists test.children;
create table test.persons (
id String,
name String
) engine MergeTree order by id;
create table test.children (
id String,
childName String
) engine MergeTree order by id;
insert into test.persons (id, name) values ('1', 'John'), ('2', 'Jack'), ('3', 'Daniel'), ('4', 'James'), ('5', 'Amanda');
insert into test.children (id, childName) values ('1', 'Robert'), ('1', 'Susan'), ('3', 'Sarah'), ('4', 'David'), ('4', 'Joseph'), ('5', 'Robert');
select * from test.persons all inner join test.children using id;
select * from test.persons all inner join (select * from test.children) as j using id;
select * from (select * from test.persons) as s all inner join (select * from test.children) as j using id;

View File

@ -0,0 +1 @@
SELECT a FROM (SELECT 1 AS a, (SELECT count() FROM system.numbers) AS b);

View File

@ -1,11 +0,0 @@
package config;
$default_host = "metrika";
$cfg{'metrika'} = {
fqdn => "",
method => "scpb",
incoming => "/repo/metrika/mini-dinstall/incoming/",
dinstall_runs => 0,
login => "@AUTHOR@"
};

View File

@ -128,16 +128,29 @@ echo 'DROP TABLE t' | curl 'http://localhost:8123/' --data-binary @-
For successful requests that don't return a data table, an empty response body is returned.
You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you will need to use the special clickhouse-compressor program to work with it (it is installed with the clickhouse-client package).
You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you will need to use the special `clickhouse-compressor` program to work with it (it is installed with the `clickhouse-client` package). To increase the efficiency of the data insertion, you may disable the server-side checksum verification with the [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress) setting.
If you specified 'compress=1' in the URL, the server will compress the data it sends you.
If you specified 'decompress=1' in the URL, the server will decompress the same data that you pass in the POST method.
If you specified `compress = 1` in the URL, the server compresses the data it sends you.
If you specified `decompress = 1` in the URL, the server decompresses the same data that you pass in the `POST` method.
It is also possible to use the standard gzip-based HTTP compression. To send a POST request compressed using gzip, append the request header `Content-Encoding: gzip`.
In order for ClickHouse to compress the response using gzip, you must append `Accept-Encoding: gzip` to the request headers, and enable the ClickHouse setting `enable_http_compression`.
It is also possible to use the standard `gzip`-based [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression). To send a `POST` request compressed using `gzip`, append the request header `Content-Encoding: gzip`.
In order for ClickHouse to compress the response using `gzip`, you must append `Accept-Encoding: gzip` to the request headers, and enable the ClickHouse [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting. You can configure the compression level of the data with the [http_zlib_compression_level](../operations/settings/settings.md#settings-http_zlib_compression_level) setting.
You can use this to reduce network traffic when transmitting a large amount of data, or for creating dumps that are immediately compressed.
Examples of sending the data with compression:
```bash
# Receiving compressed data from the server:
curl -vsS "http://localhost:8123/?enable_http_compression=1" -d 'SELECT number FROM system.numbers LIMIT 10' -H 'Accept-Encoding: gzip'
# Sending compressed data to the server:
echo "SELECT 1" | gzip -c | curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/'
```
!!! note "Note"
Some HTTP clients can decompress data (`gzip` and `deflate`) from the server by default and you may get the decompressed data even if you use the compression settings correctly.
You can use the 'database' URL parameter to specify the default database.
```bash

View File

@ -79,6 +79,41 @@ Enable or disable fsync when writing .sql files. Enabled by default.
It makes sense to disable it if the server has millions of tiny table chunks that are constantly being created and destroyed.
## enable_http_compression {#settings-enable_http_compression}
Enables/disables compression of the data in the response to an HTTP request.
For more information, read the [HTTP interface description](../../interfaces/http.md).
Possible values:
- 0 — The functionality is disabled.
- 1 — The functionality is enabled.
Default value: 0.
## http_zlib_compression_level {#settings-http_zlib_compression_level}
Sets the level of the compression of the data in the response to an HTTP request if [enable_http_compression = 1](#settings-enable_http_compression).
Possible values: numbers from 1 to 9.
Default value: 3.
## http_native_compression_disable_checksumming_on_decompress {#settings-http_native_compression_disable_checksumming_on_decompress}
Enables/disables the verification of the checksum when uncompressing the HTTP POST data from the client. Applies only to the ClickHouse native compression format (it is not used with `gzip` or `deflate`).
For more information, read the [HTTP interface description](../../interfaces/http.md).
Possible values:
- 0 — The functionality is disabled.
- 1 — The functionality is enabled.
Default value: 0.
## input_format_allow_errors_num
Sets the maximum number of acceptable errors when reading from text formats (CSV, TSV, etc.).

View File

@ -191,9 +191,7 @@ added dimensions.
In this case it makes sense to leave only a few columns in the primary key that will provide efficient
range scans and add the remaining dimension columns to the sorting key tuple.
[ALTER of the sorting key](../../query_language/alter.md) is a
lightweight operation because when a new column is simultaneously added to the table and to the sorting key
data parts need not be changed (they remain sorted by the new sorting key expression).
[ALTER of the sorting key](../../query_language/alter.md) is a lightweight operation because when a new column is simultaneously added to the table and to the sorting key, existing data parts don't need to be changed. Since the old sorting key is a prefix of the new sorting key and there is no data in the just added column, the data at the moment of table modification is sorted by both the old and the new sorting key.
### Use of Indexes and Partitions in Queries

View File

@ -119,11 +119,31 @@ SELECT arraySort((x, y) -> y, ['hello', 'world'], [2, 1]);
└────────────────────┘
```
Note that NULLs and NaNs go last (NaNs go before NULLs). For example:
``` sql
SELECT arraySort([1, nan, 2, NULL, 3, nan, 4, NULL])
```
```
┌─arraySort([1, nan, 2, NULL, 3, nan, 4, NULL])─┐
│ [1,2,3,4,nan,nan,NULL,NULL] │
└───────────────────────────────────────────────┘
```
### arrayReverseSort(\[func,\] arr1, ...)
Returns an array as the result of sorting the elements of `arr1` in descending order. If the `func` function is specified, the sorting order is determined by the result of the function `func` applied to the elements of the array (arrays).
Note that NULLs and NaNs go last (NaNs go before NULLs). For example:
``` sql
SELECT arrayReverseSort([1, nan, 2, NULL, 3, nan, 4, NULL])
```
```
┌─arrayReverseSort([1, nan, 2, NULL, 3, nan, 4, NULL])─┐
│ [4,3,2,1,nan,nan,NULL,NULL] │
└──────────────────────────────────────────────────────┘
```

View File

@ -51,6 +51,8 @@ Groups of operators are listed in order of priority (the higher it is in the lis
`a BETWEEN b AND c` – The same as `a >= b AND a <= c`.
`a NOT BETWEEN b AND c` – The same as `a < b OR a > c`.
## Operators for Working With Data Sets
*See the section [IN operators](select.md#select-in-operators).*

View File

@ -761,11 +761,12 @@ DISTINCT is not supported if SELECT has at least one array column.
### LIMIT Clause
`LIMIT m` allows you to select the first `m` rows from the result.
`LIMIT n`, m allows you to select the first `m` rows from the result after skipping the first `n` rows.
`LIMIT n, m` allows you to select the first `m` rows from the result after skipping the first `n` rows. The `LIMIT m OFFSET n` syntax is also supported.
`n` and `m` must be non-negative integers.
If there isn't an ORDER BY clause that explicitly sorts results, the result may be arbitrary and nondeterministic.
If there isn't an `ORDER BY` clause that explicitly sorts results, the result may be arbitrary and nondeterministic.
### UNION ALL Clause

View File

@ -189,7 +189,7 @@ ClickHouse не требует уникального первичного кл
В этом сценарии имеет смысл оставить в первичном ключе всего несколько столбцов, которые обеспечат эффективную
фильтрацию по индексу, а остальные столбцы-измерения добавить в выражение ключа сортировки.
[ALTER ключа сортировки](../../query_language/alter.md) — легкая операция, так как при одновременном добавлении нового столбца в таблицу и ключ сортировки не нужно изменять
[ALTER ключа сортировки](../../query_language/alter.md) — легкая операция, так как при одновременном добавлении нового столбца в таблицу и в ключ сортировки, не нужно изменять
данные кусков (они остаются упорядоченными и по новому выражению ключа).
### Использование индексов и партиций в запросах

View File

@ -61,6 +61,7 @@ SELECT
Вернуть первый элемент массива arr1, для которого функция func возвращает не 0.
### arrayFirstIndex(func, arr1, ...)
Вернуть индекс первого элемента массива arr1, для которого функция func возвращает не 0.
### arrayCumSum(\[func,\] arr1, ...)
@ -98,8 +99,31 @@ SELECT arraySort((x, y) -> y, ['hello', 'world'], [2, 1]);
└────────────────────┘
```
`NULL` и `NaN` будут последними в массиве (при этом `NaN` будет перед `NULL`). Например:
``` sql
SELECT arraySort([1, nan, 2, NULL, 3, nan, 4, NULL])
```
```
┌─arraySort([1, nan, 2, NULL, 3, nan, 4, NULL])─┐
│ [1,2,3,4,nan,nan,NULL,NULL] │
└───────────────────────────────────────────────┘
```
### arrayReverseSort(\[func,\] arr1, ...)
Возвращает отсортированный в нисходящем порядке массив `arr1`. Если задана функция `func`, то порядок сортировки определяется результатом применения функции `func` на элементы массива (массивов).
`NULL` и `NaN` будут последними в массиве (при этом `NaN` будет перед `NULL`). Например:
``` sql
SELECT arrayReverseSort([1, nan, 2, NULL, 3, nan, 4, NULL])
```
```
┌─arrayReverseSort([1, nan, 2, NULL, 3, nan, 4, NULL])─┐
│ [4,3,2,1,nan,nan,NULL,NULL] │
└──────────────────────────────────────────────────────┘
```
[Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/functions/higher_order_functions/) <!--hide-->

View File

@ -51,6 +51,8 @@
`a BETWEEN b AND c` - равнозначно `a >= b AND a <= c`
`a NOT BETWEEN b AND c` - равнозначно `a < b OR a > c`
## Операторы для работы с множествами
*Смотрите раздел [Операторы IN](select.md#select-in-operators).*

View File

@ -714,12 +714,13 @@ WHERE и HAVING отличаются тем, что WHERE выполняется
### Секция LIMIT
LIMIT m позволяет выбрать из результата первые m строк.
LIMIT n, m позволяет выбрать из результата первые m строк после пропуска первых n строк.
`LIMIT m` позволяет выбрать из результата первые `m` строк.
n и m должны быть неотрицательными целыми числами.
`LIMIT n, m` позволяет выбрать из результата первые `m` строк после пропуска первых `n` строк. Синтаксис `LIMIT m OFFSET n` также поддерживается.
При отсутствии секции ORDER BY, однозначно сортирующей результат, результат может быть произвольным и может являться недетерминированным.
`n` и `m` должны быть неотрицательными целыми числами.
При отсутствии секции `ORDER BY`, однозначно сортирующей результат, результат может быть произвольным и может являться недетерминированным.
### Секция UNION ALL

View File

@ -2,7 +2,7 @@
ClickHouse documentation is built using the [build.py](build.py) script, which uses the [mkdocs](https://www.mkdocs.org) library and its dependencies to separately build all versions of the documentation (all languages, in both single-page and multi-page modes) as static HTML. The results are then put into the correct directory structure. It can also generate a PDF version.
Finally, [the infrastructure](../website) that builds the ClickHouse [official website](https://clickhouse.yandex) just puts that directory structure into the same Docker container together with the rest of the website and deploys it to the Yandex private cloud.
[release.sh](release.sh) also pulls static files needed for [official ClickHouse website](https://clickhouse.yandex) from [../../website](../../website) folder, packs them alongside docs into Docker container and tries to deploy it (possible only from Yandex private network).
## How to check if the documentation will look fine?

View File

@ -21,6 +21,7 @@ from mkdocs import exceptions
from mkdocs.commands import build as mkdocs_build
from concatenate import concatenate
from website import build_website, minify_website
import mdx_clickhouse
import test
@ -96,7 +97,7 @@ def build_for_lang(lang, args):
site_name=site_names.get(lang, site_names['en']),
site_url='https://clickhouse.yandex/docs/%s/' % lang,
docs_dir=os.path.join(args.docs_dir, lang),
site_dir=os.path.join(args.output_dir, lang),
site_dir=os.path.join(args.docs_output_dir, lang),
strict=True,
theme=theme_cfg,
copyright='©20162019 Yandex LLC',
@ -168,7 +169,7 @@ def build_single_page_version(lang, args, cfg):
mkdocs_build.build(cfg)
single_page_output_path = os.path.join(args.docs_dir, args.output_dir, lang, 'single')
single_page_output_path = os.path.join(args.docs_dir, args.docs_output_dir, lang, 'single')
if os.path.exists(single_page_output_path):
shutil.rmtree(single_page_output_path)
@ -212,29 +213,40 @@ def build_redirects(args):
to_path = '/docs/$1/' + to_path.replace('.md', '/')
rewrites.append(' '.join(['rewrite', from_path, to_path, 'permanent;']))
with open(os.path.join(args.output_dir, 'redirects.conf'), 'w') as f:
with open(os.path.join(args.docs_output_dir, 'redirects.conf'), 'w') as f:
f.write('\n'.join(rewrites))
def build(args):
    """Run the complete build into ``args.output_dir``.

    Steps, in order:
      1. remove any existing output directory;
      2. copy the website sources (unless ``args.skip_website`` is set);
      3. build the documentation for every language listed in the
         comma-separated ``args.lang``;
      4. generate the redirects file;
      5. minify the result (unless ``args.skip_website`` is set).
    """
    output_dir = args.output_dir
    if os.path.exists(output_dir):
        # Start from a clean slate so stale artifacts never leak into the result.
        shutil.rmtree(output_dir)

    if not args.skip_website:
        build_website(args)

    languages = args.lang.split(',')
    for language in languages:
        build_for_lang(language, args)

    build_redirects(args)

    # Minification comes last so that it also sees the freshly generated docs.
    if not args.skip_website:
        minify_website(args)
if __name__ == '__main__':
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('--lang', default='en,ru,zh,fa')
arg_parser.add_argument('--docs-dir', default='.')
arg_parser.add_argument('--theme-dir', default='mkdocs-material-theme')
arg_parser.add_argument('--website-dir', default=os.path.join('..', 'website'))
arg_parser.add_argument('--output-dir', default='build')
arg_parser.add_argument('--skip-single-page', action='store_true')
arg_parser.add_argument('--skip-pdf', action='store_true')
arg_parser.add_argument('--skip-website', action='store_true')
arg_parser.add_argument('--save-raw-single-page', type=str)
arg_parser.add_argument('--verbose', action='store_true')
args = arg_parser.parse_args()
args.docs_output_dir = os.path.join(args.output_dir, 'docs')
os.chdir(os.path.join(os.path.dirname(__file__), '..'))
logging.basicConfig(

View File

@ -1,7 +1,7 @@
#!/usr/bin/env bash
set -ex
BASE_DIR=$(dirname $(readlink -f $0))
cd "${BASE_DIR}"
BUILD_DIR="${BASE_DIR}/../build"
IMAGE="clickhouse/website"
if [[ -z "$1" ]]
then
@ -12,12 +12,12 @@ fi
FULL_NAME="${IMAGE}:${TAG}"
REMOTE_NAME="registry.yandex.net/${FULL_NAME}"
DOCKER_HASH="$2"
GULP="$BASE_DIR/node_modules/gulp/bin/gulp.js"
if [[ -z "$1" ]]
then
$GULP clean
$GULP build
docker build -t "${FULL_NAME}" "${BASE_DIR}"
source "${BASE_DIR}/venv/bin/activate"
python "${BASE_DIR}/build.py"
cd "${BUILD_DIR}"
docker build -t "${FULL_NAME}" "${BUILD_DIR}"
docker tag "${FULL_NAME}" "${REMOTE_NAME}"
DOCKER_HASH=$(docker push "${REMOTE_NAME}" | tail -1 | awk '{print $3;}')
docker rmi "${FULL_NAME}"

View File

@ -6,11 +6,14 @@ certifi==2017.11.5
chardet==3.0.4
click==6.7
CommonMark==0.5.4
cssmin==0.2.0
docutils==0.14
futures==3.2.0
htmlmin==0.1.12
idna==2.6
imagesize==0.7.1
Jinja2==2.10
jsmin==2.2.2
livereload==2.5.1
Markdown==2.6.11
MarkupSafe==1.0
@ -18,7 +21,7 @@ mkdocs==1.0.4
Pygments==2.2.0
python-slugify==1.2.6
pytz==2017.3
PyYAML==4.2b1
PyYAML==3.12
recommonmark==0.4.0
requests==2.21.0
singledispatch==3.4.0.3

45
docs/tools/website.py Normal file
View File

@ -0,0 +1,45 @@
import logging
import os
import shutil
import cssmin
import htmlmin
import jsmin
def build_website(args):
    """Copy the static website sources into the build output directory.

    The tree at ``args.website_dir`` is copied to ``args.output_dir`` with
    ``shutil.copytree``. Markdown files, shell scripts and the ``build``,
    ``docs``, ``public`` and ``node_modules`` entries are left out.
    """
    logging.info('Building website')
    skipped = ('*.md', '*.sh', 'build', 'docs', 'public', 'node_modules')
    should_ignore = shutil.ignore_patterns(*skipped)
    shutil.copytree(args.website_dir, args.output_dir, ignore=should_ignore)
def minify_website(args):
    """Minify every HTML, CSS and JavaScript file under ``args.output_dir`` in place.

    The directory tree is walked recursively. Files whose names end in
    ``.html``, ``.css`` or ``.js`` are read as UTF-8, passed through
    ``htmlmin``, ``cssmin`` or ``jsmin`` respectively, and written back to
    the same path as UTF-8. All other files are left untouched.
    """
    minifiable = ('.html', '.css', '.js')
    for root, _, filenames in os.walk(args.output_dir):
        for filename in filenames:
            if not filename.endswith(minifiable):
                continue

            path = os.path.join(root, filename)
            logging.info('Minifying %s', path)

            # Binary mode on purpose: the bytes are decoded and encoded
            # explicitly below. Text mode only worked on Python 2; on
            # Python 3 ``str`` has no ``.decode`` and a text handle rejects
            # the encoded bytes on write.
            with open(path, 'rb') as f:
                content = f.read().decode('utf-8')

            if filename.endswith('.html'):
                content = htmlmin.minify(content, remove_empty_space=False)
            elif filename.endswith('.css'):
                content = cssmin.cssmin(content)
            elif filename.endswith('.js'):
                content = jsmin.jsmin(content)

            with open(path, 'wb') as f:
                f.write(content.encode('utf-8'))

View File

@ -22,5 +22,5 @@ env TEST_RUN=1 \
`# Use all possible contrib libs from system` \
`# psmisc - killall` \
`# gdb - symbol test in pbuilder` \
EXTRAPACKAGES="psmisc libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev libboost-regex-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev libjemalloc-dev libssl-dev libunwind-dev libgsasl7-dev libxml2-dev libbrotli-dev $EXTRAPACKAGES" \
EXTRAPACKAGES="psmisc libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev libboost-regex-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev libjemalloc-dev libssl-dev libunwind-dev libgsasl7-dev libxml2-dev libbrotli-dev libhyperscan-dev $EXTRAPACKAGES" \
pdebuild --configfile $ROOT_DIR/debian/.pbuilderrc $PDEBUILD_OPT

View File

@ -1,4 +1,4 @@
FROM nginx:mainline
COPY public /usr/share/nginx/html
COPY . /usr/share/nginx/html/public
COPY nginx/nginx.conf /etc/nginx/nginx.conf
COPY nginx/default.conf /etc/nginx/conf.d/default.conf

View File

@ -1,15 +1,2 @@
ClickHouse website quickstart:
ClickHouse website is built alongside its documentation via [docs/tools](https://github.com/yandex/ClickHouse/tree/master/docs/tools), see [README.md there](https://github.com/yandex/ClickHouse/tree/master/docs/tools/README.md).
On Linux, do the following:
```
sudo apt-get install nodejs
sudo ln -s /usr/bin/nodejs /usr/bin/node
sudo npm install gulp-cli -g
sudo npm install gulp -D
```
1. Make sure you have `npm`, `docker` and `python` installed and available in your `$PATH`.
2. Run `setup\_gulp.sh` once to install build prerequisites via npm.
3. Use `gulp build` to minify website to "public" subfolder or just `gulp` to run local webserver with livereload serving it (note: livereload browser extension is required to make it actually reload pages on edits automatically).
4. There's Dockerfile that can be used to build and run ClickHouse website inside docker.
5. Deployment to https://clickhouse.yandex/ is managed by `release.sh`, but it is only usable from inside Yandex private network.

View File

@ -1,154 +0,0 @@
// Gulp build script for the website: copies/minifies sources into `outputDir`
// and can serve the result locally. Tasks use the three-argument
// gulp.task(name, dependencies, fn) form.
var gulp = require('gulp');
var concat = require('gulp-concat');
var uglify = require('gulp-uglify');
var cleanCss = require('gulp-clean-css');
var imagemin = require('gulp-imagemin');
var sourcemaps = require('gulp-sourcemaps');
var htmlmin = require('gulp-htmlmin');
var minifyInline = require('gulp-minify-inline');
var del = require('del');
var connect = require('gulp-connect');
var run = require('gulp-run');
// Directory that receives the built site, and location of the docs sources.
var outputDir = 'public';
var docsDir = '../docs';
// Glob lists consumed by the tasks below. Entries prefixed with '!' are
// exclusions (node_modules, presentations and the output directory itself).
var paths = {
htmls: [
'**/*.html',
'!deprecated/reference_ru.html',
'!deprecated/reference_en.html',
'!node_modules/**/*.html',
'!presentations/**/*.html',
'!public/**/*.html'],
reference: ['deprecated/reference_ru.html', 'deprecated/reference_en.html'],
docs: [docsDir + '/build/**/*'],
docstxt: ['docs/**/*.txt', 'docs/redirects.conf'],
docsjson: ['docs/**/*.json'],
docsxml: ['docs/**/*.xml'],
docspdf: ['docs/**/*.pdf'],
docssitemap: ['sitemap.xml', 'sitemap_static.xml'],
scripts: [
'**/*.js',
'!gulpfile.js',
'!node_modules/**/*.js',
'!presentations/**/*.js',
'!public/**/*.js'],
styles: [
'**/*.css',
'!node_modules/**/*.css',
'!presentations/**/*.css',
'!public/**/*.css'],
images: [
'**/*.{jpg,jpeg,png,gif,svg,ico}',
'!node_modules/**/*.{jpg,jpeg,png,gif,svg,ico}',
'!presentations/**/*.{jpg,jpeg,png,gif,svg,ico}',
'!public/**/*.{jpg,jpeg,png,gif,svg,ico}'],
robotstxt: ['robots.txt'],
presentations: ['presentations/**/*']
};
// clean: delete everything under the output directory.
gulp.task('clean', function () {
return del([outputDir + '/**']);
});
// reference: pass the deprecated reference pages through minifyInline and
// place them in <outputDir>/deprecated.
gulp.task('reference', [], function () {
return gulp.src(paths.reference)
.pipe(minifyInline())
.pipe(gulp.dest(outputDir + '/deprecated'))
});
// docs: invoke the docs build script, then copy its build output into
// outputDir + '/../docs' (i.e. a 'docs' directory next to the output dir).
// NOTE(review): the value returned by run() is discarded and not waited on;
// confirm that gulp-run actually executes the command before the copy starts.
gulp.task('docs', [], function () {
run('cd ' + docsDir + '/tools; ./build.py');
return gulp.src(paths.docs)
.pipe(gulp.dest(outputDir + '/../docs'))
});
// docstxt / docsjson / docsxml / docspdf: after 'docs', copy the matching
// files from the local 'docs' directory into <outputDir>/docs unchanged.
gulp.task('docstxt', ['docs'], function () {
return gulp.src(paths.docstxt)
.pipe(gulp.dest(outputDir + '/docs'))
});
gulp.task('docsjson', ['docs'], function () {
return gulp.src(paths.docsjson)
.pipe(gulp.dest(outputDir + '/docs'))
});
gulp.task('docsxml', ['docs'], function () {
return gulp.src(paths.docsxml)
.pipe(gulp.dest(outputDir + '/docs'))
});
gulp.task('docspdf', ['docs'], function () {
return gulp.src(paths.docspdf)
.pipe(gulp.dest(outputDir + '/docs'))
});
// docssitemap: copy the top-level sitemap files into <outputDir>/docs.
gulp.task('docssitemap', [], function () {
return gulp.src(paths.docssitemap)
.pipe(gulp.dest(outputDir + '/docs'))
});
// presentations: copy the presentations tree as-is (it is excluded from the
// html/script/style/image globs above).
gulp.task('presentations', [], function () {
return gulp.src(paths.presentations)
.pipe(gulp.dest(outputDir + '/presentations'))
});
// robotstxt: copy robots.txt to the root of the output directory.
gulp.task('robotstxt', [], function () {
return gulp.src(paths.robotstxt)
.pipe(gulp.dest(outputDir))
});
// htmls: once all docs-related tasks are done, minify HTML pages (collapsing
// whitespace) and their inline assets, writing them to the output directory.
gulp.task('htmls', ['docs', 'docstxt', 'docsjson', 'docsxml', 'docspdf', 'docssitemap'], function () {
return gulp.src(paths.htmls)
.pipe(htmlmin({collapseWhitespace: true}))
.pipe(minifyInline())
.pipe(gulp.dest(outputDir))
});
// sourcemaps: uglify scripts with source maps written alongside the output.
// Not listed in the 'build' task below, so it only runs when asked for by name.
gulp.task('sourcemaps', ['docs'], function () {
return gulp.src(paths.scripts)
.pipe(sourcemaps.init())
.pipe(uglify())
.pipe(sourcemaps.write())
.pipe(gulp.dest(outputDir))
});
// scripts: uglify JavaScript into the output directory.
gulp.task('scripts', ['docs'], function () {
return gulp.src(paths.scripts)
.pipe(uglify())
.pipe(gulp.dest(outputDir))
});
// styles: run stylesheets through clean-css into the output directory.
gulp.task('styles', ['docs'], function () {
return gulp.src(paths.styles)
.pipe(cleanCss())
.pipe(gulp.dest(outputDir))
});
// images: run images through imagemin (optimizationLevel 9) into the output directory.
gulp.task('images', ['docs'], function () {
return gulp.src(paths.images)
.pipe(imagemin({optimizationLevel: 9}))
.pipe(gulp.dest(outputDir))
});
// watch: re-run the matching task when sources change. Stylesheets
// (paths.styles) are not watched here.
gulp.task('watch', function () {
gulp.watch(paths.htmls, ['htmls']);
gulp.watch(paths.docs, ['docs']);
gulp.watch(paths.reference, ['reference']);
gulp.watch(paths.scripts, ['scripts']);
gulp.watch(paths.images, ['images']);
});
// connect: start gulp-connect's server rooted at the output directory on
// port 8080 with the livereload option enabled.
gulp.task('connect', function() {
connect.server({
root: outputDir,
port: 8080,
keepalive: true,
livereload: true
})
});
// build: aggregate task producing the complete site.
gulp.task('build', ['htmls', 'robotstxt', 'reference', 'scripts', 'styles', 'images', 'presentations']);
// default: build, then serve the result.
gulp.task('default', ['build', 'connect']);

View File

@ -1,3 +0,0 @@
#!/usr/bin/env bash
# Installs the npm packages that gulpfile.js loads via require().
# -e: exit when a command fails; -x: print each command before it runs.
set -ex
# Select the lines of gulpfile.js that contain "require", print the text
# between the first pair of single quotes on each (awk field 2 with ' as the
# separator), and pass the resulting names to `npm install`.
grep require gulpfile.js | awk -F\' '{print $2;}' | xargs npm install