mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-26 19:50:51 +00:00
Header in every stream: development [#CLICKHOUSE-2]
This commit is contained in:
parent
30b657a0da
commit
69a9723ce2
@ -115,7 +115,6 @@ std::ostream & operator<<(std::ostream & stream, const Connection::Packet & what
|
||||
std::ostream & operator<<(std::ostream & stream, const SubqueryForSet & what)
|
||||
{
|
||||
stream << "SubqueryForSet(source = " << what.source
|
||||
<< ", source_sample = " << what.source_sample
|
||||
// TODO: << ", set = " << what.set << ", join = " << what.join
|
||||
<< ", table = " << what.table
|
||||
<< ")";
|
||||
|
@ -15,14 +15,13 @@ class LazyBlockInputStream : public IProfilingBlockInputStream
|
||||
public:
|
||||
using Generator = std::function<BlockInputStreamPtr()>;
|
||||
|
||||
LazyBlockInputStream(Generator generator_)
|
||||
: generator(std::move(generator_))
|
||||
LazyBlockInputStream(const Block & header_, Generator generator_)
|
||||
: header(header_), generator(std::move(generator_))
|
||||
{
|
||||
}
|
||||
|
||||
LazyBlockInputStream(const char * name_, Generator generator_)
|
||||
: name(name_)
|
||||
, generator(std::move(generator_))
|
||||
LazyBlockInputStream(const char * name_, const Block & header_, Generator generator_)
|
||||
: name(name_), header(header_), generator(std::move(generator_))
|
||||
{
|
||||
}
|
||||
|
||||
@ -36,88 +35,70 @@ public:
|
||||
|
||||
Block getHeader() override
|
||||
{
|
||||
std::cerr << "LazyBlockInputStream::getHeader()\n";
|
||||
|
||||
init();
|
||||
if (!input)
|
||||
return {};
|
||||
|
||||
return input->getHeader();
|
||||
return header;
|
||||
}
|
||||
|
||||
protected:
|
||||
Block readImpl() override
|
||||
{
|
||||
init();
|
||||
if (!input)
|
||||
return {};
|
||||
{
|
||||
input = generator();
|
||||
|
||||
if (!input)
|
||||
return Block();
|
||||
|
||||
auto * p_input = dynamic_cast<IProfilingBlockInputStream *>(input.get());
|
||||
|
||||
if (p_input)
|
||||
{
|
||||
/// They could have been set before, but were not passed into the `input`.
|
||||
if (progress_callback)
|
||||
p_input->setProgressCallback(progress_callback);
|
||||
if (process_list_elem)
|
||||
p_input->setProcessListElement(process_list_elem);
|
||||
}
|
||||
|
||||
input->readPrefix();
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(cancel_mutex);
|
||||
|
||||
/** TODO Data race here. See IProfilingBlockInputStream::collectAndSendTotalRowsApprox.
|
||||
Assume following pipeline:
|
||||
|
||||
RemoteBlockInputStream
|
||||
AsynchronousBlockInputStream
|
||||
LazyBlockInputStream
|
||||
|
||||
RemoteBlockInputStream calls AsynchronousBlockInputStream::readPrefix
|
||||
and AsynchronousBlockInputStream spawns a thread and returns.
|
||||
|
||||
The separate thread will call LazyBlockInputStream::read
|
||||
LazyBlockInputStream::read will add more children to itself
|
||||
|
||||
In the same moment, in main thread, RemoteBlockInputStream::read is called,
|
||||
then IProfilingBlockInputStream::collectAndSendTotalRowsApprox is called
|
||||
and iterates over set of children.
|
||||
*/
|
||||
children.push_back(input);
|
||||
|
||||
if (isCancelled() && p_input)
|
||||
p_input->cancel();
|
||||
}
|
||||
}
|
||||
|
||||
return input->read();
|
||||
}
|
||||
|
||||
private:
|
||||
const char * name = "Lazy";
|
||||
Block header;
|
||||
Generator generator;
|
||||
|
||||
bool initialized = false;
|
||||
BlockInputStreamPtr input;
|
||||
|
||||
std::mutex cancel_mutex;
|
||||
|
||||
void init()
|
||||
{
|
||||
if (initialized)
|
||||
return;
|
||||
|
||||
std::cerr << "LazyBlockInputStream::init()\n";
|
||||
|
||||
input = generator();
|
||||
initialized = true;
|
||||
|
||||
if (!input)
|
||||
return;
|
||||
|
||||
std::cerr << "!\n";
|
||||
|
||||
auto * p_input = dynamic_cast<IProfilingBlockInputStream *>(input.get());
|
||||
|
||||
if (p_input)
|
||||
{
|
||||
/// They could have been set before, but were not passed into the `input`.
|
||||
if (progress_callback)
|
||||
p_input->setProgressCallback(progress_callback);
|
||||
if (process_list_elem)
|
||||
p_input->setProcessListElement(process_list_elem);
|
||||
}
|
||||
|
||||
input->readPrefix();
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(cancel_mutex);
|
||||
|
||||
/** TODO Data race here. See IProfilingBlockInputStream::collectAndSendTotalRowsApprox.
|
||||
Assume following pipeline:
|
||||
|
||||
RemoteBlockInputStream
|
||||
AsynchronousBlockInputStream
|
||||
LazyBlockInputStream
|
||||
|
||||
RemoteBlockInputStream calls AsynchronousBlockInputStream::readPrefix
|
||||
and AsynchronousBlockInputStream spawns a thread and returns.
|
||||
|
||||
The separate thread will call LazyBlockInputStream::read
|
||||
LazyBlockInputStream::read will add more children to itself
|
||||
|
||||
In the same moment, in main thread, RemoteBlockInputStream::read is called,
|
||||
then IProfilingBlockInputStream::collectAndSendTotalRowsApprox is called
|
||||
and iterates over set of children.
|
||||
*/
|
||||
children.push_back(input);
|
||||
|
||||
if (isCancelled() && p_input)
|
||||
p_input->cancel();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -198,7 +198,7 @@ void SelectStreamFactory::createForShard(
|
||||
}
|
||||
};
|
||||
|
||||
res.emplace_back(std::make_shared<LazyBlockInputStream>("LazyShardWithLocalReplica", lazily_create_stream));
|
||||
res.emplace_back(std::make_shared<LazyBlockInputStream>("LazyShardWithLocalReplica", header, lazily_create_stream));
|
||||
}
|
||||
else
|
||||
emplace_remote_stream();
|
||||
|
@ -883,7 +883,6 @@ void ExpressionAnalyzer::addExternalStorage(ASTPtr & subquery_or_table_name_or_t
|
||||
|
||||
external_tables[external_table_name] = external_storage;
|
||||
subqueries_for_sets[external_table_name].source = interpreter->execute().in;
|
||||
subqueries_for_sets[external_table_name].source_sample = interpreter->getSampleBlock();
|
||||
subqueries_for_sets[external_table_name].table = external_storage;
|
||||
|
||||
/** NOTE If it was written IN tmp_table - the existing temporary (but not external) table,
|
||||
@ -1661,8 +1660,7 @@ void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_
|
||||
{
|
||||
auto interpreter = interpretSubquery(arg, context, subquery_depth, {});
|
||||
subquery_for_set.source = std::make_shared<LazyBlockInputStream>(
|
||||
[interpreter]() mutable { return interpreter->execute().in; });
|
||||
subquery_for_set.source_sample = interpreter->getSampleBlock();
|
||||
interpreter->getSampleBlock(), [interpreter]() mutable { return interpreter->execute().in; });
|
||||
|
||||
/** Why is LazyBlockInputStream used?
|
||||
*
|
||||
@ -2486,13 +2484,14 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty
|
||||
table = table_to_join.subquery;
|
||||
|
||||
auto interpreter = interpretSubquery(table, context, subquery_depth, required_joined_columns);
|
||||
subquery_for_set.source = std::make_shared<LazyBlockInputStream>([interpreter]() mutable { return interpreter->execute().in; });
|
||||
subquery_for_set.source_sample = interpreter->getSampleBlock();
|
||||
subquery_for_set.source = std::make_shared<LazyBlockInputStream>(
|
||||
interpreter->getSampleBlock(),
|
||||
[interpreter]() mutable { return interpreter->execute().in; });
|
||||
}
|
||||
|
||||
/// TODO You do not need to set this up when JOIN is only needed on remote servers.
|
||||
subquery_for_set.join = join;
|
||||
subquery_for_set.join->setSampleBlock(subquery_for_set.source_sample);
|
||||
subquery_for_set.join->setSampleBlock(subquery_for_set.source->getHeader());
|
||||
}
|
||||
|
||||
addJoinAction(step.actions, false);
|
||||
|
@ -41,7 +41,6 @@ struct SubqueryForSet
|
||||
{
|
||||
/// The source is obtained using the InterpreterSelectQuery subquery.
|
||||
BlockInputStreamPtr source;
|
||||
Block source_sample;
|
||||
|
||||
/// If set, build it from result.
|
||||
SetPtr set;
|
||||
|
@ -137,6 +137,7 @@ BlockInputStreams StorageMerge::read(
|
||||
const unsigned num_streams)
|
||||
{
|
||||
BlockInputStreams res;
|
||||
Block header = getSampleBlockForColumns(column_names);
|
||||
|
||||
Names virt_column_names, real_column_names;
|
||||
for (const auto & it : column_names)
|
||||
@ -223,13 +224,13 @@ BlockInputStreams StorageMerge::read(
|
||||
for (auto & stream : source_streams)
|
||||
{
|
||||
/// will throw if some columns not convertible
|
||||
stream = std::make_shared<CastTypeBlockInputStream>(context, stream, getSampleBlock());
|
||||
stream = std::make_shared<CastTypeBlockInputStream>(context, stream, header);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/// If many streams, initialize it lazily, to avoid long delay before start of query processing.
|
||||
source_streams.emplace_back(std::make_shared<LazyBlockInputStream>([=]
|
||||
source_streams.emplace_back(std::make_shared<LazyBlockInputStream>(header, [=]
|
||||
{
|
||||
QueryProcessingStage::Enum processed_stage_in_source_table = processed_stage;
|
||||
BlockInputStreams streams = table->read(
|
||||
@ -247,11 +248,11 @@ BlockInputStreams StorageMerge::read(
|
||||
throw Exception("Source tables for Merge table are processing data up to different stages",
|
||||
ErrorCodes::INCOMPATIBLE_SOURCE_TABLES);
|
||||
|
||||
auto stream = streams.empty() ? std::make_shared<NullBlockInputStream>(getSampleBlock()) : streams.front();
|
||||
auto stream = streams.empty() ? std::make_shared<NullBlockInputStream>(header) : streams.front();
|
||||
if (!streams.empty())
|
||||
{
|
||||
/// will throw if some columns not convertible
|
||||
stream = std::make_shared<CastTypeBlockInputStream>(context, stream, getSampleBlock());
|
||||
stream = std::make_shared<CastTypeBlockInputStream>(context, stream, header);
|
||||
}
|
||||
return stream;
|
||||
}));
|
||||
|
Loading…
Reference in New Issue
Block a user