mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-26 17:41:59 +00:00
Remove all segmentation engines except TSV
This commit is contained in:
parent
83030b98a2
commit
4ab7ac14bd
@ -291,11 +291,7 @@ void registerOutputFormatProcessorTemplate(FormatFactory &factory);
|
||||
|
||||
/// File Segmentation Engines for parallel reading
|
||||
|
||||
void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory);
|
||||
void registerFileSegmentationEngineTabSeparated(FormatFactory & factory);
|
||||
//void registerFileSegmentationEngineValues(FormatFactory & factory);
|
||||
void registerFileSegmentationEngineCSV(FormatFactory & factory);
|
||||
void registerFileSegmentationEngineTSKV(FormatFactory & factory);
|
||||
|
||||
/// Output only (presentational) formats.
|
||||
|
||||
@ -347,10 +343,7 @@ FormatFactory::FormatFactory()
|
||||
registerInputFormatProcessorTemplate(*this);
|
||||
registerOutputFormatProcessorTemplate(*this);
|
||||
|
||||
registerFileSegmentationEngineJSONEachRow(*this);
|
||||
registerFileSegmentationEngineTabSeparated(*this);
|
||||
registerFileSegmentationEngineCSV(*this);
|
||||
registerFileSegmentationEngineTSKV(*this);
|
||||
|
||||
registerOutputFormatNull(*this);
|
||||
|
||||
|
@ -1053,26 +1053,4 @@ void skipToUnescapedNextLineOrEOF(ReadBuffer & buf)
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks for end of input while preserving the bytes scanned so far.
///
/// When the buffer has no pending data (or when `force_saving_buffer_state` is set), the bytes
/// in [begin_pos, buf.position()) are appended to `memory` at offset `used_size`, `used_size`
/// is advanced by that amount, `buf.eof()` is queried and `begin_pos` is reset to the buffer
/// position observed after that query.
///
/// Returns the result of `buf.eof()` if the state was saved, and false otherwise.
bool eofWithSavingBufferState(ReadBuffer & buf, DB::Memory<> & memory, size_t & used_size, char * & begin_pos, bool force_saving_buffer_state)
{
    /// Nothing has to be copied while the buffer still holds unread bytes, unless the caller insists.
    if (!force_saving_buffer_state && buf.hasPendingData())
        return false;

    const size_t scanned_bytes = static_cast<size_t>(buf.position() - begin_pos);
    const size_t required_size = used_size + scanned_bytes;

    /// Grow the destination only when it cannot hold the new data; avoids a needless call.
    if (memory.size() <= required_size)
        memory.resize(required_size);

    memcpy(memory.data() + used_size, begin_pos, scanned_bytes);
    used_size = required_size;

    /// NOTE(review): eof() presumably may replace the buffer contents, which is why
    /// begin_pos is re-read only after the call - confirm against ReadBuffer.
    const bool at_eof = buf.eof();
    begin_pos = buf.position();
    return at_eof;
}
|
||||
|
||||
}
|
||||
|
@ -422,68 +422,5 @@ void registerInputFormatProcessorCSV(FormatFactory & factory)
|
||||
}
|
||||
}
|
||||
|
||||
/// Cuts the next chunk of CSV input out of `in` and accumulates it in `memory`.
///
/// The scan tracks double-quoted regions (a doubled quote inside such a region does not close it)
/// and finishes at the first '\n' or '\r' found outside quotes once the accumulated size
/// has reached `min_chunk_size`, or at end of input. A '\n' immediately followed by '\r'
/// (or '\r' followed by '\n') is consumed as a single terminator.
///
/// `used_size` is advanced by the number of bytes stored (via eofWithSavingBufferState).
/// Returns false only if `in` is already exhausted on entry, true otherwise.
bool fileSegmentationEngineCSVImpl(ReadBuffer & in, DB::Memory<> & memory, size_t & used_size, size_t min_chunk_size)
{
    if (in.eof())
        return false;

    skipWhitespacesAndTabs(in);
    char * begin_pos = in.position();
    memory.resize(min_chunk_size);

    /// Saves the scanned bytes when the buffer is drained and reports whether the input has ended.
    auto reached_eof = [&] { return eofWithSavingBufferState(in, memory, used_size, begin_pos); };

    /// True once the bytes already stored plus the bytes scanned in the current buffer reach the threshold.
    auto chunk_is_large_enough = [&]
    {
        return used_size + static_cast<size_t>(in.position() - begin_pos) >= min_chunk_size;
    };

    bool inside_quotes = false;
    bool chunk_complete = false;

    while (!reached_eof() && !chunk_complete)
    {
        if (inside_quotes)
        {
            in.position() = find_first_symbols<'"'>(in.position(), in.buffer().end());
            if (in.position() == in.buffer().end())
                continue;
            if (*in.position() != '"')
                continue;

            ++in.position();
            /// A second quote right after the first one keeps us inside the quoted region.
            if (!reached_eof() && *in.position() == '"')
                ++in.position();
            else
                inside_quotes = false;
            continue;
        }

        in.position() = find_first_symbols<'"','\r', '\n'>(in.position(), in.buffer().end());
        if (in.position() == in.buffer().end())
            continue;

        switch (*in.position())
        {
            case '"':
                inside_quotes = true;
                ++in.position();
                break;

            case '\n':
            case '\r':
            {
                /// The other half of a two-character line terminator, if one follows.
                const char paired_terminator = (*in.position() == '\n') ? '\r' : '\n';

                /// The size is checked before the terminator itself is consumed.
                if (chunk_is_large_enough())
                    chunk_complete = true;

                ++in.position();
                if (!reached_eof() && *in.position() == paired_terminator)
                    ++in.position();
                break;
            }

            default:
                break;
        }
    }

    /// Flush whatever was scanned in the current buffer, even if it still has pending data.
    eofWithSavingBufferState(in, memory, used_size, begin_pos, true);
    return true;
}
|
||||
|
||||
/// Makes the CSV chunking routine available in `factory` under the format name "CSV".
void registerFileSegmentationEngineCSV(FormatFactory & factory)
{
    auto * const csv_engine = &fileSegmentationEngineCSVImpl;
    factory.registerFileSegmentationEngine("CSV", csv_engine);
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -270,71 +270,4 @@ void registerInputFormatProcessorJSONEachRow(FormatFactory & factory)
|
||||
});
|
||||
}
|
||||
|
||||
/// Cuts the next chunk of JSONEachRow input out of `in` and accumulates it in `memory`.
///
/// The scan counts curly braces outside of double-quoted strings (a backslash skips the
/// character that follows it) and keeps going while an object is still open or while the
/// accumulated size is below `min_chunk_size`; it also stops at end of input.
///
/// `used_size` is advanced by the number of bytes stored (via eofWithSavingBufferState).
/// Returns false only if `in` is already exhausted on entry, true otherwise.
bool fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t & used_size, size_t min_chunk_size)
{
    if (in.eof())
        return false;

    skipWhitespaceIfAny(in);
    char * begin_pos = in.position();
    memory.resize(min_chunk_size);

    /// Saves the scanned bytes when the buffer is drained and reports whether the input has ended.
    auto reached_eof = [&] { return eofWithSavingBufferState(in, memory, used_size, begin_pos); };

    /// Number of currently open '{' that have not been matched by '}'.
    size_t balance = 0;
    bool inside_quotes = false;

    while (!reached_eof()
        && (balance || used_size + static_cast<size_t>(in.position() - begin_pos) < min_chunk_size))
    {
        if (inside_quotes)
        {
            in.position() = find_first_symbols<'\\', '"'>(in.position(), in.buffer().end());
            if (in.position() == in.buffer().end())
                continue;

            if (*in.position() == '\\')
            {
                /// Skip the backslash and, if the input has not ended, the escaped character.
                ++in.position();
                if (!reached_eof())
                    ++in.position();
            }
            else if (*in.position() == '"')
            {
                ++in.position();
                inside_quotes = false;
            }
            continue;
        }

        in.position() = find_first_symbols<'{', '}', '\\', '"'>(in.position(), in.buffer().end());
        if (in.position() == in.buffer().end())
            continue;

        switch (*in.position())
        {
            case '{':
                ++balance;
                ++in.position();
                break;

            case '}':
                /// An unmatched '}' must not wrap the unsigned counter around: that would keep
                /// the loop condition true and pull the rest of the input into a single chunk.
                if (balance)
                    --balance;
                ++in.position();
                break;

            case '\\':
                /// Skip the backslash and, if the input has not ended, the escaped character.
                ++in.position();
                if (!reached_eof())
                    ++in.position();
                break;

            case '"':
                inside_quotes = true;
                ++in.position();
                break;

            default:
                break;
        }
    }

    /// Flush whatever was scanned in the current buffer, even if it still has pending data.
    eofWithSavingBufferState(in, memory, used_size, begin_pos, true);
    return true;
}
|
||||
|
||||
/// Makes the JSONEachRow chunking routine available in `factory` under the format name "JSONEachRow".
void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory)
{
    auto * const json_each_row_engine = &fileSegmentationEngineJSONEachRowImpl;
    factory.registerFileSegmentationEngine("JSONEachRow", json_each_row_engine);
}
|
||||
|
||||
}
|
||||
|
@ -210,40 +210,4 @@ void registerInputFormatProcessorTSKV(FormatFactory & factory)
|
||||
});
|
||||
}
|
||||
|
||||
/// Cuts the next chunk of TSKV input out of `in` and accumulates it in `memory`.
///
/// The scan skips the character following every backslash and finishes at the first '\n' or
/// '\r' at which the accumulated size has reached `min_chunk_size`, or at end of input.
///
/// `used_size` is advanced by the number of bytes stored (via eofWithSavingBufferState).
/// Returns false only if `in` is already exhausted on entry, true otherwise.
bool fileSegmentationEngineTSKVImpl(ReadBuffer & in, DB::Memory<> & memory, size_t & used_size, size_t min_chunk_size)
{
    if (in.eof())
        return false;

    char * begin_pos = in.position();
    memory.resize(min_chunk_size);

    /// Saves the scanned bytes when the buffer is drained and reports whether the input has ended.
    auto reached_eof = [&] { return eofWithSavingBufferState(in, memory, used_size, begin_pos); };

    bool chunk_complete = false;

    while (!reached_eof() && !chunk_complete)
    {
        in.position() = find_first_symbols<'\\','\r', '\n'>(in.position(), in.buffer().end());
        if (in.position() == in.buffer().end())
            continue;

        switch (*in.position())
        {
            case '\\':
                /// Skip the backslash and, if the input has not ended, the escaped character.
                ++in.position();
                if (!reached_eof())
                    ++in.position();
                break;

            case '\n':
            case '\r':
                /// The size is checked before the terminator itself is consumed.
                if (used_size + static_cast<size_t>(in.position() - begin_pos) >= min_chunk_size)
                    chunk_complete = true;
                ++in.position();
                break;

            default:
                break;
        }
    }

    /// Flush whatever was scanned in the current buffer, even if it still has pending data.
    eofWithSavingBufferState(in, memory, used_size, begin_pos, true);
    return true;
}
|
||||
|
||||
/// Makes the TSKV chunking routine available in `factory` under the format name "TSKV".
void registerFileSegmentationEngineTSKV(FormatFactory & factory)
{
    auto * const tskv_engine = &fileSegmentationEngineTSKVImpl;
    factory.registerFileSegmentationEngine("TSKV", tskv_engine);
}
|
||||
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user