Remove all segmentation engines except TSV

This commit is contained in:
Alexander Kuzmenkov 2019-11-18 16:12:28 +03:00
parent 83030b98a2
commit 4ab7ac14bd
5 changed files with 0 additions and 195 deletions

View File

@ -291,11 +291,7 @@ void registerOutputFormatProcessorTemplate(FormatFactory &factory);
/// File Segmentation Engines for parallel reading
/// Forward declarations of the per-format registration functions; each takes the factory to register into.
void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory);
void registerFileSegmentationEngineTabSeparated(FormatFactory & factory);
/// NOTE(review): the Values engine declaration is commented out - presumably it is not available; confirm before enabling.
//void registerFileSegmentationEngineValues(FormatFactory & factory);
void registerFileSegmentationEngineCSV(FormatFactory & factory);
void registerFileSegmentationEngineTSKV(FormatFactory & factory);
/// Output only (presentational) formats.
@ -347,10 +343,7 @@ FormatFactory::FormatFactory()
registerInputFormatProcessorTemplate(*this);
registerOutputFormatProcessorTemplate(*this);
registerFileSegmentationEngineJSONEachRow(*this);
registerFileSegmentationEngineTabSeparated(*this);
registerFileSegmentationEngineCSV(*this);
registerFileSegmentationEngineTSKV(*this);
registerOutputFormatNull(*this);

View File

@ -1053,26 +1053,4 @@ void skipToUnescapedNextLineOrEOF(ReadBuffer & buf)
}
}
/// Appends the bytes in [begin_pos, buf.position()) to `memory` at offset `used_size`
/// and then asks the buffer whether its input has ended.
///
/// The copy happens only when `force_saving_buffer_state` is set or the buffer reports
/// no pending data; otherwise nothing is modified and false is returned.
/// After a copy, `used_size` is increased by the number of copied bytes and `begin_pos`
/// is re-pointed at the buffer's position as observed after the eof() call.
///
/// Returns the result of buf.eof() when a copy took place, false otherwise.
bool eofWithSavingBufferState(ReadBuffer & buf, DB::Memory<> & memory, size_t & used_size, char * & begin_pos, bool force_saving_buffer_state)
{
    /// Unread data is still available and a save was not forced: the caller can keep scanning in place.
    if (!force_saving_buffer_state && buf.hasPendingData())
        return false;

    const size_t bytes_to_save = static_cast<size_t>(buf.position() - begin_pos);
    const size_t required_size = used_size + bytes_to_save;

    /// Skip the resize call while the storage is already larger than what is needed.
    if (memory.size() <= required_size)
        memory.resize(required_size);

    memcpy(memory.data() + used_size, begin_pos, bytes_to_save);
    used_size = required_size;

    /// The bytes are saved before eof() is queried and begin_pos is refreshed only afterwards
    /// (presumably because eof() may replace the buffer contents - confirm against ReadBuffer).
    const bool input_ended = buf.eof();
    begin_pos = buf.position();
    return input_ended;
}
}

View File

@ -422,68 +422,5 @@ void registerInputFormatProcessorCSV(FormatFactory & factory)
}
}
/// Collects the next chunk of CSV input from `in` into `memory`.
///
/// Scanning stops at the first line break found outside of double quotes once the collected
/// size has reached `min_chunk_size`, or when the input ends. A doubled quote ("") inside a
/// quoted field is treated as an escaped quote, and a "\n\r" or "\r\n" pair is consumed as a
/// single line break.
///
/// `used_size` is updated by eofWithSavingBufferState as data is copied into `memory`.
/// Returns false if `in` is already at eof on entry, true otherwise.
bool fileSegmentationEngineCSVImpl(ReadBuffer & in, DB::Memory<> & memory, size_t & used_size, size_t min_chunk_size)
{
    if (in.eof())
        return false;

    skipWhitespacesAndTabs(in);
    char * begin_pos = in.position();

    /// Saves the scanned bytes when the buffer has no pending data and reports whether the input ended.
    const auto input_exhausted = [&] { return eofWithSavingBufferState(in, memory, used_size, begin_pos); };

    /// True once the bytes gathered so far reach the requested minimum.
    const auto chunk_is_big_enough = [&]
    {
        return used_size + static_cast<size_t>(in.position() - begin_pos) >= min_chunk_size;
    };

    bool inside_quotes = false;
    bool chunk_complete = false;
    memory.resize(min_chunk_size);

    while (!input_exhausted() && !chunk_complete)
    {
        if (inside_quotes)
        {
            in.position() = find_first_symbols<'"'>(in.position(), in.buffer().end());
            if (in.position() == in.buffer().end())
                continue;

            if (*in.position() == '"')
            {
                ++in.position();
                /// A second quote right after the first one is an escaped quote; anything else closes the field.
                if (!input_exhausted() && *in.position() == '"')
                    ++in.position();
                else
                    inside_quotes = false;
            }
            continue;
        }

        in.position() = find_first_symbols<'"','\r', '\n'>(in.position(), in.buffer().end());
        if (in.position() == in.buffer().end())
            continue;

        const char symbol = *in.position();
        if (symbol == '"')
        {
            inside_quotes = true;
            ++in.position();
        }
        else if (symbol == '\n' || symbol == '\r')
        {
            /// The size check is done before stepping over the line break.
            if (chunk_is_big_enough())
                chunk_complete = true;
            ++in.position();

            /// Swallow the other half of a two-character line break, if present.
            const char paired_symbol = (symbol == '\n') ? '\r' : '\n';
            if (!input_exhausted() && *in.position() == paired_symbol)
                ++in.position();
        }
    }

    /// Unconditionally save whatever has been scanned but not yet copied.
    eofWithSavingBufferState(in, memory, used_size, begin_pos, true);
    return true;
}
/// Registers fileSegmentationEngineCSVImpl in `factory` under the format name "CSV".
void registerFileSegmentationEngineCSV(FormatFactory & factory)
{
    const auto * format_name = "CSV";
    factory.registerFileSegmentationEngine(format_name, &fileSegmentationEngineCSVImpl);
}
}

View File

@ -270,71 +270,4 @@ void registerInputFormatProcessorJSONEachRow(FormatFactory & factory)
});
}
/// Collects the next chunk of JSONEachRow input from `in` into `memory`.
///
/// Curly braces outside of string literals are counted; scanning continues while an object is
/// still open (`balance` is non-zero) or while the collected size is below `min_chunk_size`,
/// and stops when the input ends. Inside and outside of string literals a backslash makes the
/// following character skipped, so an escaped quote does not toggle the string state.
///
/// `used_size` is updated by eofWithSavingBufferState as data is copied into `memory`.
/// Returns false if `in` is already at eof on entry, true otherwise.
bool fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t & used_size, size_t min_chunk_size)
{
    if (in.eof())
        return false;

    skipWhitespaceIfAny(in);
    char * begin_pos = in.position();

    /// Number of currently open '{' outside of string literals.
    size_t balance = 0;
    /// True while the scan position is inside a double-quoted string.
    bool quotes = false;
    memory.resize(min_chunk_size);

    while (!eofWithSavingBufferState(in, memory, used_size, begin_pos)
        && (balance || used_size + static_cast<size_t>(in.position() - begin_pos) < min_chunk_size))
    {
        if (quotes)
        {
            in.position() = find_first_symbols<'\\', '"'>(in.position(), in.buffer().end());
            if (in.position() == in.buffer().end())
                continue;

            if (*in.position() == '\\')
            {
                /// Step over the backslash and, unless the input ended, over the escaped character.
                ++in.position();
                if (!eofWithSavingBufferState(in, memory, used_size, begin_pos))
                    ++in.position();
            }
            else if (*in.position() == '"')
            {
                ++in.position();
                quotes = false;
            }
        }
        else
        {
            in.position() = find_first_symbols<'{', '}', '\\', '"'>(in.position(), in.buffer().end());
            if (in.position() == in.buffer().end())
                continue;

            if (*in.position() == '{')
            {
                ++balance;
                ++in.position();
            }
            else if (*in.position() == '}')
            {
                /// `balance` is unsigned: decrementing it at zero (an unmatched '}' in malformed input)
                /// would wrap around to a huge value and keep the loop running until the end of input,
                /// pulling everything that remains into a single chunk. Ignore the stray brace instead.
                if (balance)
                    --balance;
                ++in.position();
            }
            else if (*in.position() == '\\')
            {
                /// Step over the backslash and, unless the input ended, over the escaped character.
                ++in.position();
                if (!eofWithSavingBufferState(in, memory, used_size, begin_pos))
                    ++in.position();
            }
            else if (*in.position() == '"')
            {
                quotes = true;
                ++in.position();
            }
        }
    }

    /// Unconditionally save whatever has been scanned but not yet copied.
    eofWithSavingBufferState(in, memory, used_size, begin_pos, true);
    return true;
}
/// Registers fileSegmentationEngineJSONEachRowImpl in `factory` under the format name "JSONEachRow".
void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory)
{
    const auto * format_name = "JSONEachRow";
    factory.registerFileSegmentationEngine(format_name, &fileSegmentationEngineJSONEachRowImpl);
}
}

View File

@ -210,40 +210,4 @@ void registerInputFormatProcessorTSKV(FormatFactory & factory)
});
}
/// Collects the next chunk of TSKV input from `in` into `memory`.
///
/// Scanning stops at the first '\n' or '\r' found once the collected size has reached
/// `min_chunk_size`, or when the input ends. A backslash makes the following character
/// skipped, so an escaped line break does not end the chunk.
///
/// `used_size` is updated by eofWithSavingBufferState as data is copied into `memory`.
/// Returns false if `in` is already at eof on entry, true otherwise.
bool fileSegmentationEngineTSKVImpl(ReadBuffer & in, DB::Memory<> & memory, size_t & used_size, size_t min_chunk_size)
{
    if (in.eof())
        return false;

    char * begin_pos = in.position();

    /// Saves the scanned bytes when the buffer has no pending data and reports whether the input ended.
    const auto input_exhausted = [&] { return eofWithSavingBufferState(in, memory, used_size, begin_pos); };

    bool chunk_complete = false;
    memory.resize(min_chunk_size);

    while (!input_exhausted() && !chunk_complete)
    {
        in.position() = find_first_symbols<'\\','\r', '\n'>(in.position(), in.buffer().end());
        if (in.position() == in.buffer().end())
            continue;

        switch (*in.position())
        {
            case '\\':
                /// Step over the backslash and, unless the input ended, over the escaped character.
                ++in.position();
                if (!input_exhausted())
                    ++in.position();
                break;

            case '\n':
            case '\r':
                /// The size check is done before stepping over the line break.
                if (used_size + static_cast<size_t>(in.position() - begin_pos) >= min_chunk_size)
                    chunk_complete = true;
                ++in.position();
                break;

            default:
                break;
        }
    }

    /// Unconditionally save whatever has been scanned but not yet copied.
    eofWithSavingBufferState(in, memory, used_size, begin_pos, true);
    return true;
}
/// Registers fileSegmentationEngineTSKVImpl in `factory` under the format name "TSKV".
void registerFileSegmentationEngineTSKV(FormatFactory & factory)
{
    const auto * format_name = "TSKV";
    factory.registerFileSegmentationEngine(format_name, &fileSegmentationEngineTSKVImpl);
}
}