2013-02-03 18:39:09 +00:00
|
|
|
|
#include <DB/Core/Defines.h>
|
|
|
|
|
|
2010-05-13 16:13:38 +00:00
|
|
|
|
#include <DB/Columns/ColumnString.h>
|
|
|
|
|
#include <DB/Columns/ColumnsNumber.h>
|
2011-08-12 18:27:39 +00:00
|
|
|
|
#include <DB/Columns/ColumnConst.h>
|
2010-05-13 16:13:38 +00:00
|
|
|
|
|
|
|
|
|
#include <DB/DataTypes/DataTypeString.h>
|
|
|
|
|
|
2010-06-04 18:25:25 +00:00
|
|
|
|
#include <DB/IO/ReadHelpers.h>
|
|
|
|
|
#include <DB/IO/WriteHelpers.h>
|
|
|
|
|
#include <DB/IO/VarInt.h>
|
|
|
|
|
|
2016-01-13 21:05:11 +00:00
|
|
|
|
#if defined(__x86_64__)
|
|
|
|
|
#include <emmintrin.h>
|
|
|
|
|
#endif
|
2015-02-15 14:25:43 +00:00
|
|
|
|
|
2010-05-13 16:13:38 +00:00
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
|
2010-06-04 18:25:25 +00:00
|
|
|
|
void DataTypeString::serializeBinary(const Field & field, WriteBuffer & ostr) const
|
2010-05-13 16:13:38 +00:00
|
|
|
|
{
|
2013-01-05 20:03:19 +00:00
|
|
|
|
const String & s = get<const String &>(field);
|
2010-05-13 16:13:38 +00:00
|
|
|
|
writeVarUInt(s.size(), ostr);
|
2010-06-04 18:25:25 +00:00
|
|
|
|
writeString(s, ostr);
|
2010-05-13 16:13:38 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2010-06-04 18:25:25 +00:00
|
|
|
|
/// Reads one string (length via readVarUInt, then exactly that many bytes) into `field`.
void DataTypeString::deserializeBinary(Field & field, ReadBuffer & istr) const
{
    UInt64 length;
    readVarUInt(length, istr);

    /// Make the field hold an empty String first, so the bytes can be read straight into its storage.
    field = String();
    String & target = get<String &>(field);

    target.resize(length);
    istr.readStrict(&target[0], length);
}
|
|
|
|
|
|
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
void DataTypeString::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const
|
|
|
|
|
{
|
|
|
|
|
const StringRef & s = static_cast<const ColumnString &>(column).getDataAt(row_num);
|
|
|
|
|
writeVarUInt(s.size, ostr);
|
|
|
|
|
writeString(s, ostr);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** Reads one string (length via readVarUInt, then the bytes) and appends it to a ColumnString.
  * The appended bytes are followed by a zero byte, and the new end position is pushed to the offsets.
  * If resizing or reading throws, the appended offset and the chars size are rolled back before rethrowing.
  */
void DataTypeString::deserializeBinary(IColumn & column, ReadBuffer & istr) const
{
    ColumnString & strings = static_cast<ColumnString &>(column);
    ColumnString::Chars_t & chars = strings.getChars();
    ColumnString::Offsets_t & offsets = strings.getOffsets();

    UInt64 length;
    readVarUInt(length, istr);

    const size_t chars_size_before = chars.size();
    /// One extra byte for the terminating zero.
    const size_t chars_size_after = chars_size_before + length + 1;

    offsets.push_back(chars_size_after);

    try
    {
        chars.resize(chars_size_after);
        /// The new string starts where the old contents ended.
        istr.readStrict(reinterpret_cast<char *>(&chars[chars_size_before]), length);
        chars.back() = 0;
    }
    catch (...)
    {
        offsets.pop_back();
        chars.resize_assume_reserved(chars_size_before);
        throw;
    }
}
|
|
|
|
|
|
|
|
|
|
|
2012-12-11 20:31:39 +00:00
|
|
|
|
void DataTypeString::serializeBinary(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const
|
2010-05-13 16:13:38 +00:00
|
|
|
|
{
|
2014-06-26 00:58:14 +00:00
|
|
|
|
const ColumnString & column_string = typeid_cast<const ColumnString &>(column);
|
2013-05-05 15:25:25 +00:00
|
|
|
|
const ColumnString::Chars_t & data = column_string.getChars();
|
|
|
|
|
const ColumnString::Offsets_t & offsets = column_string.getOffsets();
|
2010-05-13 16:13:38 +00:00
|
|
|
|
|
2012-12-12 17:16:44 +00:00
|
|
|
|
size_t size = column.size();
|
2013-05-05 15:25:25 +00:00
|
|
|
|
if (!size)
|
2010-05-13 16:13:38 +00:00
|
|
|
|
return;
|
|
|
|
|
|
2012-12-11 20:31:39 +00:00
|
|
|
|
size_t end = limit && offset + limit < size
|
|
|
|
|
? offset + limit
|
|
|
|
|
: size;
|
2012-12-12 17:46:23 +00:00
|
|
|
|
|
|
|
|
|
if (offset == 0)
|
|
|
|
|
{
|
|
|
|
|
UInt64 str_size = offsets[0] - 1;
|
|
|
|
|
writeVarUInt(str_size, ostr);
|
|
|
|
|
ostr.write(reinterpret_cast<const char *>(&data[0]), str_size);
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2012-12-12 17:46:23 +00:00
|
|
|
|
++offset;
|
|
|
|
|
}
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2012-12-11 20:31:39 +00:00
|
|
|
|
for (size_t i = offset; i < end; ++i)
|
2010-05-13 16:13:38 +00:00
|
|
|
|
{
|
2010-06-04 19:06:32 +00:00
|
|
|
|
UInt64 str_size = offsets[i] - offsets[i - 1] - 1;
|
2010-05-13 16:13:38 +00:00
|
|
|
|
writeVarUInt(str_size, ostr);
|
|
|
|
|
ostr.write(reinterpret_cast<const char *>(&data[offsets[i - 1]]), str_size);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2015-02-15 15:18:21 +00:00
|
|
|
|
template <int UNROLL_TIMES>
|
|
|
|
|
static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars_t & data, ColumnString::Offsets_t & offsets, ReadBuffer & istr, size_t limit)
|
2010-05-13 16:13:38 +00:00
|
|
|
|
{
|
2013-11-26 10:18:09 +00:00
|
|
|
|
size_t offset = data.size();
|
2010-05-13 16:13:38 +00:00
|
|
|
|
for (size_t i = 0; i < limit; ++i)
|
|
|
|
|
{
|
2010-06-04 18:38:56 +00:00
|
|
|
|
if (istr.eof())
|
|
|
|
|
break;
|
|
|
|
|
|
2010-05-13 16:13:38 +00:00
|
|
|
|
UInt64 size;
|
|
|
|
|
readVarUInt(size, istr);
|
|
|
|
|
|
2010-06-07 17:50:50 +00:00
|
|
|
|
offset += size + 1;
|
2010-05-13 16:13:38 +00:00
|
|
|
|
offsets.push_back(offset);
|
|
|
|
|
|
2013-11-26 10:18:09 +00:00
|
|
|
|
data.resize(offset);
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2015-02-15 14:25:43 +00:00
|
|
|
|
if (size)
|
|
|
|
|
{
|
2016-01-13 21:05:11 +00:00
|
|
|
|
#if defined(__x86_64__)
|
2015-02-15 14:25:43 +00:00
|
|
|
|
/// Оптимистичная ветка, в которой возможно более эффективное копирование.
|
2016-04-20 21:23:23 +00:00
|
|
|
|
if (offset + 16 * UNROLL_TIMES <= data.allocated_size() && istr.position() + size + 16 * UNROLL_TIMES <= istr.buffer().end())
|
2015-02-15 14:25:43 +00:00
|
|
|
|
{
|
|
|
|
|
const __m128i * sse_src_pos = reinterpret_cast<const __m128i *>(istr.position());
|
2015-02-15 16:12:08 +00:00
|
|
|
|
const __m128i * sse_src_end = sse_src_pos + (size + (16 * UNROLL_TIMES - 1)) / 16 / UNROLL_TIMES * UNROLL_TIMES;
|
2015-02-15 14:25:43 +00:00
|
|
|
|
__m128i * sse_dst_pos = reinterpret_cast<__m128i *>(&data[offset - size - 1]);
|
|
|
|
|
|
|
|
|
|
while (sse_src_pos < sse_src_end)
|
|
|
|
|
{
|
2015-02-15 15:18:21 +00:00
|
|
|
|
/// NOTE gcc 4.9.2 разворачивает цикл, но почему-то использует только один xmm регистр.
|
2015-02-15 15:38:04 +00:00
|
|
|
|
///for (size_t j = 0; j < UNROLL_TIMES; ++j)
|
|
|
|
|
/// _mm_storeu_si128(sse_dst_pos + j, _mm_loadu_si128(sse_src_pos + j));
|
2015-02-15 15:18:21 +00:00
|
|
|
|
|
|
|
|
|
sse_src_pos += UNROLL_TIMES;
|
|
|
|
|
sse_dst_pos += UNROLL_TIMES;
|
2015-02-15 15:38:04 +00:00
|
|
|
|
|
2015-02-15 16:12:08 +00:00
|
|
|
|
if (UNROLL_TIMES >= 4) __asm__("movdqu %0, %%xmm0" :: "m"(sse_src_pos[-4]));
|
|
|
|
|
if (UNROLL_TIMES >= 3) __asm__("movdqu %0, %%xmm1" :: "m"(sse_src_pos[-3]));
|
|
|
|
|
if (UNROLL_TIMES >= 2) __asm__("movdqu %0, %%xmm2" :: "m"(sse_src_pos[-2]));
|
|
|
|
|
if (UNROLL_TIMES >= 1) __asm__("movdqu %0, %%xmm3" :: "m"(sse_src_pos[-1]));
|
2015-02-15 15:59:30 +00:00
|
|
|
|
|
2015-02-15 16:12:08 +00:00
|
|
|
|
if (UNROLL_TIMES >= 4) __asm__("movdqu %%xmm0, %0" : "=m"(sse_dst_pos[-4]));
|
|
|
|
|
if (UNROLL_TIMES >= 3) __asm__("movdqu %%xmm1, %0" : "=m"(sse_dst_pos[-3]));
|
|
|
|
|
if (UNROLL_TIMES >= 2) __asm__("movdqu %%xmm2, %0" : "=m"(sse_dst_pos[-2]));
|
|
|
|
|
if (UNROLL_TIMES >= 1) __asm__("movdqu %%xmm3, %0" : "=m"(sse_dst_pos[-1]));
|
2015-02-15 14:25:43 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
istr.position() += size;
|
|
|
|
|
}
|
|
|
|
|
else
|
2016-01-13 21:05:11 +00:00
|
|
|
|
#endif
|
2015-02-15 14:25:43 +00:00
|
|
|
|
{
|
|
|
|
|
istr.readStrict(reinterpret_cast<char*>(&data[offset - size - 1]), size);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2010-06-04 19:06:32 +00:00
|
|
|
|
data[offset - 1] = 0;
|
2010-05-13 16:13:38 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2015-02-15 15:18:21 +00:00
|
|
|
|
/** Reads up to `limit` strings from `istr` into a ColumnString.
  * First estimates the average number of chars per value to reserve memory up front,
  * then picks the unroll factor of deserializeBinarySSE2 from that estimate.
  */
void DataTypeString::deserializeBinary(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const
{
    ColumnString & strings = typeid_cast<ColumnString &>(column);
    ColumnString::Chars_t & chars = strings.getChars();
    ColumnString::Offsets_t & offsets = strings.getOffsets();

    /// Default estimate, used when there is no usable hint and the heuristic below does not fire.
    double avg_chars_size = DBMS_APPROX_STRING_SIZE;

    if (avg_value_size_hint && avg_value_size_hint > sizeof(offsets[0]))
    {
        /// Chosen at random.
        constexpr auto avg_value_size_hint_reserve_multiplier = 1.2;

        /// The hint is reduced by the size of one offset entry to get the size of the chars alone.
        avg_chars_size = (avg_value_size_hint - sizeof(offsets[0])) * avg_value_size_hint_reserve_multiplier;
    }
    else if (istr.position() + sizeof(UInt32) <= istr.buffer().end()
        && *reinterpret_cast<const UInt32 *>(istr.position()) == 0)
    {
        /** A small heuristic to detect that the column contains many empty strings:
          * the first 4 strings are in the buffer and are empty.
          * In this case, to save memory, assume that the average value size is small.
          */
        avg_chars_size = 1;
    }

    const double expected_new_chars = std::ceil(limit * avg_chars_size);
    chars.reserve(chars.size() + expected_new_chars);

    offsets.reserve(offsets.size() + limit);

    /// Longer expected strings get a larger copy block (16 bytes per unroll step).
    if (avg_chars_size < 32)
        deserializeBinarySSE2<1>(chars, offsets, istr, limit);
    else if (avg_chars_size < 48)
        deserializeBinarySSE2<2>(chars, offsets, istr, limit);
    else if (avg_chars_size < 64)
        deserializeBinarySSE2<3>(chars, offsets, istr, limit);
    else
        deserializeBinarySSE2<4>(chars, offsets, istr, limit);
}
|
|
|
|
|
|
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
void DataTypeString::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr) const
|
2010-05-13 16:13:38 +00:00
|
|
|
|
{
|
2016-02-16 16:39:39 +00:00
|
|
|
|
writeString(static_cast<const ColumnString &>(column).getDataAt(row_num), ostr);
|
2010-05-13 16:13:38 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
void DataTypeString::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr) const
|
2010-05-13 16:13:38 +00:00
|
|
|
|
{
|
2016-02-16 16:39:39 +00:00
|
|
|
|
writeEscapedString(static_cast<const ColumnString &>(column).getDataAt(row_num), ostr);
|
2010-05-13 16:13:38 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
template <typename Reader>
|
|
|
|
|
static inline void read(IColumn & column, ReadBuffer & istr, Reader && reader)
|
2010-05-13 16:13:38 +00:00
|
|
|
|
{
|
2016-02-16 16:39:39 +00:00
|
|
|
|
ColumnString & column_string = static_cast<ColumnString &>(column);
|
|
|
|
|
ColumnString::Chars_t & data = column_string.getChars();
|
|
|
|
|
ColumnString::Offsets_t & offsets = column_string.getOffsets();
|
|
|
|
|
|
|
|
|
|
size_t old_chars_size = data.size();
|
|
|
|
|
size_t old_offsets_size = offsets.size();
|
|
|
|
|
|
|
|
|
|
try
|
|
|
|
|
{
|
|
|
|
|
reader(data);
|
|
|
|
|
data.push_back(0);
|
|
|
|
|
offsets.push_back(data.size());
|
|
|
|
|
}
|
|
|
|
|
catch (...)
|
|
|
|
|
{
|
|
|
|
|
offsets.resize_assume_reserved(old_offsets_size);
|
|
|
|
|
data.resize_assume_reserved(old_chars_size);
|
|
|
|
|
throw;
|
|
|
|
|
}
|
2010-05-13 16:13:38 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
/// Appends one value to the column, parsed from `istr` by readEscapedStringInto.
void DataTypeString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr) const
{
    auto read_escaped = [&](ColumnString::Chars_t & chars) { readEscapedStringInto(chars, istr); };
    read(column, istr, read_escaped);
}
|
|
|
|
|
|
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
void DataTypeString::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr) const
|
2010-05-13 16:13:38 +00:00
|
|
|
|
{
|
2016-02-16 16:39:39 +00:00
|
|
|
|
writeQuotedString(static_cast<const ColumnString &>(column).getDataAt(row_num), ostr);
|
2010-05-13 16:13:38 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
/// Appends one value to the column, parsed from `istr` by readQuotedStringInto.
void DataTypeString::deserializeTextQuoted(IColumn & column, ReadBuffer & istr) const
{
    auto read_quoted = [&](ColumnString::Chars_t & chars) { readQuotedStringInto(chars, istr); };
    read(column, istr, read_quoted);
}
|
|
|
|
|
|
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
void DataTypeString::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr) const
|
2013-05-16 12:52:09 +00:00
|
|
|
|
{
|
2016-02-16 16:39:39 +00:00
|
|
|
|
writeJSONString(static_cast<const ColumnString &>(column).getDataAt(row_num), ostr);
|
2013-05-16 12:52:09 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2016-02-18 11:44:50 +00:00
|
|
|
|
/// Appends one value to the column, parsed from `istr` by readJSONStringInto.
void DataTypeString::deserializeTextJSON(IColumn & column, ReadBuffer & istr) const
{
    auto read_json = [&](ColumnString::Chars_t & chars) { readJSONStringInto(chars, istr); };
    read(column, istr, read_json);
}
|
|
|
|
|
|
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
void DataTypeString::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr) const
|
2016-02-14 02:37:42 +00:00
|
|
|
|
{
|
2016-02-16 16:39:39 +00:00
|
|
|
|
writeXMLString(static_cast<const ColumnString &>(column).getDataAt(row_num), ostr);
|
2016-02-14 02:37:42 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
void DataTypeString::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr) const
|
2016-02-07 08:42:21 +00:00
|
|
|
|
{
|
2016-02-16 16:39:39 +00:00
|
|
|
|
writeCSVString<>(static_cast<const ColumnString &>(column).getDataAt(row_num), ostr);
|
2016-02-07 08:42:21 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
|
/** Appends one value to the column, parsed from `istr` by readCSVStringInto.
  * NOTE(review): `delimiter` is not forwarded to readCSVStringInto here - confirm whether
  * the helper accepts a delimiter argument and whether it should be passed through.
  */
void DataTypeString::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const char delimiter) const
{
    auto read_csv = [&](ColumnString::Chars_t & chars) { readCSVStringInto(chars, istr); };
    read(column, istr, read_csv);
}
|
|
|
|
|
|
|
|
|
|
|
2011-08-09 19:19:00 +00:00
|
|
|
|
/// Creates a new, default-constructed ColumnString.
ColumnPtr DataTypeString::createColumn() const
{
    ColumnPtr column = std::make_shared<ColumnString>();
    return column;
}
|
|
|
|
|
|
2011-08-12 18:27:39 +00:00
|
|
|
|
|
|
|
|
|
/// Creates a ColumnConstString of `size` rows from the String stored in `field`.
ColumnPtr DataTypeString::createConstColumn(size_t size, const Field & field) const
{
    const String & value = get<const String &>(field);
    return std::make_shared<ColumnConstString>(size, value);
}
|
|
|
|
|
|
2010-05-13 16:13:38 +00:00
|
|
|
|
}
|