diff --git a/src/DataTypes/Serializations/SerializationBool.cpp b/src/DataTypes/Serializations/SerializationBool.cpp index 0542e050a25..1709e3321d6 100644 --- a/src/DataTypes/Serializations/SerializationBool.cpp +++ b/src/DataTypes/Serializations/SerializationBool.cpp @@ -57,7 +57,7 @@ void SerializationBool::deserializeTextEscaped(IColumn & column, ReadBuffer & is if (istr.eof()) throw Exception("Expected boolean value but get EOF.", ErrorCodes::CANNOT_PARSE_BOOL); - deserializeWithCustom(column, istr, settings, [](ReadBuffer & buf){ return buf.eof() || *buf.position() == '\t' || *buf.position() == '\n'; }); + deserializeImpl(column, istr, settings, [](ReadBuffer & buf){ return buf.eof() || *buf.position() == '\t' || *buf.position() == '\n'; }); } void SerializationBool::serializeTextJSON(const IColumn &column, size_t row_num, WriteBuffer &ostr, const FormatSettings &settings) const @@ -93,7 +93,7 @@ void SerializationBool::deserializeTextCSV(IColumn & column, ReadBuffer & istr, if (istr.eof()) throw Exception("Expected boolean value but get EOF.", ErrorCodes::CANNOT_PARSE_BOOL); - deserializeWithCustom(column, istr, settings, [&](ReadBuffer & buf){ return buf.eof() || *buf.position() == settings.csv.delimiter || *buf.position() == '\n'; }); + deserializeImpl(column, istr, settings, [&](ReadBuffer & buf){ return buf.eof() || *buf.position() == settings.csv.delimiter || *buf.position() == '\n'; }); } void SerializationBool::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const @@ -106,7 +106,7 @@ void SerializationBool::deserializeTextRaw(IColumn & column, ReadBuffer & istr, if (istr.eof()) throw Exception("Expected boolean value but get EOF.", ErrorCodes::CANNOT_PARSE_BOOL); - deserializeWithCustom(column, istr, settings, [&](ReadBuffer & buf){ return buf.eof() || *buf.position() == '\t' || *buf.position() == '\n'; }); + deserializeImpl(column, istr, settings, [&](ReadBuffer & buf){ return buf.eof() || *buf.position() == '\t' || *buf.position() == '\n'; }); } void SerializationBool::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const @@ -114,16 +114,42 @@ void SerializationBool::serializeTextQuoted(const IColumn & column, size_t row_n serializeSimple(column, row_num, ostr, settings); } -void SerializationBool::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +void SerializationBool::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { if (istr.eof()) throw Exception("Expected boolean value but get EOF.", ErrorCodes::CANNOT_PARSE_BOOL); auto * col = checkAndGetDeserializeColumnType(column); - if (!deserializeImpl(col, istr)) - throw Exception( - ErrorCodes::CANNOT_PARSE_BOOL, - "Invalid boolean value, should be one of True/False/T/F/Y/N/Yes/No/On/Off/Enable/Disable/Enabled/Disabled/1/0"); + + char symbol = toLowerIfAlphaASCII(*istr.position()); + switch (symbol) + { + case 't': + assertStringCaseInsensitive("true", istr); + col->insert(true); + break; + case 'f': + assertStringCaseInsensitive("false", istr); + col->insert(false); + break; + case '1': + col->insert(true); + break; + case '0': + col->insert(false); + break; + case '\'': + ++istr.position(); + deserializeImpl(column, istr, settings, [](ReadBuffer & buf){ return !buf.eof() && *buf.position() == '\''; }); + assertChar('\'', istr); + break; + default: + throw Exception( + ErrorCodes::CANNOT_PARSE_BOOL, + "Cannot parse boolean value here: '{}', should be true/false, 1/0 or on of " + "True/False/T/F/Y/N/Yes/No/On/Off/Enable/Disable/Enabled/Disabled/1/0 in quotes", + String(istr.position(), std::min(10ul, istr.available()))); + } } void SerializationBool::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const @@ -131,7 +157,7 @@ void SerializationBool::deserializeWholeText(IColumn & column, ReadBuffer & istr if (istr.eof()) throw Exception("Expected boolean value but get EOF.", ErrorCodes::CANNOT_PARSE_BOOL); - deserializeWithCustom(column, istr, settings, [&](ReadBuffer & buf){ return buf.eof(); }); + deserializeImpl(column, istr, settings, [&](ReadBuffer & buf){ return buf.eof(); }); } void SerializationBool::serializeCustom(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const @@ -163,7 +189,7 @@ void SerializationBool::serializeSimple(const IColumn & column, size_t row_num, ostr.write(str_false, sizeof(str_false) - 1); } -void SerializationBool::deserializeWithCustom( +void SerializationBool::deserializeImpl( IColumn & column, ReadBuffer & istr, const FormatSettings & settings, std::function check_end_of_value) const { ColumnUInt8 * col = checkAndGetDeserializeColumnType(column); @@ -190,7 +216,7 @@ void SerializationBool::deserializeWithCustom( } buf.rollbackToCheckpoint(); - if (deserializeImpl(col, buf) && check_end_of_value(buf)) + if (tryDeserializeAllVariants(col, buf) && check_end_of_value(buf)) { buf.dropCheckpoint(); if (buf.hasUnreadData()) @@ -201,15 +227,18 @@ void SerializationBool::deserializeWithCustom( return; } + buf.makeContinuousMemoryFromCheckpointToPos(); + buf.rollbackToCheckpoint(); throw Exception( ErrorCodes::CANNOT_PARSE_BOOL, - "Invalid boolean value, should be '{}' or '{}' controlled by setting bool_true_representation and " + "Cannot parse boolean value here: '{}', should be '{}' or '{}' controlled by setting bool_true_representation and " "bool_false_representation or one of " "True/False/T/F/Y/N/Yes/No/On/Off/Enable/Disable/Enabled/Disabled/1/0", + String(buf.position(), std::min(10lu, buf.available())), settings.bool_true_representation, settings.bool_false_representation); } -bool SerializationBool::deserializeImpl(ColumnUInt8 * column, ReadBuffer & istr) const +bool SerializationBool::tryDeserializeAllVariants(ColumnUInt8 * column, ReadBuffer & istr) const { if (checkCharCaseInsensitive('1', istr)) { diff --git a/src/DataTypes/Serializations/SerializationBool.h b/src/DataTypes/Serializations/SerializationBool.h index 7999f5388d7..48c8b679694 100644 --- a/src/DataTypes/Serializations/SerializationBool.h +++ b/src/DataTypes/Serializations/SerializationBool.h @@ -40,8 +40,8 @@ public: protected: void serializeCustom(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const; void serializeSimple(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const; - void deserializeWithCustom(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, std::function check_end_of_value) const; - bool deserializeImpl(ColumnUInt8 * column, ReadBuffer & istr) const; + void deserializeImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, std::function check_end_of_value) const; + bool tryDeserializeAllVariants(ColumnUInt8 * column, ReadBuffer & istr) const; }; } diff --git a/tests/queries/0_stateless/02152_bool_type_parsing.sh b/tests/queries/0_stateless/02152_bool_type_parsing.sh index 65c173e700b..9e9db499cf5 100755 --- a/tests/queries/0_stateless/02152_bool_type_parsing.sh +++ b/tests/queries/0_stateless/02152_bool_type_parsing.sh @@ -17,7 +17,7 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FILE_NAME', 'TSV', 'bool Bool') sett $CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FILE_NAME', 'CSV', 'bool Bool') settings bool_true_representation='Custom true', bool_false_representation='Custom false'" $CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FILE_NAME', 'CSV', 'bool Bool') settings bool_true_representation='Custom true', bool_false_representation='Custom false', input_format_parallel_parsing=0, max_read_buffer_size=2" -echo -e "Yes\nNo\nyes\nno\ny\nY\nN\nTrue\nFalse\ntrue\nfalse\nt\nf\nT\nF\nOn\nOff\non\noff\nenable\ndisable\nenabled\ndisabled" > $DATA_FILE +echo -e "'Yes'\n'No'\n'yes'\n'no'\n'y'\n'Y'\n'N'\nTrue\nFalse\ntrue\nfalse\n't'\n'f'\n'T'\n'F'\n'On'\n'Off'\n'on'\n'off'\n'enable'\n'disable'\n'enabled'\n'disabled'" > $DATA_FILE $CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FILE_NAME', 'CustomSeparated', 'bool Bool') settings format_custom_escaping_rule='Quoted'" $CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FILE_NAME', 'CustomSeparated', 'bool Bool') settings format_custom_escaping_rule='Quoted', input_format_parallel_parsing=0, max_read_buffer_size=2" diff --git a/utils/check-style/codespell-ignore-words.list b/utils/check-style/codespell-ignore-words.list index 200b55d112d..d3a7586647c 100644 --- a/utils/check-style/codespell-ignore-words.list +++ b/utils/check-style/codespell-ignore-words.list @@ -6,7 +6,7 @@ nd ect pullrequest pullrequests -thenn ths offsett numer +ue