mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Try to treat input values as enum id in tsv/csv
This commit is contained in:
parent
39cd1c0371
commit
ca4088b400
@ -155,7 +155,7 @@ void DataTypeEnum<Type>::deserializeTextEscaped(IColumn & column, ReadBuffer & i
|
||||
/// NOTE It would be nice to do without creating a temporary object - at least extract std::string out.
|
||||
std::string field_name;
|
||||
readEscapedString(field_name, istr);
|
||||
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name)));
|
||||
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name), true));
|
||||
}
|
||||
}
|
||||
|
||||
@ -182,7 +182,7 @@ void DataTypeEnum<Type>::deserializeWholeText(IColumn & column, ReadBuffer & ist
|
||||
{
|
||||
std::string field_name;
|
||||
readString(field_name, istr);
|
||||
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name)));
|
||||
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name), true));
|
||||
}
|
||||
}
|
||||
|
||||
@ -226,7 +226,7 @@ void DataTypeEnum<Type>::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
|
||||
{
|
||||
std::string field_name;
|
||||
readCSVString(field_name, istr, settings.csv);
|
||||
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name)));
|
||||
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name), true));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
|
||||
@ -80,13 +81,25 @@ public:
|
||||
return findByValue(value)->second;
|
||||
}
|
||||
|
||||
/// Resolves a textual enum element to its underlying value.
///
/// field_name       - element name as read from the input.
/// try_treat_as_id  - when true and field_name is not a known element name, a field_name
///                    made only of digits is parsed as an enum id and accepted if that id
///                    is declared for this type. It is used in CSV and TSV input formats.
///
/// Returns the value mapped to the name, or the parsed id when the fallback succeeds.
/// Throws Exception with ErrorCodes::BAD_ARGUMENTS if neither lookup succeeds.
FieldType getValue(StringRef field_name, bool try_treat_as_id = false) const
{
    /// Element names always take precedence over numeric ids, so an enum that has
    /// an element literally named like a number still resolves that name first.
    const auto value_it = name_to_value_map.find(field_name);
    if (value_it)
        return value_it->getMapped();

    /// If we fail to find given string in enum names and this string is number,
    /// we will try to treat it as enum id.
    if (try_treat_as_id && isStringNumber(field_name))
    {
        FieldType x;
        ReadBufferFromMemory tmp_buf(field_name.data, field_name.size);
        readText(x, tmp_buf);

        /// Accept the id only if it is actually declared for this enum.
        if (value_to_name_map.find(x) != value_to_name_map.end())
            return x;
    }

    throw Exception{"Unknown element '" + field_name.toString() + "' for type " + getName(), ErrorCodes::BAD_ARGUMENTS};
}
|
||||
|
||||
FieldType readValue(ReadBuffer & istr) const
|
||||
|
@ -1111,4 +1111,12 @@ bool loadAtPosition(ReadBuffer & in, DB::Memory<> & memory, char * & current)
|
||||
return loaded_more;
|
||||
}
|
||||
|
||||
/// Returns true if `str` is a non-empty sequence consisting only of ASCII decimal digits.
/// Signs, whitespace and decimal points make the result false.
bool isStringNumber(StringRef str)
{
    /// An empty string carries no digits, so it is not a number.
    if (str.size == 0)
        return false;

    for (size_t i = 0; i != str.size; ++i)
    {
        /// Compare against the digit range directly: passing a negative `char`
        /// (a byte >= 0x80 where char is signed) to isdigit() is undefined behaviour.
        const char c = str.data[i];
        if (c < '0' || c > '9')
            return false;
    }

    return true;
}
|
||||
|
||||
}
|
||||
|
@ -1228,4 +1228,6 @@ void saveUpToPosition(ReadBuffer & in, Memory<> & memory, char * current);
|
||||
*/
|
||||
bool loadAtPosition(ReadBuffer & in, Memory<> & memory, char * & current);
|
||||
|
||||
/// Returns false as soon as `str` contains a character that is not a decimal digit,
/// so signs, decimal points and whitespace are rejected.
bool isStringNumber(StringRef str);
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,10 @@
|
||||
1 a
|
||||
2 3
|
||||
3 3
|
||||
4 a
|
||||
5 b
|
||||
6 a
|
||||
7 3
|
||||
8 3
|
||||
9 a
|
||||
10 b
|
@ -0,0 +1,23 @@
|
||||
DROP TABLE IF EXISTS enum_as_num;
|
||||
|
||||
-- The element name '3' (id 2) deliberately collides with the numeric id 3 (element 'b').
CREATE TABLE enum_as_num (
|
||||
Id Int32,
|
||||
Value Enum('a' = 1, '3' = 2, 'b' = 3)
|
||||
) ENGINE=Memory();
|
||||
|
||||
INSERT INTO enum_as_num FORMAT TSV 1 1
|
||||
INSERT INTO enum_as_num FORMAT TSV 2 2
|
||||
INSERT INTO enum_as_num FORMAT TSV 3 3
|
||||
INSERT INTO enum_as_num FORMAT TSV 4 a
|
||||
INSERT INTO enum_as_num FORMAT TSV 5 b
|
||||
|
||||
INSERT INTO enum_as_num FORMAT CSV 6,1
|
||||
INSERT INTO enum_as_num FORMAT CSV 7,2
|
||||
INSERT INTO enum_as_num FORMAT CSV 8,3
|
||||
INSERT INTO enum_as_num FORMAT CSV 9,a
|
||||
INSERT INTO enum_as_num FORMAT CSV 10,b
|
||||
|
||||
SELECT * FROM enum_as_num ORDER BY Id;
|
||||
|
||||
|
||||
DROP TABLE IF EXISTS enum_as_num;
|
Loading…
Reference in New Issue
Block a user