mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Try to treat input values as enum id in tsv/csv
This commit is contained in:
parent
39cd1c0371
commit
ca4088b400
@ -155,7 +155,7 @@ void DataTypeEnum<Type>::deserializeTextEscaped(IColumn & column, ReadBuffer & i
|
|||||||
/// NOTE It would be nice to do without creating a temporary object - at least extract std::string out.
|
/// NOTE It would be nice to do without creating a temporary object - at least extract std::string out.
|
||||||
std::string field_name;
|
std::string field_name;
|
||||||
readEscapedString(field_name, istr);
|
readEscapedString(field_name, istr);
|
||||||
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name)));
|
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name), true));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -182,7 +182,7 @@ void DataTypeEnum<Type>::deserializeWholeText(IColumn & column, ReadBuffer & ist
|
|||||||
{
|
{
|
||||||
std::string field_name;
|
std::string field_name;
|
||||||
readString(field_name, istr);
|
readString(field_name, istr);
|
||||||
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name)));
|
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name), true));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -226,7 +226,7 @@ void DataTypeEnum<Type>::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
|
|||||||
{
|
{
|
||||||
std::string field_name;
|
std::string field_name;
|
||||||
readCSVString(field_name, istr, settings.csv);
|
readCSVString(field_name, istr, settings.csv);
|
||||||
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name)));
|
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name), true));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
#include <Columns/ColumnVector.h>
|
#include <Columns/ColumnVector.h>
|
||||||
#include <Columns/ColumnConst.h>
|
#include <Columns/ColumnConst.h>
|
||||||
#include <Common/HashTable/HashMap.h>
|
#include <Common/HashTable/HashMap.h>
|
||||||
|
#include <IO/ReadHelpers.h>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
@ -80,13 +81,25 @@ public:
|
|||||||
return findByValue(value)->second;
|
return findByValue(value)->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
FieldType getValue(StringRef field_name) const
|
FieldType getValue(StringRef field_name, bool try_treat_as_id=false) const
|
||||||
{
|
{
|
||||||
const auto it = name_to_value_map.find(field_name);
|
const auto value_it = name_to_value_map.find(field_name);
|
||||||
if (!it)
|
if (!value_it)
|
||||||
|
{
|
||||||
|
/// It is used in CSV and TSV input formats. If we fail to find given string in
|
||||||
|
/// enum names and this string is number, we will try to treat it as enum id.
|
||||||
|
if (try_treat_as_id && isStringNumber(field_name))
|
||||||
|
{
|
||||||
|
FieldType x;
|
||||||
|
ReadBufferFromMemory tmp_buf(field_name.data, field_name.size);
|
||||||
|
readText(x, tmp_buf);
|
||||||
|
const auto name_it = value_to_name_map.find(x);
|
||||||
|
if (name_it != value_to_name_map.end())
|
||||||
|
return x;
|
||||||
|
}
|
||||||
throw Exception{"Unknown element '" + field_name.toString() + "' for type " + getName(), ErrorCodes::BAD_ARGUMENTS};
|
throw Exception{"Unknown element '" + field_name.toString() + "' for type " + getName(), ErrorCodes::BAD_ARGUMENTS};
|
||||||
|
}
|
||||||
return it->getMapped();
|
return value_it->getMapped();
|
||||||
}
|
}
|
||||||
|
|
||||||
FieldType readValue(ReadBuffer & istr) const
|
FieldType readValue(ReadBuffer & istr) const
|
||||||
|
@ -1111,4 +1111,12 @@ bool loadAtPosition(ReadBuffer & in, DB::Memory<> & memory, char * & current)
|
|||||||
return loaded_more;
|
return loaded_more;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true iff `str` is a non-empty sequence of ASCII decimal digits ('0'..'9').
///
/// An empty string is deliberately rejected: it contains no digits, so it cannot be
/// parsed as an integer by a caller that relies on this check.
/// Signs, whitespace and any non-ASCII bytes make the result false.
bool isStringNumber(StringRef str)
{
    if (str.size == 0)
        return false;

    for (size_t i = 0; i != str.size; ++i)
    {
        /// Compare against the ASCII range directly: passing a plain (possibly negative)
        /// char to isdigit() is undefined behaviour, and isdigit() depends on the C locale.
        const char c = str.data[i];
        if (c < '0' || c > '9')
            return false;
    }

    return true;
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1228,4 +1228,6 @@ void saveUpToPosition(ReadBuffer & in, Memory<> & memory, char * current);
|
|||||||
*/
|
*/
|
||||||
bool loadAtPosition(ReadBuffer & in, Memory<> & memory, char * & current);
|
bool loadAtPosition(ReadBuffer & in, Memory<> & memory, char * & current);
|
||||||
|
|
||||||
|
bool isStringNumber(StringRef str);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,10 @@
|
|||||||
|
1 a
|
||||||
|
2 3
|
||||||
|
3 3
|
||||||
|
4 a
|
||||||
|
5 b
|
||||||
|
6 a
|
||||||
|
7 3
|
||||||
|
8 3
|
||||||
|
9 a
|
||||||
|
10 b
|
@ -0,0 +1,23 @@
|
|||||||
|
DROP TABLE IF EXISTS enum_as_num;
|
||||||
|
|
||||||
|
CREATE TABLE enum_as_num (
|
||||||
|
Id Int32,
|
||||||
|
Value Enum('a' = 1, '3' = 2, 'b' = 3)
|
||||||
|
) ENGINE=Memory();
|
||||||
|
|
||||||
|
INSERT INTO enum_as_num FORMAT TSV 1 1
|
||||||
|
INSERT INTO enum_as_num FORMAT TSV 2 2
|
||||||
|
INSERT INTO enum_as_num FORMAT TSV 3 3
|
||||||
|
INSERT INTO enum_as_num FORMAT TSV 4 a
|
||||||
|
INSERT INTO enum_as_num FORMAT TSV 5 b
|
||||||
|
|
||||||
|
INSERT INTO enum_as_num FORMAT CSV 6,1
|
||||||
|
INSERT INTO enum_as_num FORMAT CSV 7,2
|
||||||
|
INSERT INTO enum_as_num FORMAT CSV 8,3
|
||||||
|
INSERT INTO enum_as_num FORMAT CSV 9,a
|
||||||
|
INSERT INTO enum_as_num FORMAT CSV 10,b
|
||||||
|
|
||||||
|
SELECT * FROM enum_as_num ORDER BY Id;
|
||||||
|
|
||||||
|
|
||||||
|
DROP TABLE IF EXISTS enum_as_num;
|
Loading…
Reference in New Issue
Block a user