mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
Apply suggestions from code review
Co-authored-by: Antonio Andelic <antonio2368@users.noreply.github.com>
This commit is contained in:
parent
96bb99f864
commit
9820beae68
@ -558,7 +558,7 @@ and if the value is not a number, ClickHouse treats it as a string.
|
||||
If you don't want ClickHouse to try to determine complex types using some parsers and heuristics, you can disable setting `input_format_csv_use_best_effort_in_schema_inference`
|
||||
and ClickHouse will treat all columns as Strings.
|
||||
|
||||
If setting `input_format_csv_detect_header` is enabled, ClickHouse will try to detect a header with column names (and maybe types) while schema inference. This setting is enabled by default.
|
||||
If setting `input_format_csv_detect_header` is enabled, ClickHouse will try to detect the header with column names (and maybe types) while inferring schema. This setting is enabled by default.
|
||||
|
||||
**Examples:**
|
||||
|
||||
@ -708,7 +708,7 @@ $$)
|
||||
└────────┴───────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
Not that header can be detected only if there is at least one column with non-String type. If all columns have String type, header is not detected:
|
||||
Note that the header can be detected only if there is at least one column with a non-String type. If all columns have String type, the header is not detected:
|
||||
|
||||
```sql
|
||||
SELECT * FROM format(CSV,
|
||||
@ -734,7 +734,7 @@ the recursive parser to determine the most appropriate type. If the type cannot
|
||||
If you don't want ClickHouse to try to determine complex types using some parsers and heuristics, you can disable setting `input_format_tsv_use_best_effort_in_schema_inference`
|
||||
and ClickHouse will treat all columns as Strings.
|
||||
|
||||
If setting `input_format_tsv_detect_header` is enabled, ClickHouse will try to detect a header with column names (and maybe types) while schema inference. This setting is enabled by default.
|
||||
If setting `input_format_tsv_detect_header` is enabled, ClickHouse will try to detect the header with column names (and maybe types) while inferring schema. This setting is enabled by default.
|
||||
|
||||
**Examples:**
|
||||
|
||||
@ -894,7 +894,7 @@ $$)
|
||||
└────────┴───────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
Not that header can be detected only if there is at least one column with non-String type. If all columns have String type, header is not detected:
|
||||
Note that the header can be detected only if there is at least one column with a non-String type. If all columns have String type, the header is not detected:
|
||||
|
||||
```sql
|
||||
SELECT * FROM format(TSV,
|
||||
@ -1024,7 +1024,7 @@ DESC format(TSV, '[1,2,3] 42.42 Hello World!')
|
||||
In CustomSeparated format ClickHouse first extracts all column values from the row according to specified delimiters and then tries to infer
|
||||
the data type for each value according to escaping rule.
|
||||
|
||||
If setting `input_format_custom_detect_header` is enabled, ClickHouse will try to detect a header with column names (and maybe types) while schema inference. This setting is enabled by default.
|
||||
If setting `input_format_custom_detect_header` is enabled, ClickHouse will try to detect the header with column names (and maybe types) while inferring schema. This setting is enabled by default.
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -310,7 +310,7 @@ DataTypePtr tryInferDataTypeByEscapingRule(const String & field, const FormatSet
|
||||
return type;
|
||||
}
|
||||
|
||||
/// Case when CSV value is not in quotes. Check if it's a number or date/datetime, and if not, determine it's as a string.
|
||||
/// Case when CSV value is not in quotes. Check if it's a number or date/datetime, and if not, determine it as a string.
|
||||
if (auto number_type = tryInferNumberFromString(field, format_settings))
|
||||
return number_type;
|
||||
|
||||
|
@ -190,7 +190,7 @@ void chooseResultColumnTypes(
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Rows have different amount of values");
|
||||
|
||||
if (types.size() != column_names.size())
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA,"The number of column names {} differs with the number of types {}", column_names.size(), types.size());
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "The number of column names {} differs from the number of types {}", column_names.size(), types.size());
|
||||
|
||||
for (size_t i = 0; i != types.size(); ++i)
|
||||
chooseResultColumnType(schema_reader, types[i], new_types[i], default_type, column_names[i], row);
|
||||
|
@ -26,8 +26,8 @@ namespace
|
||||
{
|
||||
void checkBadDelimiter(char delimiter)
|
||||
{
|
||||
const String bad_delimiters = " \t\"'.UL";
|
||||
if (bad_delimiters.find(delimiter) != String::npos)
|
||||
constexpr std::string_view bad_delimiters = " \t\"'.UL";
|
||||
if (bad_delimiters.find(delimiter) != std::string_view::npos)
|
||||
throw Exception(
|
||||
String("CSV format may not work correctly with delimiter '") + delimiter
|
||||
+ "'. Try use CustomSeparated format instead.",
|
||||
@ -343,7 +343,7 @@ std::pair<std::vector<String>, DataTypes> CSVSchemaReader::readRowAndGetFieldsAn
|
||||
|
||||
DataTypes CSVSchemaReader::readRowAndGetDataTypesImpl()
|
||||
{
|
||||
return readRowAndGetFieldsAndDataTypes().second;
|
||||
return std::move(readRowAndGetFieldsAndDataTypes().second);
|
||||
}
|
||||
|
||||
|
||||
@ -435,7 +435,7 @@ void registerFileSegmentationEngineCSV(FormatFactory & factory)
|
||||
{
|
||||
auto register_func = [&](const String & format_name, bool, bool)
|
||||
{
|
||||
size_t min_rows = 3; /// Make it 3 for header auto detection (first 3 rows must be always in the same segment).
|
||||
static constexpr size_t min_rows = 3; /// Make it 3 for header auto detection (first 3 rows must be always in the same segment).
|
||||
factory.registerFileSegmentationEngine(format_name, [min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows)
|
||||
{
|
||||
return fileSegmentationEngineCSVImpl(in, memory, min_bytes, min_rows, max_rows);
|
||||
|
@ -83,7 +83,7 @@ public:
|
||||
void setReadBuffer(ReadBuffer & in_) override;
|
||||
|
||||
private:
|
||||
enum class ReadFieldMode
|
||||
enum class ReadFieldMode : uint8_t
|
||||
{
|
||||
AS_STRING,
|
||||
AS_FIELD,
|
||||
|
@ -422,7 +422,7 @@ void registerFileSegmentationEngineTabSeparated(FormatFactory & factory)
|
||||
{
|
||||
auto register_func = [&](const String & format_name, bool, bool)
|
||||
{
|
||||
size_t min_rows = 3; /// Make it 3 for header auto detection (first 3 rows must be always in the same segment).
|
||||
static constexpr size_t min_rows = 3; /// Make it 3 for header auto detection (first 3 rows must be always in the same segment).
|
||||
factory.registerFileSegmentationEngine(format_name, [is_raw, min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows)
|
||||
{
|
||||
return fileSegmentationEngineTabSeparatedImpl(in, memory, is_raw, min_bytes, min_rows, max_rows);
|
||||
|
@ -160,7 +160,7 @@ void RowInputFormatWithNamesAndTypes::tryDetectHeader(std::vector<String> & colu
|
||||
}
|
||||
|
||||
/// First row is a header with column names.
|
||||
column_names_out = first_row_values;
|
||||
column_names_out = std::move(first_row_values);
|
||||
peekable_buf->dropCheckpoint();
|
||||
is_header_detected = true;
|
||||
|
||||
@ -187,7 +187,7 @@ void RowInputFormatWithNamesAndTypes::tryDetectHeader(std::vector<String> & colu
|
||||
}
|
||||
|
||||
/// The second row is a header with type names.
|
||||
type_names_out = second_row_values;
|
||||
type_names_out = std::move(second_row_values);
|
||||
peekable_buf->dropCheckpoint();
|
||||
}
|
||||
|
||||
@ -461,7 +461,7 @@ void FormatWithNamesAndTypesSchemaReader::tryDetectHeader(std::vector<String> &
|
||||
/// with all String elements can be real data and we cannot use them as a header.
|
||||
if (checkIfAllTypesAreString(data_types))
|
||||
{
|
||||
buffered_types = data_types;
|
||||
buffered_types = std::move(data_types);
|
||||
return;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user