Apply suggestions from code review

Co-authored-by: Antonio Andelic <antonio2368@users.noreply.github.com>
This commit is contained in:
Kruglov Pavel 2023-01-19 16:11:13 +01:00 committed by GitHub
parent 96bb99f864
commit 9820beae68
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 16 additions and 16 deletions

View File

@ -558,7 +558,7 @@ and if the value is not a number, ClickHouse treats it as a string.
If you don't want ClickHouse to try to determine complex types using some parsers and heuristics, you can disable the setting `input_format_csv_use_best_effort_in_schema_inference`,
and ClickHouse will treat all columns as Strings.
If setting `input_format_csv_detect_header` is enabled, ClickHouse will try to detect a header with column names (and maybe types) while schema inference. This setting is enabled by default.
If the setting `input_format_csv_detect_header` is enabled, ClickHouse will try to detect the header with column names (and maybe types) during schema inference. This setting is enabled by default.
**Examples:**
@ -708,7 +708,7 @@ $$)
└────────┴───────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
Not that header can be detected only if there is at least one column with non-String type. If all columns have String type, header is not detected:
Note that the header can be detected only if there is at least one column with a non-String type. If all columns have String type, the header is not detected:
```sql
SELECT * FROM format(CSV,
@ -734,7 +734,7 @@ the recursive parser to determine the most appropriate type. If the type cannot
If you don't want ClickHouse to try to determine complex types using some parsers and heuristics, you can disable the setting `input_format_tsv_use_best_effort_in_schema_inference`,
and ClickHouse will treat all columns as Strings.
If setting `input_format_tsv_detect_header` is enabled, ClickHouse will try to detect a header with column names (and maybe types) while schema inference. This setting is enabled by default.
If the setting `input_format_tsv_detect_header` is enabled, ClickHouse will try to detect the header with column names (and maybe types) during schema inference. This setting is enabled by default.
**Examples:**
@ -894,7 +894,7 @@ $$)
└────────┴───────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
Not that header can be detected only if there is at least one column with non-String type. If all columns have String type, header is not detected:
Note that the header can be detected only if there is at least one column with a non-String type. If all columns have String type, the header is not detected:
```sql
SELECT * FROM format(TSV,
@ -1024,7 +1024,7 @@ DESC format(TSV, '[1,2,3] 42.42 Hello World!')
In the CustomSeparated format, ClickHouse first extracts all column values from the row according to the specified delimiters and then tries to infer
the data type for each value according to the escaping rule.
If setting `input_format_custom_detect_header` is enabled, ClickHouse will try to detect a header with column names (and maybe types) while schema inference. This setting is enabled by default.
If the setting `input_format_custom_detect_header` is enabled, ClickHouse will try to detect the header with column names (and maybe types) during schema inference. This setting is enabled by default.
**Example**

View File

@ -310,7 +310,7 @@ DataTypePtr tryInferDataTypeByEscapingRule(const String & field, const FormatSet
return type;
}
/// Case when CSV value is not in quotes. Check if it's a number or date/datetime, and if not, determine it's as a string.
/// Case when CSV value is not in quotes. Check if it's a number or date/datetime, and if not, determine it as a string.
if (auto number_type = tryInferNumberFromString(field, format_settings))
return number_type;

View File

@ -190,7 +190,7 @@ void chooseResultColumnTypes(
throw Exception(ErrorCodes::INCORRECT_DATA, "Rows have different amount of values");
if (types.size() != column_names.size())
throw Exception(ErrorCodes::INCORRECT_DATA,"The number of column names {} differs with the number of types {}", column_names.size(), types.size());
throw Exception(ErrorCodes::INCORRECT_DATA, "The number of column names {} differs from the number of types {}", column_names.size(), types.size());
for (size_t i = 0; i != types.size(); ++i)
chooseResultColumnType(schema_reader, types[i], new_types[i], default_type, column_names[i], row);

View File

@ -26,8 +26,8 @@ namespace
{
void checkBadDelimiter(char delimiter)
{
const String bad_delimiters = " \t\"'.UL";
if (bad_delimiters.find(delimiter) != String::npos)
constexpr std::string_view bad_delimiters = " \t\"'.UL";
if (bad_delimiters.find(delimiter) != std::string_view::npos)
throw Exception(
String("CSV format may not work correctly with delimiter '") + delimiter
+ "'. Try use CustomSeparated format instead.",
@ -343,7 +343,7 @@ std::pair<std::vector<String>, DataTypes> CSVSchemaReader::readRowAndGetFieldsAn
DataTypes CSVSchemaReader::readRowAndGetDataTypesImpl()
{
return readRowAndGetFieldsAndDataTypes().second;
return std::move(readRowAndGetFieldsAndDataTypes().second);
}
@ -435,7 +435,7 @@ void registerFileSegmentationEngineCSV(FormatFactory & factory)
{
auto register_func = [&](const String & format_name, bool, bool)
{
size_t min_rows = 3; /// Make it 3 for header auto detection (first 3 rows must be always in the same segment).
static constexpr size_t min_rows = 3; /// Make it 3 for header auto detection (first 3 rows must be always in the same segment).
factory.registerFileSegmentationEngine(format_name, [min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows)
{
return fileSegmentationEngineCSVImpl(in, memory, min_bytes, min_rows, max_rows);

View File

@ -83,7 +83,7 @@ public:
void setReadBuffer(ReadBuffer & in_) override;
private:
enum class ReadFieldMode
enum class ReadFieldMode : uint8_t
{
AS_STRING,
AS_FIELD,

View File

@ -422,7 +422,7 @@ void registerFileSegmentationEngineTabSeparated(FormatFactory & factory)
{
auto register_func = [&](const String & format_name, bool, bool)
{
size_t min_rows = 3; /// Make it 3 for header auto detection (first 3 rows must be always in the same segment).
static constexpr size_t min_rows = 3; /// Make it 3 for header auto detection (first 3 rows must be always in the same segment).
factory.registerFileSegmentationEngine(format_name, [is_raw, min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows)
{
return fileSegmentationEngineTabSeparatedImpl(in, memory, is_raw, min_bytes, min_rows, max_rows);

View File

@ -160,7 +160,7 @@ void RowInputFormatWithNamesAndTypes::tryDetectHeader(std::vector<String> & colu
}
/// First row is a header with column names.
column_names_out = first_row_values;
column_names_out = std::move(first_row_values);
peekable_buf->dropCheckpoint();
is_header_detected = true;
@ -187,7 +187,7 @@ void RowInputFormatWithNamesAndTypes::tryDetectHeader(std::vector<String> & colu
}
/// The second row is a header with type names.
type_names_out = second_row_values;
type_names_out = std::move(second_row_values);
peekable_buf->dropCheckpoint();
}
@ -461,7 +461,7 @@ void FormatWithNamesAndTypesSchemaReader::tryDetectHeader(std::vector<String> &
/// with all String elements can be real data and we cannot use them as a header.
if (checkIfAllTypesAreString(data_types))
{
buffered_types = data_types;
buffered_types = std::move(data_types);
return;
}