Apply suggestions from code review

Co-authored-by: Antonio Andelic <antonio2368@users.noreply.github.com>
2024-09-20 08:40:50 +00:00 · 2023-01-19 16:11:13 +01:00 · 2023-01-19 16:11:13 +01:00 · 9820beae68
commit 9820beae68
parent 96bb99f864
7 changed files with 16 additions and 16 deletions
--- a/docs/en/interfaces/schema-inference.md
+++ b/docs/en/interfaces/schema-inference.md
@ -558,7 +558,7 @@ and if the value is not a number, ClickHouse treats it as a string.
 If you don't want ClickHouse to try to determine complex types using some parsers and heuristics, you can disable setting `input_format_csv_use_best_effort_in_schema_inference`
 and ClickHouse will treat all columns as Strings.

-If setting `input_format_csv_detect_header` is enabled, ClickHouse will try to detect a header with column names (and maybe types) while schema inference. This setting is enabled by default.
+If setting `input_format_csv_detect_header` is enabled, ClickHouse will try to detect the header with column names (and maybe types) while inferring schema. This setting is enabled by default.

 **Examples:**

@ -708,7 +708,7 @@ $$)
 └────────┴───────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
 ```

-Not that header can be detected only if there is at least one column with non-String type. If all columns have String type, header is not detected:
+Note that the header can be detected only if there is at least one column with a non-String type. If all columns have String type, the header is not detected:

 ```sql
 SELECT * FROM format(CSV,
@ -734,7 +734,7 @@ the recursive parser to determine the most appropriate type. If the type cannot
 If you don't want ClickHouse to try to determine complex types using some parsers and heuristics, you can disable setting `input_format_tsv_use_best_effort_in_schema_inference`
 and ClickHouse will treat all columns as Strings.

-If setting `input_format_tsv_detect_header` is enabled, ClickHouse will try to detect a header with column names (and maybe types) while schema inference. This setting is enabled by default.
+If setting `input_format_tsv_detect_header` is enabled, ClickHouse will try to detect the header with column names (and maybe types) while inferring schema. This setting is enabled by default.

 **Examples:**

@ -894,7 +894,7 @@ $$)
 └────────┴───────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
 ```

-Not that header can be detected only if there is at least one column with non-String type. If all columns have String type, header is not detected:
+Note that the header can be detected only if there is at least one column with a non-String type. If all columns have String type, the header is not detected:

 ```sql
 SELECT * FROM format(TSV,
@ -1024,7 +1024,7 @@ DESC format(TSV, '[1,2,3]	42.42	Hello World!')
 In CustomSeparated format ClickHouse first extracts all column values from the row according to specified delimiters and then tries to infer
 the data type for each value according to escaping rule.

-If setting `input_format_custom_detect_header` is enabled, ClickHouse will try to detect a header with column names (and maybe types) while schema inference. This setting is enabled by default.
+If setting `input_format_custom_detect_header` is enabled, ClickHouse will try to detect the header with column names (and maybe types) while inferring schema. This setting is enabled by default.

 **Example**

--- a/src/Formats/EscapingRuleUtils.cpp
+++ b/src/Formats/EscapingRuleUtils.cpp
@ -310,7 +310,7 @@ DataTypePtr tryInferDataTypeByEscapingRule(const String & field, const FormatSet
                return type;
            }

-            /// Case when CSV value is not in quotes. Check if it's a number or date/datetime, and if not, determine it's as a string.
+            /// Case when CSV value is not in quotes. Check if it's a number or date/datetime, and if not, determine it as a string.
            if (auto number_type = tryInferNumberFromString(field, format_settings))
                return number_type;

--- a/src/Processors/Formats/ISchemaReader.h
+++ b/src/Processors/Formats/ISchemaReader.h
@ -190,7 +190,7 @@ void chooseResultColumnTypes(
        throw Exception(ErrorCodes::INCORRECT_DATA, "Rows have different amount of values");

    if (types.size() != column_names.size())
-        throw Exception(ErrorCodes::INCORRECT_DATA,"The number of column names {} differs with the number of types {}", column_names.size(), types.size());
+        throw Exception(ErrorCodes::INCORRECT_DATA, "The number of column names {} differs from the number of types {}", column_names.size(), types.size());

    for (size_t i = 0; i != types.size(); ++i)
        chooseResultColumnType(schema_reader, types[i], new_types[i], default_type, column_names[i], row);
--- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
@ -26,8 +26,8 @@ namespace
 {
    void checkBadDelimiter(char delimiter)
    {
-        const String bad_delimiters = " \t\"'.UL";
-        if (bad_delimiters.find(delimiter) != String::npos)
+        constexpr std::string_view bad_delimiters = " \t\"'.UL";
+        if (bad_delimiters.find(delimiter) != std::string_view::npos)
            throw Exception(
                String("CSV format may not work correctly with delimiter '") + delimiter
                    + "'. Try use CustomSeparated format instead.",
@ -343,7 +343,7 @@ std::pair<std::vector<String>, DataTypes> CSVSchemaReader::readRowAndGetFieldsAn

 DataTypes CSVSchemaReader::readRowAndGetDataTypesImpl()
 {
-    return readRowAndGetFieldsAndDataTypes().second;
+    return std::move(readRowAndGetFieldsAndDataTypes().second);
 }


@ -435,7 +435,7 @@ void registerFileSegmentationEngineCSV(FormatFactory & factory)
 {
    auto register_func = [&](const String & format_name, bool, bool)
    {
-        size_t min_rows = 3; /// Make it 3 for header auto detection (first 3 rows must be always in the same segment).
+        static constexpr size_t min_rows = 3; /// Make it 3 for header auto detection (first 3 rows must be always in the same segment).
        factory.registerFileSegmentationEngine(format_name, [min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows)
        {
            return fileSegmentationEngineCSVImpl(in, memory, min_bytes, min_rows, max_rows);
--- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h
+++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h
@ -83,7 +83,7 @@ public:
    void setReadBuffer(ReadBuffer & in_) override;

 private:
-    enum class ReadFieldMode
+    enum class ReadFieldMode : uint8_t
    {
        AS_STRING,
        AS_FIELD,
--- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
@ -422,7 +422,7 @@ void registerFileSegmentationEngineTabSeparated(FormatFactory & factory)
    {
        auto register_func = [&](const String & format_name, bool, bool)
        {
-            size_t min_rows = 3; /// Make it 3 for header auto detection (first 3 rows must be always in the same segment).
+            static constexpr size_t min_rows = 3; /// Make it 3 for header auto detection (first 3 rows must be always in the same segment).
            factory.registerFileSegmentationEngine(format_name, [is_raw, min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows)
            {
                return fileSegmentationEngineTabSeparatedImpl(in, memory, is_raw, min_bytes, min_rows, max_rows);
--- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp
+++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp
@ -160,7 +160,7 @@ void RowInputFormatWithNamesAndTypes::tryDetectHeader(std::vector<String> & colu
    }

    /// First row is a header with column names.
-    column_names_out = first_row_values;
+    column_names_out = std::move(first_row_values);
    peekable_buf->dropCheckpoint();
    is_header_detected = true;

@ -187,7 +187,7 @@ void RowInputFormatWithNamesAndTypes::tryDetectHeader(std::vector<String> & colu
    }

    /// The second row is a header with type names.
-    type_names_out = second_row_values;
+    type_names_out = std::move(second_row_values);
    peekable_buf->dropCheckpoint();
 }

@ -461,7 +461,7 @@ void FormatWithNamesAndTypesSchemaReader::tryDetectHeader(std::vector<String> &
        /// with all String elements can be real data and we cannot use them as a header.
        if (checkIfAllTypesAreString(data_types))
        {
-            buffered_types = data_types;
+            buffered_types = std::move(data_types);
            return;
        }