Check for punctuation

2024-11-22 15:42:02 +00:00 · 2023-07-25 06:10:04 +02:00 · 2023-07-25 06:10:04 +02:00 · 21382afa2b
commit 21382afa2b
parent ecdafeaf83
28 changed files with 45 additions and 56 deletions
--- a/src/Common/parseRemoteDescription.cpp
+++ b/src/Common/parseRemoteDescription.cpp
@ -52,20 +52,8 @@ static bool parseNumber(const String & description, size_t l, size_t r, size_t &
 }


-/* Parse a string that generates shards and replicas. Separator - one of two characters | or ,
- *  depending on whether shards or replicas are generated.
- * For example:
- * host1,host2,...      - generates set of shards from host1, host2, ...
- * host1|host2|...      - generates set of replicas from host1, host2, ...
- * abc{8..10}def        - generates set of shards abc8def, abc9def, abc10def.
- * abc{08..10}def       - generates set of shards abc08def, abc09def, abc10def.
- * abc{x,yy,z}def       - generates set of shards abcxdef, abcyydef, abczdef.
- * abc{x|yy|z} def      - generates set of replicas abcxdef, abcyydef, abczdef.
- * abc{1..9}de{f,g,h}   - is a direct product, 27 shards.
- * abc{1..9}de{0|1}     - is a direct product, 9 shards, in each 2 replicas.
- */
-std::vector<String>
-parseRemoteDescription(const String & description, size_t l, size_t r, char separator, size_t max_addresses, const String & func_name)
+std::vector<String> parseRemoteDescription(
+    const String & description, size_t l, size_t r, char separator, size_t max_addresses, const String & func_name)
 {
    std::vector<String> res;
    std::vector<String> cur;
--- a/src/Common/parseRemoteDescription.h
+++ b/src/Common/parseRemoteDescription.h
@ -3,7 +3,7 @@
 #include <vector>
 namespace DB
 {
-/* Parse a string that generates shards and replicas. Separator - one of two characters | or ,
+/* Parse a string that generates shards and replicas. Separator - one of two characters '|' or ','
 *  depending on whether shards or replicas are generated.
 * For example:
 * host1,host2,...      - generates set of shards from host1, host2, ...
--- a/src/Core/tests/gtest_settings.cpp
+++ b/src/Core/tests/gtest_settings.cpp
@ -121,7 +121,7 @@ GTEST_TEST(SettingMySQLDataTypesSupport, SetString)
    ASSERT_EQ(Field("decimal,datetime64"), setting);

    // comma with spaces
-    setting = " datetime64 ,    decimal ";
+    setting = " datetime64 ,    decimal "; /// bad punctuation is ok here
    ASSERT_TRUE(setting.changed);
    ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL));
    ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64));
@ -166,4 +166,3 @@ GTEST_TEST(SettingMySQLDataTypesSupport, SetInvalidString)
    ASSERT_TRUE(setting.changed);
    ASSERT_EQ(0, setting.value.getValue());
 }
-
--- a/src/Functions/FunctionsStringHash.cpp
+++ b/src/Functions/FunctionsStringHash.cpp
@ -293,7 +293,7 @@ struct SimHashImpl

            // we need to store the new word hash value to the oldest location.
            // for example, N = 5, array |a0|a1|a2|a3|a4|, now, a0 is the oldest location,
-            // so we need to store new word hash into location of a0, then ,this array become
+            // so we need to store new word hash into location of a0, then this array become
            // |a5|a1|a2|a3|a4|, next time, a1 become the oldest location, we need to store new
            // word hash value into location of a1, then array become |a5|a6|a2|a3|a4|
            words[offset] = BytesRef{word_start, length};
@ -793,4 +793,3 @@ REGISTER_FUNCTION(StringHash)
    factory.registerFunction<FunctionWordShingleMinHashArgCaseInsensitiveUTF8>();
 }
 }
-
--- a/src/Interpreters/TransactionLog.cpp
+++ b/src/Interpreters/TransactionLog.cpp
@ -482,7 +482,7 @@ CSN TransactionLog::finalizeCommittedTransaction(MergeTreeTransaction * txn, CSN
        bool removed = running_list.erase(txn->tid.getHash());
        if (!removed)
        {
-            LOG_ERROR(log , "I's a bug: TID {} {} doesn't exist", txn->tid.getHash(), txn->tid);
+            LOG_ERROR(log, "It's a bug: TID {} {} doesn't exist", txn->tid.getHash(), txn->tid);
            abort();
        }
    }
--- a/src/Parsers/Kusto/ParserKQLOperators.h
+++ b/src/Parsers/Kusto/ParserKQLOperators.h
@ -31,10 +31,10 @@ protected:
        not_endswith,
        endswith_cs,
        not_endswith_cs,
-        equal, //=~
-        not_equal,//!~
-        equal_cs, //=
-        not_equal_cs,//!=
+        equal, /// =~
+        not_equal, /// !~
+        equal_cs, /// =
+        not_equal_cs, /// !=
        has,
        not_has,
        has_all,
@ -49,10 +49,10 @@ protected:
        not_hassuffix,
        hassuffix_cs,
        not_hassuffix_cs,
-        in_cs,  //in
-        not_in_cs, //!in
-        in, //in~
-        not_in ,//!in~
+        in_cs, /// in
+        not_in_cs, /// !in
+        in, /// in~
+        not_in, /// !in~
        matches_regex,
        startswith,
        not_startswith,
--- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp
@ -236,10 +236,10 @@ bool JSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtensi

 bool JSONEachRowRowInputFormat::checkEndOfData(bool is_first_row)
 {
-    /// We consume , or \n before scanning a new row, instead scanning to next row at the end.
+    /// We consume ',' or '\n' before scanning a new row, instead of scanning to the next row at the end.
    /// The reason is that if we want an exact number of rows read with LIMIT x
    /// from a streaming table engine with text data format, like File or Kafka
-    /// then seeking to next ;, or \n would trigger reading of an extra row at the end.
+    /// then seeking to the next ';', ',' or '\n' would trigger reading of an extra row at the end.

    /// Semicolon is added for convenience as it could be used at end of INSERT query.
    if (!in->eof())
--- a/utils/check-style/check-style
+++ b/utils/check-style/check-style
@ -410,3 +410,6 @@ find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep

 # The stateful directory should only contain the tests that depend on the test dataset (hits or visits).
 find $ROOT_PATH/tests/queries/1_stateful -name '*.sql' -or -name '*.sh' | grep -v '00076_system_columns_bytes' | xargs -I{} bash -c 'grep -q -P "hits|visits" "{}" || echo "The test {} does not depend on the test dataset (hits or visits table) and should be located in the 0_stateless directory. You can also add an exception to the check-style script."'
+
+# Check for bad punctuation: whitespace before comma.
+find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number '\w ,' | grep -v 'bad punctuation is ok here' && echo "^ There is bad punctuation: whitespace before comma. You should write it like this: 'Hello, world!'"