diff --git a/utils/check-style/aspell-ignore/en/casesensitive.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt similarity index 63% rename from utils/check-style/aspell-ignore/en/casesensitive.txt rename to utils/check-style/aspell-ignore/en/aspell-dict.txt index d161b80ce2f..0112aaf01da 100644 --- a/utils/check-style/aspell-ignore/en/casesensitive.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1,18 +1,26 @@ -FixedString +personal_ws-1.1 en 479 +AArch ACLs +AMQP +ASLR ASan Actian AddressSanitizer AppleClang ArrowStream AvroConfluent +CCTOOLS CLion +CMake CMakeLists CPUs CSVWithNames CSVWithNamesAndTypes CamelCase +CapnProto CentOS +ClickHouse +Config Contrib Ctrl CustomSeparated @@ -25,6 +33,8 @@ Doxygen Encodings Enum Eoan +FixedString +FreeBSD Fuzzer Fuzzers GTest @@ -39,17 +49,56 @@ Hostname IPv IntN Integrations +JSONAsString +JSONColumns +JSONColumnsWithMetadata +JSONCompact +JSONCompactColumns +JSONCompactEachRow +JSONCompactEachRowWithNames +JSONCompactEachRowWithNamesAndTypes +JSONCompactStrings +JSONCompactStringsEachRow +JSONCompactStringsEachRowWithNames +JSONCompactStringsEachRowWithNamesAndTypes +JSONEachRow +JSONEachRowWithProgress +JSONStrings +JSONStringsEachRow +JSONStringsEachRowWithProgress +JSONs +Jaeger +Jemalloc +Jepsen +KDevelop +LGPL +LOCALTIME +LOCALTIMESTAMP +LibFuzzer +LineAsString LowCardinality +MEMTABLE +MSan MacOS Memcheck MemorySanitizer MergeTree MessagePack MiB +MsgPack +Multiline +Multithreading +MySQLDump +NEKUDOTAYIM +NULLIF +NVME +NuRaft Ok OpenSUSE OpenStack OpenTelemetry +PAAMAYIM +Parsers Postgres Precompiled PrettyCompact @@ -61,17 +110,31 @@ PrettySpaceNoEscapes Protobuf ProtobufSingle QTCreator +RBAC RawBLOB RedHat RowBinary RowBinaryWithNames RowBinaryWithNamesAndTypes Runtime +SATA +SERIALIZABLE +SIMD +SMALLINT +SQLSTATE +SSSE +Schemas Stateful Submodules Subqueries TSVRaw TSan +TabSeparated +TabSeparatedRaw +TabSeparatedRawWithNames +TabSeparatedRawWithNamesAndTypes +TabSeparatedWithNames +TabSeparatedWithNamesAndTypes TargetSpecific TemplateIgnoreSpaces Testflows @@ -84,6 +147,7 @@ UIntN UPDATEs Uint Updatable +Util Valgrind Vectorized VirtualBox @@ -92,31 +156,211 @@ Woboq WriteBuffer WriteBuffers XCode +YAML +YYYY Zipkin ZooKeeper ZooKeeper's +aarch +allocator +analytics +anonymized +ansi +async +autogeneration +autostart +avro +avx +aws +backoff +backticks +benchmarking +blake blockSize +boolean +boringssl +brotli +buildable camelCase +capn +capnproto +cardinality +cassandra +cbindgen +ccache +cctz +cfg +changelog +checkouting +checksummed +checksumming +checksums +cityhash +cli +clickhouse +clickstream +cmake +codebase +codec +comparising +config +configs +contrib coroutines +cpp cppkafka cpu +croaring cronjob +csv csvwithnames csvwithnamesandtypes customseparated customseparatedwithnames +customseparatedwithnamesandtypes +cyrus +datacenter +datafiles +dataset +datasets +datetime dbms ddl +deallocation +debian +decompressor +denormals +deserialization +deserialized +destructor +destructors +dmesg +dont +dragonbox +durations +endian +enum +fastops +fcoverage +filesystem +filesystems +flatbuffers +fmtlib +formatschema +formatter +fuzzer +fuzzers +gRPC +gcem +github +glibc +googletest +grpc +grpcio +gtest +hardlinks +hdfs +heredoc +heredocs +homebrew +http +https +hyperscan +icudata +instantiation +integrational +integrations +interserver +jdbc +jemalloc +json +jsonasstring +jsoncolumns +jsoncolumnsmonoblock +jsoncompact +jsoncompactcolumns +jsoncompacteachrow +jsoncompacteachrowwithnames +jsoncompacteachrowwithnamesandtypes +jsoncompactstrings +jsoncompactstringseachrow +jsoncompactstringseachrowwithnames +jsoncompactstringseachrowwithnamesandtypes +jsoneachrow +jsoneachrowwithprogress +jsonstrings +jsonstringseachrow +jsonstringseachrowwithprogress +kafka +kafkacat +konsole +latencies +lexicographically +libFuzzer +libc +libcpuid +libcxx +libcxxabi +libdivide +libfarmhash +libfuzzer +libgsasl +libhdfs +libmetrohash +libpq +libpqxx +librdkafka +libs +libunwind +libuv +libvirt +linearizability +linearizable +lineasstring +linefeeds +linux +llvm +localhost +macOS +mariadb +miniselect +msgpack +msgpk +multiline +multithread +murmurhash +mutex +mysql +mysqldump +mysqljs +noop +nullable +num +obfuscator +odbc ok +openldap +opentelemetry +overcommit +parallelization +parallelize +parallelized +parsers +pclmulqdq +performant +poco +popcnt postfix postfixes postgresql pre prebuild prebuilt +preemptable preloaded preprocessed preprocessor +presentational prestable prettycompact prettycompactmonoblock @@ -141,6 +385,9 @@ readonly rebalanced replxx repo +representable +requestor +resultset rethrow risc ro @@ -151,9 +398,14 @@ rowbinarywithnames rowbinarywithnamesandtypes rsync runningAccumulate +runtime russian rw +sasl +schemas simdjson +skippingerrors +sparsehash sql src stacktraces @@ -175,12 +427,19 @@ subpatterns subqueries subquery subseconds +substring subtree sudo symlink symlinks syntaxes systemd +tabseparated +tabseparatedraw +tabseparatedrawwithnames +tabseparatedrawwithnamesandtypes +tabseparatedwithnames +tabseparatedwithnamesandtypes tcp templateignorespaces tgz @@ -199,7 +458,7 @@ unencrypted unixodbc url userspace -usr +utils variadic varint vectorized @@ -208,6 +467,8 @@ wchs webpage webserver wget +whitespace +whitespaces wrt xcode xml @@ -217,126 +478,3 @@ zkcopy zlib znodes zstd -datacenter -datafiles -dataset -datasets -datetime -deallocation -libmetrohash -libpq -libpqxx -librdkafka -libs -libunwind -libuv -libvirt -linearizability -linearizable -LineAsString -llvm -localhost -macOS -mutex -mysql -MySQLDump -comparising -contrib -decompressor -deserialization -deserialized -destructor -destructors -dmesg -dragonbox -durations -endian -enum -fastops -fcoverage -filesystems -flatbuffers -fmtlib -formatter -FreeBSD -fuzzer -fuzzers -gcem -github -glibc -googletest -gRPC -grpc -grpcio -gtest -hardlinks -heredoc -heredocs -http -https -NuRaft -odbc -parallelization -parallelize -parallelized -Parsers -parsers -pclmulqdq -performant -poco -popcnt -presentational -representable -requestor -resultset -runtime -Jemalloc -jemalloc -Jepsen -KDevelop -konsole -libcxx -libcxxabi -libdivide -libfarmhash -LibFuzzer -libFuzzer -libfuzzer -libgsasl -libhdfs -linefeeds -mariadb -miniselect -MSan -MsgPack -msgpack -msgpk -Multiline -multiline -multithread -Multithreading -murmurhash -mysqljs -openldap -opentelemetry -overcommit -integrational -interserver -Jaeger -jdbc -kafka -kafkacat -lexicographically -libc -libcpuid -sasl -Schemas -schemas -sparsehash -croaring -cyrus -denormals -hyperscan -icudata -instantiation -integrations diff --git a/utils/check-style/aspell-ignore/en/caseinsensitive.txt b/utils/check-style/aspell-ignore/en/caseinsensitive.txt deleted file mode 100644 index ab067370a7c..00000000000 --- a/utils/check-style/aspell-ignore/en/caseinsensitive.txt +++ /dev/null @@ -1,54 +0,0 @@ -aarch -allocator -analytics -anonymized -ansi -async -autogeneration -autostart -avro -avx -aws -backoff -backticks -benchmarking -blake -boolean -boringssl -brotli -buildable -capn -capnproto -cardinality -cassandra -cbindgen -ccache -cctz -cfg -changelog -checkouting -checksummed -checksumming -checksums -cityhash -cli -clickhouse -clickstream -cmake -codebase -codec -configs? -cpp -csv -filesystem -json[a-z]* -latencies -noop -nullable -num -obfuscator -preemptable -substring -tabseparated[a-z]* -utils? -whitespaces? diff --git a/utils/check-style/aspell-ignore/en/todo.txt b/utils/check-style/aspell-ignore/en/todo.txt deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/utils/check-style/check-doc-aspell b/utils/check-style/check-doc-aspell index 63535e81640..d39769aa930 100755 --- a/utils/check-style/check-doc-aspell +++ b/utils/check-style/check-doc-aspell @@ -1,24 +1,37 @@ #!/usr/bin/env bash # Perform spell checking on the docs -# Files casesensitive.txt and caseinsensitive.txt contains words to ignore (case insensitive and sensitive respectively) -# File todo.txt needs to be revised which words is actual misspellings + +if [[ ${1:-} == "--help" ]] || [[ ${1:-} == "-h" ]]; then + echo "Usage $0 [--help|-h] [-i]" + echo " --help|-h: print this help" + echo " -i: interactive mode" + exit 0 +fi ROOT_PATH=$(git rev-parse --show-toplevel) -CHECK_LANG=${1:-en} +CHECK_LANG=en ASPELL_IGNORE_PATH="${ROOT_PATH}/utils/check-style/aspell-ignore/${CHECK_LANG}" STATUS=0 for fname in ${ROOT_PATH}/docs/${CHECK_LANG}/**/*.md; do - # vvv ---- remove anchors ---- vvv - errors=$(cat "$fname" | sed -E 's/(^#.*) \{#[a-z-]+\}$/\1/' \ - | aspell list --add-sgml-skip=code --encoding=utf-8 --mode=markdown -W 3 --lang=${CHECK_LANG} --home-dir=${ASPELL_IGNORE_PATH} \ - | grep -Ewv -f "${ASPELL_IGNORE_PATH}/todo.txt" \ - | grep -Ewvi -f "${ASPELL_IGNORE_PATH}/caseinsensitive.txt" \ - | grep -Ewv -f "${ASPELL_IGNORE_PATH}/casesensitive.txt" \ - | grep -Ewv "[A-Z]+" \ + if [[ ${1:-} == "-i" ]]; then + echo "Checking $fname" + aspell --personal=aspell-dict.txt --add-sgml-skip=code --encoding=utf-8 --mode=markdown -W 3 --lang=${CHECK_LANG} --home-dir=${ASPELL_IGNORE_PATH} -c "$fname" + continue + fi + + errors=$(cat "$fname" \ + | aspell list \ + -W 3 \ + --personal=aspell-dict.txt \ + --add-sgml-skip=code \ + --encoding=utf-8 \ + --mode=markdown \ + --lang=${CHECK_LANG} \ + --home-dir=${ASPELL_IGNORE_PATH} \ | sort | uniq) if [ ! -z "$errors" ]; then STATUS=1 @@ -27,4 +40,10 @@ for fname in ${ROOT_PATH}/docs/${CHECK_LANG}/**/*.md; do fi done +if (( STATUS != 0 )); then + echo "====== Errors found ======" + echo "To exclude some words add them to the dictionary file \"${ASPELL_IGNORE_PATH}/aspell-dict.txt\"" + echo "You can also run ${0} -i to see the errors interactively and fix them or add to the dictionary file" +fi + exit ${STATUS}