mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
Better check-doc-aspell
This commit is contained in:
parent
27ebad89c5
commit
226fcbdac7
@ -1,18 +1,26 @@
|
||||
FixedString
|
||||
personal_ws-1.1 en 479
|
||||
AArch
|
||||
ACLs
|
||||
AMQP
|
||||
ASLR
|
||||
ASan
|
||||
Actian
|
||||
AddressSanitizer
|
||||
AppleClang
|
||||
ArrowStream
|
||||
AvroConfluent
|
||||
CCTOOLS
|
||||
CLion
|
||||
CMake
|
||||
CMakeLists
|
||||
CPUs
|
||||
CSVWithNames
|
||||
CSVWithNamesAndTypes
|
||||
CamelCase
|
||||
CapnProto
|
||||
CentOS
|
||||
ClickHouse
|
||||
Config
|
||||
Contrib
|
||||
Ctrl
|
||||
CustomSeparated
|
||||
@ -25,6 +33,8 @@ Doxygen
|
||||
Encodings
|
||||
Enum
|
||||
Eoan
|
||||
FixedString
|
||||
FreeBSD
|
||||
Fuzzer
|
||||
Fuzzers
|
||||
GTest
|
||||
@ -39,17 +49,56 @@ Hostname
|
||||
IPv
|
||||
IntN
|
||||
Integrations
|
||||
JSONAsString
|
||||
JSONColumns
|
||||
JSONColumnsWithMetadata
|
||||
JSONCompact
|
||||
JSONCompactColumns
|
||||
JSONCompactEachRow
|
||||
JSONCompactEachRowWithNames
|
||||
JSONCompactEachRowWithNamesAndTypes
|
||||
JSONCompactStrings
|
||||
JSONCompactStringsEachRow
|
||||
JSONCompactStringsEachRowWithNames
|
||||
JSONCompactStringsEachRowWithNamesAndTypes
|
||||
JSONEachRow
|
||||
JSONEachRowWithProgress
|
||||
JSONStrings
|
||||
JSONStringsEachRow
|
||||
JSONStringsEachRowWithProgress
|
||||
JSONs
|
||||
Jaeger
|
||||
Jemalloc
|
||||
Jepsen
|
||||
KDevelop
|
||||
LGPL
|
||||
LOCALTIME
|
||||
LOCALTIMESTAMP
|
||||
LibFuzzer
|
||||
LineAsString
|
||||
LowCardinality
|
||||
MEMTABLE
|
||||
MSan
|
||||
MacOS
|
||||
Memcheck
|
||||
MemorySanitizer
|
||||
MergeTree
|
||||
MessagePack
|
||||
MiB
|
||||
MsgPack
|
||||
Multiline
|
||||
Multithreading
|
||||
MySQLDump
|
||||
NEKUDOTAYIM
|
||||
NULLIF
|
||||
NVME
|
||||
NuRaft
|
||||
Ok
|
||||
OpenSUSE
|
||||
OpenStack
|
||||
OpenTelemetry
|
||||
PAAMAYIM
|
||||
Parsers
|
||||
Postgres
|
||||
Precompiled
|
||||
PrettyCompact
|
||||
@ -61,17 +110,31 @@ PrettySpaceNoEscapes
|
||||
Protobuf
|
||||
ProtobufSingle
|
||||
QTCreator
|
||||
RBAC
|
||||
RawBLOB
|
||||
RedHat
|
||||
RowBinary
|
||||
RowBinaryWithNames
|
||||
RowBinaryWithNamesAndTypes
|
||||
Runtime
|
||||
SATA
|
||||
SERIALIZABLE
|
||||
SIMD
|
||||
SMALLINT
|
||||
SQLSTATE
|
||||
SSSE
|
||||
Schemas
|
||||
Stateful
|
||||
Submodules
|
||||
Subqueries
|
||||
TSVRaw
|
||||
TSan
|
||||
TabSeparated
|
||||
TabSeparatedRaw
|
||||
TabSeparatedRawWithNames
|
||||
TabSeparatedRawWithNamesAndTypes
|
||||
TabSeparatedWithNames
|
||||
TabSeparatedWithNamesAndTypes
|
||||
TargetSpecific
|
||||
TemplateIgnoreSpaces
|
||||
Testflows
|
||||
@ -84,6 +147,7 @@ UIntN
|
||||
UPDATEs
|
||||
Uint
|
||||
Updatable
|
||||
Util
|
||||
Valgrind
|
||||
Vectorized
|
||||
VirtualBox
|
||||
@ -92,31 +156,211 @@ Woboq
|
||||
WriteBuffer
|
||||
WriteBuffers
|
||||
XCode
|
||||
YAML
|
||||
YYYY
|
||||
Zipkin
|
||||
ZooKeeper
|
||||
ZooKeeper's
|
||||
aarch
|
||||
allocator
|
||||
analytics
|
||||
anonymized
|
||||
ansi
|
||||
async
|
||||
autogeneration
|
||||
autostart
|
||||
avro
|
||||
avx
|
||||
aws
|
||||
backoff
|
||||
backticks
|
||||
benchmarking
|
||||
blake
|
||||
blockSize
|
||||
boolean
|
||||
boringssl
|
||||
brotli
|
||||
buildable
|
||||
camelCase
|
||||
capn
|
||||
capnproto
|
||||
cardinality
|
||||
cassandra
|
||||
cbindgen
|
||||
ccache
|
||||
cctz
|
||||
cfg
|
||||
changelog
|
||||
checkouting
|
||||
checksummed
|
||||
checksumming
|
||||
checksums
|
||||
cityhash
|
||||
cli
|
||||
clickhouse
|
||||
clickstream
|
||||
cmake
|
||||
codebase
|
||||
codec
|
||||
comparising
|
||||
config
|
||||
configs
|
||||
contrib
|
||||
coroutines
|
||||
cpp
|
||||
cppkafka
|
||||
cpu
|
||||
croaring
|
||||
cronjob
|
||||
csv
|
||||
csvwithnames
|
||||
csvwithnamesandtypes
|
||||
customseparated
|
||||
customseparatedwithnames
|
||||
customseparatedwithnamesandtypes
|
||||
cyrus
|
||||
datacenter
|
||||
datafiles
|
||||
dataset
|
||||
datasets
|
||||
datetime
|
||||
dbms
|
||||
ddl
|
||||
deallocation
|
||||
debian
|
||||
decompressor
|
||||
denormals
|
||||
deserialization
|
||||
deserialized
|
||||
destructor
|
||||
destructors
|
||||
dmesg
|
||||
dont
|
||||
dragonbox
|
||||
durations
|
||||
endian
|
||||
enum
|
||||
fastops
|
||||
fcoverage
|
||||
filesystem
|
||||
filesystems
|
||||
flatbuffers
|
||||
fmtlib
|
||||
formatschema
|
||||
formatter
|
||||
fuzzer
|
||||
fuzzers
|
||||
gRPC
|
||||
gcem
|
||||
github
|
||||
glibc
|
||||
googletest
|
||||
grpc
|
||||
grpcio
|
||||
gtest
|
||||
hardlinks
|
||||
hdfs
|
||||
heredoc
|
||||
heredocs
|
||||
homebrew
|
||||
http
|
||||
https
|
||||
hyperscan
|
||||
icudata
|
||||
instantiation
|
||||
integrational
|
||||
integrations
|
||||
interserver
|
||||
jdbc
|
||||
jemalloc
|
||||
json
|
||||
jsonasstring
|
||||
jsoncolumns
|
||||
jsoncolumnsmonoblock
|
||||
jsoncompact
|
||||
jsoncompactcolumns
|
||||
jsoncompacteachrow
|
||||
jsoncompacteachrowwithnames
|
||||
jsoncompacteachrowwithnamesandtypes
|
||||
jsoncompactstrings
|
||||
jsoncompactstringseachrow
|
||||
jsoncompactstringseachrowwithnames
|
||||
jsoncompactstringseachrowwithnamesandtypes
|
||||
jsoneachrow
|
||||
jsoneachrowwithprogress
|
||||
jsonstrings
|
||||
jsonstringseachrow
|
||||
jsonstringseachrowwithprogress
|
||||
kafka
|
||||
kafkacat
|
||||
konsole
|
||||
latencies
|
||||
lexicographically
|
||||
libFuzzer
|
||||
libc
|
||||
libcpuid
|
||||
libcxx
|
||||
libcxxabi
|
||||
libdivide
|
||||
libfarmhash
|
||||
libfuzzer
|
||||
libgsasl
|
||||
libhdfs
|
||||
libmetrohash
|
||||
libpq
|
||||
libpqxx
|
||||
librdkafka
|
||||
libs
|
||||
libunwind
|
||||
libuv
|
||||
libvirt
|
||||
linearizability
|
||||
linearizable
|
||||
lineasstring
|
||||
linefeeds
|
||||
linux
|
||||
llvm
|
||||
localhost
|
||||
macOS
|
||||
mariadb
|
||||
miniselect
|
||||
msgpack
|
||||
msgpk
|
||||
multiline
|
||||
multithread
|
||||
murmurhash
|
||||
mutex
|
||||
mysql
|
||||
mysqldump
|
||||
mysqljs
|
||||
noop
|
||||
nullable
|
||||
num
|
||||
obfuscator
|
||||
odbc
|
||||
ok
|
||||
openldap
|
||||
opentelemetry
|
||||
overcommit
|
||||
parallelization
|
||||
parallelize
|
||||
parallelized
|
||||
parsers
|
||||
pclmulqdq
|
||||
performant
|
||||
poco
|
||||
popcnt
|
||||
postfix
|
||||
postfixes
|
||||
postgresql
|
||||
pre
|
||||
prebuild
|
||||
prebuilt
|
||||
preemptable
|
||||
preloaded
|
||||
preprocessed
|
||||
preprocessor
|
||||
presentational
|
||||
prestable
|
||||
prettycompact
|
||||
prettycompactmonoblock
|
||||
@ -141,6 +385,9 @@ readonly
|
||||
rebalanced
|
||||
replxx
|
||||
repo
|
||||
representable
|
||||
requestor
|
||||
resultset
|
||||
rethrow
|
||||
risc
|
||||
ro
|
||||
@ -151,9 +398,14 @@ rowbinarywithnames
|
||||
rowbinarywithnamesandtypes
|
||||
rsync
|
||||
runningAccumulate
|
||||
runtime
|
||||
russian
|
||||
rw
|
||||
sasl
|
||||
schemas
|
||||
simdjson
|
||||
skippingerrors
|
||||
sparsehash
|
||||
sql
|
||||
src
|
||||
stacktraces
|
||||
@ -175,12 +427,19 @@ subpatterns
|
||||
subqueries
|
||||
subquery
|
||||
subseconds
|
||||
substring
|
||||
subtree
|
||||
sudo
|
||||
symlink
|
||||
symlinks
|
||||
syntaxes
|
||||
systemd
|
||||
tabseparated
|
||||
tabseparatedraw
|
||||
tabseparatedrawwithnames
|
||||
tabseparatedrawwithnamesandtypes
|
||||
tabseparatedwithnames
|
||||
tabseparatedwithnamesandtypes
|
||||
tcp
|
||||
templateignorespaces
|
||||
tgz
|
||||
@ -199,7 +458,7 @@ unencrypted
|
||||
unixodbc
|
||||
url
|
||||
userspace
|
||||
usr
|
||||
utils
|
||||
variadic
|
||||
varint
|
||||
vectorized
|
||||
@ -208,6 +467,8 @@ wchs
|
||||
webpage
|
||||
webserver
|
||||
wget
|
||||
whitespace
|
||||
whitespaces
|
||||
wrt
|
||||
xcode
|
||||
xml
|
||||
@ -217,126 +478,3 @@ zkcopy
|
||||
zlib
|
||||
znodes
|
||||
zstd
|
||||
datacenter
|
||||
datafiles
|
||||
dataset
|
||||
datasets
|
||||
datetime
|
||||
deallocation
|
||||
libmetrohash
|
||||
libpq
|
||||
libpqxx
|
||||
librdkafka
|
||||
libs
|
||||
libunwind
|
||||
libuv
|
||||
libvirt
|
||||
linearizability
|
||||
linearizable
|
||||
LineAsString
|
||||
llvm
|
||||
localhost
|
||||
macOS
|
||||
mutex
|
||||
mysql
|
||||
MySQLDump
|
||||
comparising
|
||||
contrib
|
||||
decompressor
|
||||
deserialization
|
||||
deserialized
|
||||
destructor
|
||||
destructors
|
||||
dmesg
|
||||
dragonbox
|
||||
durations
|
||||
endian
|
||||
enum
|
||||
fastops
|
||||
fcoverage
|
||||
filesystems
|
||||
flatbuffers
|
||||
fmtlib
|
||||
formatter
|
||||
FreeBSD
|
||||
fuzzer
|
||||
fuzzers
|
||||
gcem
|
||||
github
|
||||
glibc
|
||||
googletest
|
||||
gRPC
|
||||
grpc
|
||||
grpcio
|
||||
gtest
|
||||
hardlinks
|
||||
heredoc
|
||||
heredocs
|
||||
http
|
||||
https
|
||||
NuRaft
|
||||
odbc
|
||||
parallelization
|
||||
parallelize
|
||||
parallelized
|
||||
Parsers
|
||||
parsers
|
||||
pclmulqdq
|
||||
performant
|
||||
poco
|
||||
popcnt
|
||||
presentational
|
||||
representable
|
||||
requestor
|
||||
resultset
|
||||
runtime
|
||||
Jemalloc
|
||||
jemalloc
|
||||
Jepsen
|
||||
KDevelop
|
||||
konsole
|
||||
libcxx
|
||||
libcxxabi
|
||||
libdivide
|
||||
libfarmhash
|
||||
LibFuzzer
|
||||
libFuzzer
|
||||
libfuzzer
|
||||
libgsasl
|
||||
libhdfs
|
||||
linefeeds
|
||||
mariadb
|
||||
miniselect
|
||||
MSan
|
||||
MsgPack
|
||||
msgpack
|
||||
msgpk
|
||||
Multiline
|
||||
multiline
|
||||
multithread
|
||||
Multithreading
|
||||
murmurhash
|
||||
mysqljs
|
||||
openldap
|
||||
opentelemetry
|
||||
overcommit
|
||||
integrational
|
||||
interserver
|
||||
Jaeger
|
||||
jdbc
|
||||
kafka
|
||||
kafkacat
|
||||
lexicographically
|
||||
libc
|
||||
libcpuid
|
||||
sasl
|
||||
Schemas
|
||||
schemas
|
||||
sparsehash
|
||||
croaring
|
||||
cyrus
|
||||
denormals
|
||||
hyperscan
|
||||
icudata
|
||||
instantiation
|
||||
integrations
|
@ -1,54 +0,0 @@
|
||||
aarch
|
||||
allocator
|
||||
analytics
|
||||
anonymized
|
||||
ansi
|
||||
async
|
||||
autogeneration
|
||||
autostart
|
||||
avro
|
||||
avx
|
||||
aws
|
||||
backoff
|
||||
backticks
|
||||
benchmarking
|
||||
blake
|
||||
boolean
|
||||
boringssl
|
||||
brotli
|
||||
buildable
|
||||
capn
|
||||
capnproto
|
||||
cardinality
|
||||
cassandra
|
||||
cbindgen
|
||||
ccache
|
||||
cctz
|
||||
cfg
|
||||
changelog
|
||||
checkouting
|
||||
checksummed
|
||||
checksumming
|
||||
checksums
|
||||
cityhash
|
||||
cli
|
||||
clickhouse
|
||||
clickstream
|
||||
cmake
|
||||
codebase
|
||||
codec
|
||||
configs?
|
||||
cpp
|
||||
csv
|
||||
filesystem
|
||||
json[a-z]*
|
||||
latencies
|
||||
noop
|
||||
nullable
|
||||
num
|
||||
obfuscator
|
||||
preemptable
|
||||
substring
|
||||
tabseparated[a-z]*
|
||||
utils?
|
||||
whitespaces?
|
@ -1,24 +1,37 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Perform spell checking on the docs
|
||||
# Files casesensitive.txt and caseinsensitive.txt contain words to ignore (case sensitive and case insensitive respectively)
|
||||
# File todo.txt needs to be reviewed to determine which of its words are actual misspellings
|
||||
|
||||
if [[ ${1:-} == "--help" ]] || [[ ${1:-} == "-h" ]]; then
|
||||
echo "Usage $0 [--help|-h] [-i]"
|
||||
echo " --help|-h: print this help"
|
||||
echo " -i: interactive mode"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
ROOT_PATH=$(git rev-parse --show-toplevel)
|
||||
|
||||
CHECK_LANG=${1:-en}
|
||||
CHECK_LANG=en
|
||||
|
||||
ASPELL_IGNORE_PATH="${ROOT_PATH}/utils/check-style/aspell-ignore/${CHECK_LANG}"
|
||||
|
||||
STATUS=0
|
||||
for fname in ${ROOT_PATH}/docs/${CHECK_LANG}/**/*.md; do
|
||||
# vvv ---- remove anchors ---- vvv
|
||||
errors=$(cat "$fname" | sed -E 's/(^#.*) \{#[a-z-]+\}$/\1/' \
|
||||
| aspell list --add-sgml-skip=code --encoding=utf-8 --mode=markdown -W 3 --lang=${CHECK_LANG} --home-dir=${ASPELL_IGNORE_PATH} \
|
||||
| grep -Ewv -f "${ASPELL_IGNORE_PATH}/todo.txt" \
|
||||
| grep -Ewvi -f "${ASPELL_IGNORE_PATH}/caseinsensitive.txt" \
|
||||
| grep -Ewv -f "${ASPELL_IGNORE_PATH}/casesensitive.txt" \
|
||||
| grep -Ewv "[A-Z]+" \
|
||||
if [[ ${1:-} == "-i" ]]; then
|
||||
echo "Checking $fname"
|
||||
aspell --personal=aspell-dict.txt --add-sgml-skip=code --encoding=utf-8 --mode=markdown -W 3 --lang=${CHECK_LANG} --home-dir=${ASPELL_IGNORE_PATH} -c "$fname"
|
||||
continue
|
||||
fi
|
||||
|
||||
errors=$(cat "$fname" \
|
||||
| aspell list \
|
||||
-W 3 \
|
||||
--personal=aspell-dict.txt \
|
||||
--add-sgml-skip=code \
|
||||
--encoding=utf-8 \
|
||||
--mode=markdown \
|
||||
--lang=${CHECK_LANG} \
|
||||
--home-dir=${ASPELL_IGNORE_PATH} \
|
||||
| sort | uniq)
|
||||
if [ ! -z "$errors" ]; then
|
||||
STATUS=1
|
||||
@ -27,4 +40,10 @@ for fname in ${ROOT_PATH}/docs/${CHECK_LANG}/**/*.md; do
|
||||
fi
|
||||
done
|
||||
|
||||
if (( STATUS != 0 )); then
|
||||
echo "====== Errors found ======"
|
||||
echo "To exclude some words add them to the dictionary file \"${ASPELL_IGNORE_PATH}/aspell-dict.txt\""
|
||||
echo "You can also run ${0} -i to see the errors interactively and fix them or add to the dictionary file"
|
||||
fi
|
||||
|
||||
exit ${STATUS}
|
||||
|
Loading…
Reference in New Issue
Block a user