From 2084d262389570f746fdd48eca89b63b8da83b6a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 8 Jan 2020 13:20:55 +0300 Subject: [PATCH] Removed useless code, added comments --- dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index da3f1df8130..8041ad4dbe7 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -636,15 +636,16 @@ bool SplitTokenExtractor::next(const char * data, size_t len, size_t * pos, size { if (isASCII(data[*pos]) && !isAlphaNumericASCII(data[*pos])) { + /// Finish current token if any if (*token_len > 0) return true; *token_start = ++*pos; } else { - const size_t sz = UTF8::seqLength(static_cast(data[*pos])); - *pos += sz; - *token_len += sz; + /// Note that UTF-8 sequence is completely consisted of non-ASCII bytes. + ++*pos; + ++*token_len; } } return *token_len > 0;