mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-18 21:51:57 +00:00
202 lines
5.9 KiB
Python
202 lines
5.9 KiB
Python
#!/usr/bin/env python3
|
|
# encoding: utf-8
|
|
|
|
import re
|
|
from string import Template
|
|
|
|
# Sample haystacks: the needle token sits in the middle, at the end and at
# the start of the string respectively.
HAYSTACKS = [
    "hay hay hay hay hay hay hay hay hay needle hay hay hay hay hay hay hay hay hay",
    "hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay needle",
    "needle hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay",
]

# Token that the generated queries search for.
NEEDLE = "needle"

# Whole-word, case-insensitive matchers for the two words used in HAYSTACKS.
HAY_RE = re.compile(r"\bhay\b", re.IGNORECASE)
NEEDLE_RE = re.compile(r"\bneedle\b", re.IGNORECASE)
|
|
|
|
|
|
def replace_follow_case(replacement):
    """Build a ``re.sub`` callback that copies the casing of the matched text.

    The returned callable takes a match object and returns ``replacement``
    lower-cased, title-cased or upper-cased when the matched text is,
    respectively, all lower case, title case or all upper case. For any
    other casing ``replacement`` is returned unchanged.
    """

    def func(match):
        g = match.group()
        # The checks are tried in this order; the first one that holds wins.
        if g.islower():
            return replacement.lower()
        if g.istitle():
            return replacement.title()
        if g.isupper():
            return replacement.upper()
        return replacement

    return func
|
|
|
|
|
|
def replace_separators(query, new_sep):
    """Return ``query`` with every run of whitespace replaced by ``new_sep``.

    ``new_sep`` is handed to ``re.sub`` as the replacement template, so
    backslash escapes inside it are processed: two consecutive backslashes
    in ``new_sep`` produce a single backslash in the result.
    """
    return re.sub(r"\s+", new_sep, query)
|
|
|
|
|
|
def enlarge_haystack(query, times, separator=""):
    """Return ``query`` with every whole-word "hay" blown up ``times`` times.

    Each match of ``HAY_RE`` is replaced by ``"hay" + separator`` repeated
    ``times`` times; ``replace_follow_case`` re-cases that text to follow
    the casing of the matched word.
    """
    expanded_hay = ("hay" + separator) * times
    return HAY_RE.sub(replace_follow_case(expanded_hay), query)
|
|
|
|
|
|
def small_needle(query):
    """Return ``query`` with every whole-word needle shortened to "n".

    The one-letter replacement is re-cased by ``replace_follow_case`` to
    follow the casing of the matched word.
    """
    return NEEDLE_RE.sub(replace_follow_case("n"), query)
|
|
|
|
|
|
def remove_needle(query):
    """Return ``query`` with every match of ``NEEDLE_RE`` deleted.

    Only the matched word is removed; surrounding separators stay in place.
    """
    return NEEDLE_RE.sub("", query)
|
|
|
|
|
|
def replace_needle(query, new_needle):
    """Return ``query`` with every match of ``NEEDLE_RE`` replaced.

    ``new_needle`` is used as the ``re.sub`` replacement template, so any
    backslash escapes it contains are processed.
    """
    return NEEDLE_RE.sub(new_needle, query)
|
|
|
|
|
|
# Works with str.lower, str.upper, str.title and such
|
|
def transform_needle(query, string_transformation_func):
    """Return ``query`` with each match of ``NEEDLE_RE`` transformed.

    ``string_transformation_func`` is called with the matched text (a
    ``str``) and its return value replaces the match. Functions such as
    ``str.lower``, ``str.upper``, ``str.title`` or ``str.swapcase`` fit.
    """
    return NEEDLE_RE.sub(
        lambda match: string_transformation_func(match.group()), query
    )
|
|
|
|
|
|
def create_cases(
    case_sensitive_func,
    case_insensitive_func,
    table_row_template,
    table_query_template,
    const_query_template,
):
    """Generate the test cases for a pair of token-search functions.

    Every case is rendered through the three templates by calling their
    ``substitute`` method with the keys ``func``, ``haystack``, ``needle``
    and ``match`` (``match`` is rendered as ``0`` or ``1``).

    Args:
        case_sensitive_func: Name substituted as ``func`` for the
            case-sensitive cases.
        case_insensitive_func: Name substituted as ``func`` for the
            case-insensitive cases.
        table_row_template: Template rendered once per case into a row.
        table_query_template: Template rendered once per case into a query;
            identical renderings are kept only once.
        const_query_template: Template rendered once per case into a query.

    Returns:
        A tuple ``(table_rows, table_queries, const_queries)`` where
        ``table_rows`` and ``const_queries`` are lists in generation order
        and ``table_queries`` is a set.
    """
    const_queries = []
    table_rows = []
    table_queries = set()

    def add_case(func, haystack, needle, match):
        # Render a single case with every template.
        args = dict(func=func, haystack=haystack, needle=needle, match=int(match))
        const_queries.append(const_query_template.substitute(args))
        table_queries.add(table_query_template.substitute(args))
        table_rows.append(table_row_template.substitute(args))

    def add_case_sensitive(haystack, needle, match):
        add_case(case_sensitive_func, haystack, needle, match)
        if match:
            # Swapping the case of the needle on both sides keeps the
            # expected result.
            add_case(
                case_sensitive_func,
                transform_needle(haystack, str.swapcase),
                transform_needle(needle, str.swapcase),
                match,
            )

    def add_case_insensitive(haystack, needle, match):
        add_case(case_insensitive_func, haystack, needle, match)
        if match:
            # Swapping the case on only one side keeps the expected result
            # for the case-insensitive function.
            add_case(
                case_insensitive_func,
                transform_needle(haystack, str.swapcase),
                needle,
                match,
            )
            add_case(
                case_insensitive_func,
                haystack,
                transform_needle(needle, str.swapcase),
                match,
            )

    # Negative cases
    without_needle = remove_needle(HAYSTACKS[0])
    add_case_sensitive(without_needle, NEEDLE, False)
    add_case_insensitive(without_needle, NEEDLE, False)

    for haystack in HAYSTACKS:
        # Needle present but with swapped case: the case-sensitive function
        # is expected not to match.
        add_case_sensitive(transform_needle(haystack, str.swapcase), NEEDLE, False)

        # With the separators removed the needle is no longer a separate
        # word in the haystack.
        sep = ""
        h = replace_separators(haystack, sep)

        add_case_sensitive(h, NEEDLE, False)
        add_case_insensitive(h, NEEDLE, False)

        short_h = small_needle(h)
        short_needle = small_needle(NEEDLE)
        add_case_sensitive(short_h, short_needle, False)
        add_case_insensitive(short_h, short_needle, False)

        big_h = enlarge_haystack(h, 10, sep)
        add_case_sensitive(big_h, NEEDLE, False)
        add_case_insensitive(big_h, NEEDLE, False)

    # Positive cases
    for haystack in HAYSTACKS:
        add_case_sensitive(haystack, NEEDLE, True)
        add_case_insensitive(haystack, NEEDLE, True)

        for sep in " ,":
            h = replace_separators(haystack, sep)
            short_h = small_needle(h)
            short_needle = small_needle(NEEDLE)
            big_h = enlarge_haystack(h, 200, sep)

            add_case_sensitive(h, NEEDLE, True)
            add_case_sensitive(short_h, short_needle, True)
            add_case_sensitive(big_h, NEEDLE, True)

            add_case_insensitive(h, NEEDLE, True)
            add_case_insensitive(short_h, short_needle, True)
            add_case_insensitive(big_h, NEEDLE, True)

            # Case insensitivity works only on ASCII strings, so the
            # non-ASCII needles go through the case-sensitive path only.
            add_case_sensitive(
                replace_needle(h, "иголка"), replace_needle(NEEDLE, "иголка"), True
            )
            add_case_sensitive(
                replace_needle(h, "指针"), replace_needle(NEEDLE, "指针"), True
            )

    # Other separator characters. add_case is called directly, so no
    # swapped-case variant is generated for these.
    for sep in list("""~!@$%^&*()-=+|]}[{";:/?.><\t""") + [r"\\\\"]:
        h = replace_separators(HAYSTACKS[0], sep)
        add_case(case_sensitive_func, h, NEEDLE, True)

    return table_rows, table_queries, const_queries
|
|
|
|
|
|
def main():
    """Print the generated SQL test script to standard output.

    The output consists of, in order: the constant-argument queries, the
    statements that create and fill table ``ht``, the table queries in
    sorted order, and the final ``DROP TABLE``.
    """

    def query(x):
        print(x)

    # NOTE(review): leading whitespace inside the multi-line SQL literals
    # below could not be recovered from this copy of the file; the lines are
    # reproduced flush-left. Confirm against the upstream file if the exact
    # bytes of the emitted SQL matter.
    CONST_QUERY = Template(
        """SELECT ${func}('${haystack}', '${needle}'), ' expecting ', ${match};"""
    )
    TABLE_QUERY = Template(
        """WITH '${needle}' as n
SELECT haystack, needle, ${func}(haystack, n) as result
FROM ht
WHERE func = '${func}' AND needle = n AND result != match;"""
    )
    TABLE_ROW = Template("""('${haystack}', '${needle}', ${match}, '${func}')""")

    rows, table_queries, const_queries = create_cases(
        "hasToken", "hasTokenCaseInsensitive", TABLE_ROW, TABLE_QUERY, CONST_QUERY
    )
    for q in const_queries:
        query(q)

    query(
        """DROP TABLE IF EXISTS ht;
CREATE TABLE IF NOT EXISTS
ht
(
haystack String,
needle String,
match UInt8,
func String
)
ENGINE MergeTree()
ORDER BY haystack;
INSERT INTO ht VALUES {values};""".format(
            values=", ".join(rows)
        )
    )
    # table_queries is a set; sorting makes the output order deterministic.
    for q in sorted(table_queries):
        query(q)

    query("""DROP TABLE ht""")
|
|
|
|
|
|
# Run the generator only when the file is executed as a script.
if __name__ == "__main__":
    main()
|