ClickHouse/tests/queries/0_stateless/00990_hasToken.python

202 lines
5.9 KiB
Plaintext
Raw Normal View History

2020-10-02 16:54:07 +00:00
#!/usr/bin/env python3
# encoding: utf-8
import re
from string import Template
# Sample haystacks: the needle word sits in the middle, at the end and at the
# start of an otherwise identical run of "hay" words.
HAYSTACKS = [
    "hay hay hay hay hay hay hay hay hay needle hay hay hay hay hay hay hay hay hay",
    "hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay needle",
    "needle hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay",
]

# The token searched for in every haystack.
NEEDLE = "needle"

# Whole-word, case-insensitive matchers for the two words used in HAYSTACKS.
HAY_RE = re.compile(r"\bhay\b", re.IGNORECASE)
NEEDLE_RE = re.compile(r"\bneedle\b", re.IGNORECASE)
def replace_follow_case(replacement):
    """Build a ``re.sub`` callback that inserts *replacement* in the match's case.

    The returned function receives a match object and returns *replacement*
    lower-cased, title-cased or upper-cased, depending on which of
    ``str.islower`` / ``str.istitle`` / ``str.isupper`` the matched text
    satisfies first (checked in that order). If none applies (mixed case),
    *replacement* is returned unchanged.
    """

    def func(match):
        g = match.group()
        if g.islower():
            return replacement.lower()
        if g.istitle():
            return replacement.title()
        if g.isupper():
            return replacement.upper()
        return replacement

    return func
2023-03-23 15:33:23 +00:00
def replace_separators(query, new_sep):
    """Return *query* with every run of whitespace replaced by *new_sep*.

    *new_sep* is passed to ``re.sub`` as a replacement template, so backslash
    escapes inside it are processed: a doubled backslash in *new_sep* comes
    out as a single backslash in the result.
    """
    return re.sub(r"\s+", new_sep, query)
2023-03-23 15:33:23 +00:00
def enlarge_haystack(query, times, separator=""):
    """Blow up every standalone "hay" word in *query*.

    Each match of ``HAY_RE`` is replaced by ``"hay" + separator`` repeated
    *times* times, re-cased to follow the matched word.
    """
    return HAY_RE.sub(replace_follow_case(("hay" + separator) * times), query)
def small_needle(query):
    """Shrink every standalone "needle" word in *query* to "n", following its case."""
    return NEEDLE_RE.sub(replace_follow_case("n"), query)
def remove_needle(query):
    """Return *query* with every standalone "needle" word (any case) removed.

    Only the word itself is dropped; the surrounding separators are kept.
    """
    return NEEDLE_RE.sub("", query)
def replace_needle(query, new_needle):
    """Return *query* with every standalone "needle" word replaced by *new_needle*.

    *new_needle* is used as a ``re.sub`` replacement template, so backslash
    escapes in it would be interpreted.
    """
    return NEEDLE_RE.sub(new_needle, query)
2023-03-23 15:33:23 +00:00
# with str.lower, str.upper, str.title and such
def transform_needle(query, string_transformation_func):
    """Apply *string_transformation_func* to every standalone "needle" word in *query*.

    *string_transformation_func* takes the matched text and returns its
    replacement (for example ``str.upper`` or ``str.swapcase``). Text outside
    the matches is left untouched.
    """

    def replace_with_transformation(match):
        g = match.group()
        return string_transformation_func(g)

    return NEEDLE_RE.sub(replace_with_transformation, query)
2023-03-23 15:33:23 +00:00
def create_cases(
    case_sensitive_func,
    case_insensitive_func,
    table_row_template,
    table_query_template,
    const_query_template,
):
    """Generate the positive and negative test cases for both functions.

    Args:
        case_sensitive_func: value substituted as ``func`` for the
            case-sensitive cases.
        case_insensitive_func: value substituted as ``func`` for the
            case-insensitive cases.
        table_row_template: template rendered once per case into a table row.
        table_query_template: template rendered once per case into a table
            query; identical renderings are de-duplicated.
        const_query_template: template rendered once per case into a
            constant query.

    Each template must provide ``substitute(mapping)`` and is given the keys
    ``func``, ``haystack``, ``needle`` and ``match`` (0 or 1).

    Returns:
        A tuple ``(table_rows, table_queries, const_queries)``: a list, a set
        and a list of rendered strings respectively.
    """
    const_queries = []
    table_rows = []
    table_queries = set()

    def add_case(func, haystack, needle, match):
        # The expected result is rendered as 0/1 rather than False/True.
        match = int(match)
        args = dict(func=func, haystack=haystack, needle=needle, match=match)
        const_queries.append(const_query_template.substitute(args))
        table_queries.add(table_query_template.substitute(args))
        table_rows.append(table_row_template.substitute(args))

    def add_case_sensitive(haystack, needle, match):
        add_case(case_sensitive_func, haystack, needle, match)
        if match:
            # Swapping the case on both sides must still match.
            add_case(
                case_sensitive_func,
                transform_needle(haystack, str.swapcase),
                transform_needle(needle, str.swapcase),
                match,
            )

    def add_case_insensitive(haystack, needle, match):
        add_case(case_insensitive_func, haystack, needle, match)
        if match:
            # Swapping the case on either side alone must still match.
            add_case(
                case_insensitive_func,
                transform_needle(haystack, str.swapcase),
                needle,
                match,
            )
            add_case(
                case_insensitive_func,
                haystack,
                transform_needle(needle, str.swapcase),
                match,
            )

    # Negative cases
    add_case_sensitive(remove_needle(HAYSTACKS[0]), NEEDLE, False)
    add_case_insensitive(remove_needle(HAYSTACKS[0]), NEEDLE, False)

    for haystack in HAYSTACKS:
        add_case_sensitive(transform_needle(haystack, str.swapcase), NEEDLE, False)

        # With an empty separator the whole haystack collapses into one word,
        # so the needle is no longer a standalone token.
        sep = ""
        h = replace_separators(haystack, sep)

        add_case_sensitive(h, NEEDLE, False)
        add_case_insensitive(h, NEEDLE, False)

        add_case_sensitive(small_needle(h), small_needle(NEEDLE), False)
        add_case_insensitive(small_needle(h), small_needle(NEEDLE), False)

        add_case_sensitive(enlarge_haystack(h, 10, sep), NEEDLE, False)
        add_case_insensitive(enlarge_haystack(h, 10, sep), NEEDLE, False)

    # Positive cases
    for haystack in HAYSTACKS:
        add_case_sensitive(haystack, NEEDLE, True)
        add_case_insensitive(haystack, NEEDLE, True)

        for sep in """ ,""":
            h = replace_separators(haystack, sep)

            add_case_sensitive(h, NEEDLE, True)
            add_case_sensitive(small_needle(h), small_needle(NEEDLE), True)
            add_case_sensitive(enlarge_haystack(h, 200, sep), NEEDLE, True)

            add_case_insensitive(h, NEEDLE, True)
            add_case_insensitive(small_needle(h), small_needle(NEEDLE), True)
            add_case_insensitive(enlarge_haystack(h, 200, sep), NEEDLE, True)

            # Case insensitivity works only on ASCII strings
            add_case_sensitive(
                replace_needle(h, "иголка"), replace_needle(NEEDLE, "иголка"), True
            )
            add_case_sensitive(
                replace_needle(h, "指针"), replace_needle(NEEDLE, "指针"), True
            )

    # Single-character separators plus one multi-character backslash entry;
    # these go through add_case directly, so no swapped-case variants are added.
    for sep in list("""~!@$%^&*()-=+|]}[{";:/?.><\t""") + [r"\\\\"]:
        h = replace_separators(HAYSTACKS[0], sep)
        add_case(case_sensitive_func, h, NEEDLE, True)

    return table_rows, table_queries, const_queries
2023-03-23 15:33:23 +00:00
def main():
    """Print the generated SQL: constant queries, table setup, table queries, cleanup."""

    def query(x):
        # Queries are emitted on standard output, one print call each.
        print(x)

    CONST_QUERY = Template(
        """SELECT ${func}('${haystack}', '${needle}'), ' expecting ', ${match};"""
    )
    TABLE_QUERY = Template(
        """WITH '${needle}' as n
SELECT haystack, needle, ${func}(haystack, n) as result
FROM ht
WHERE func = '${func}' AND needle = n AND result != match;"""
    )
    TABLE_ROW = Template("""('${haystack}', '${needle}', ${match}, '${func}')""")

    rows, table_queries, const_queries = create_cases(
        "hasToken", "hasTokenCaseInsensitive", TABLE_ROW, TABLE_QUERY, CONST_QUERY
    )

    for q in const_queries:
        query(q)

    # Create the table and load every generated row with a single INSERT.
    query(
        """DROP TABLE IF EXISTS ht;
CREATE TABLE IF NOT EXISTS
ht
(
haystack String,
needle String,
match UInt8,
func String
)
ENGINE MergeTree()
ORDER BY haystack;
INSERT INTO ht VALUES {values};""".format(
            values=", ".join(rows)
        )
    )

    # table_queries is a set; sorting makes the output order deterministic.
    for q in sorted(table_queries):
        query(q)

    query("""DROP TABLE ht""")
2023-03-23 15:33:23 +00:00
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()