mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-05 15:21:43 +00:00
161 lines
5.5 KiB
Python
161 lines
5.5 KiB
Python
#!/usr/bin/env python3
|
|
# encoding: utf-8
|
|
|
|
import re
|
|
from string import Template
|
|
|
|
# Sample haystacks: the needle token sits in the middle, at the very end and
# at the very beginning of the text, respectively.
HAYSTACKS = [
    "hay hay hay hay hay hay hay hay hay needle hay hay hay hay hay hay hay hay hay",
    "hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay needle",
    "needle hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay",
]

# The token that is searched for inside the haystacks.
NEEDLE = "needle"

# Whole-word, case-insensitive matchers for the filler and the needle tokens.
HAY_RE = re.compile(r"\bhay\b", flags=re.IGNORECASE)
NEEDLE_RE = re.compile(r"\bneedle\b", flags=re.IGNORECASE)
|
|
|
|
def replace_follow_case(replacement):
    """Build a ``re.sub`` callback that copies the case style of each match.

    The returned callable receives a match object and returns ``replacement``
    lower-cased, title-cased or upper-cased when the matched text is entirely
    lower case, title case or upper case respectively (checked in that
    order). For any other mix of cases ``replacement`` is returned unchanged.
    """
    # (predicate method, converter method) pairs, tried in order.
    case_rules = (
        ('islower', 'lower'),
        ('istitle', 'title'),
        ('isupper', 'upper'),
    )

    def follow_case(match):
        matched_text = match.group()
        for predicate, converter in case_rules:
            if getattr(matched_text, predicate)():
                return getattr(replacement, converter)()
        return replacement

    return follow_case
|
|
|
|
def replace_separators(query, new_sep):
    """Return ``query`` with every run of whitespace replaced by ``new_sep``.

    ``new_sep`` is handed to ``re.sub`` as the replacement string, so it is
    treated as a replacement template: backslash escapes inside it are
    processed by ``re``.
    """
    return re.sub('\\s+', new_sep, query)
|
|
|
|
def enlarge_haystack(query, times, separator=''):
    """Blow up every whole-word ``hay`` in ``query``.

    Each match of ``HAY_RE`` is replaced by ``'hay' + separator`` repeated
    ``times`` times, with the case style of the replaced word carried over
    by ``replace_follow_case``.
    """
    repeated_hay = ('hay' + separator) * times
    grow = replace_follow_case(repeated_hay)
    return HAY_RE.sub(grow, query)
|
|
|
|
def small_needle(query):
    """Shrink every whole-word needle in ``query`` to the single letter ``n``.

    The case style of the replaced word is carried over by
    ``replace_follow_case``.
    """
    shrink = replace_follow_case('n')
    return NEEDLE_RE.sub(shrink, query)
|
|
|
|
def remove_needle(query):
    """Return ``query`` with every match of ``NEEDLE_RE`` deleted.

    Only the matched word is removed; the surrounding text, including the
    separators on either side, is kept as is.
    """
    nothing = ''
    stripped = NEEDLE_RE.sub(nothing, query)
    return stripped
|
|
|
|
def replace_needle(query, new_needle):
    """Return ``query`` with every match of ``NEEDLE_RE`` swapped for ``new_needle``.

    ``new_needle`` is passed to ``sub`` as a replacement string, so it is
    interpreted as a ``re`` replacement template.
    """
    swapped = NEEDLE_RE.sub(new_needle, query)
    return swapped
|
|
|
|
# with str.lower, str.upper, str.title and such
|
|
def transform_needle(query, string_transformation_func):
    """Apply ``string_transformation_func`` to every needle found in ``query``.

    Each match of ``NEEDLE_RE`` is replaced by the result of calling
    ``string_transformation_func`` on the matched text (for example
    ``str.swapcase``); everything else in ``query`` is left untouched.
    """
    return NEEDLE_RE.sub(
        lambda found: string_transformation_func(found.group()),
        query,
    )
|
|
|
|
def create_cases(case_sensitive_func, case_insensitive_func, table_row_template, table_query_template, const_query_template):
    """Generate the token-search test cases for a pair of functions.

    Args:
        case_sensitive_func: value substituted for ``func`` in the cases that
            exercise case-sensitive matching.
        case_insensitive_func: value substituted for ``func`` in the cases
            that exercise case-insensitive matching.
        table_row_template: template rendered once per case into a table row.
        table_query_template: template rendered once per case into a table
            query; the results are kept in a set, so duplicates collapse.
        const_query_template: template rendered once per case into a
            constant-argument query.

    Every template is rendered through ``substitute`` with the keys ``func``,
    ``haystack``, ``needle`` and ``match`` (``match`` is converted with
    ``int``).

    Returns:
        The tuple ``(table_rows, table_queries, const_queries)``: a list, a
        set and a list of rendered strings, in that order.
    """
    const_queries = []
    table_rows = []
    table_queries = set()

    def swap_case(text):
        # Invert the letter case of every needle token found in ``text``.
        return transform_needle(text, str.swapcase)

    def add_case(func, haystack, needle, match):
        # Render one case through all three templates.
        values = {
            'func': func,
            'haystack': haystack,
            'needle': needle,
            'match': int(match),
        }
        const_queries.append(const_query_template.substitute(values))
        table_queries.add(table_query_template.substitute(values))
        table_rows.append(table_row_template.substitute(values))

    def add_case_sensitive(haystack, needle, match):
        add_case(case_sensitive_func, haystack, needle, match)
        if not match:
            return
        # A positive case must stay positive when both sides swap case.
        add_case(case_sensitive_func, swap_case(haystack), swap_case(needle), match)

    def add_case_insensitive(haystack, needle, match):
        add_case(case_insensitive_func, haystack, needle, match)
        if not match:
            return
        # A positive case must stay positive when either side swaps case.
        add_case(case_insensitive_func, swap_case(haystack), needle, match)
        add_case(case_insensitive_func, haystack, swap_case(needle), match)

    # Negative cases
    needle_free = remove_needle(HAYSTACKS[0])
    add_case_sensitive(needle_free, NEEDLE, False)
    add_case_insensitive(needle_free, NEEDLE, False)

    for haystack in HAYSTACKS:
        add_case_sensitive(swap_case(haystack), NEEDLE, False)

        # Without separators the needle is no longer a standalone token.
        sep = ''
        h = replace_separators(haystack, sep)

        negative_variants = (
            (h, NEEDLE),
            (small_needle(h), small_needle(NEEDLE)),
            (enlarge_haystack(h, 10, sep), NEEDLE),
        )
        for variant_haystack, variant_needle in negative_variants:
            add_case_sensitive(variant_haystack, variant_needle, False)
            add_case_insensitive(variant_haystack, variant_needle, False)

    # Positive cases
    for haystack in HAYSTACKS:
        add_case_sensitive(haystack, NEEDLE, True)
        add_case_insensitive(haystack, NEEDLE, True)

        for sep in ' ,':
            h = replace_separators(haystack, sep)

            positive_variants = (
                (h, NEEDLE),
                (small_needle(h), small_needle(NEEDLE)),
                (enlarge_haystack(h, 200, sep), NEEDLE),
            )
            for register in (add_case_sensitive, add_case_insensitive):
                for variant_haystack, variant_needle in positive_variants:
                    register(variant_haystack, variant_needle, True)

            # Case insensitivity works only on ASCII strings
            for word in ('иголка', '指针'):
                add_case_sensitive(replace_needle(h, word), replace_needle(NEEDLE, word), True)

    for sep in [*'''~!@$%^&*()-=+|]}[{";:/?.><\t''', r'\\\\']:
        h = replace_separators(HAYSTACKS[0], sep)
        add_case(case_sensitive_func, h, NEEDLE, True)

    return table_rows, table_queries, const_queries
|
|
|
|
def main():
    """Print the generated SQL test script to standard output.

    The output consists of, in order: every constant-argument query, a
    statement batch that recreates table ``ht`` and inserts all generated
    rows, every table query in sorted order, and a final ``DROP TABLE``.
    """
    const_query = Template("""SELECT ${func}('${haystack}', '${needle}'), ' expecting ', ${match};""")
    table_query = Template("""WITH '${needle}' as n
SELECT haystack, needle, ${func}(haystack, n) as result
FROM ht
WHERE func = '${func}' AND needle = n AND result != match;""")
    table_row = Template("""('${haystack}', '${needle}', ${match}, '${func}')""")

    rows, table_queries, const_queries = create_cases(
        'hasToken', 'hasTokenCaseInsensitive', table_row, table_query, const_query)

    for statement in const_queries:
        print(statement)

    # Recreate the table and load every generated row in a single INSERT.
    setup_script = """DROP TABLE IF EXISTS ht;
CREATE TABLE IF NOT EXISTS
ht
(
haystack String,
needle String,
match UInt8,
func String
)
ENGINE MergeTree()
ORDER BY haystack;
INSERT INTO ht VALUES {values};"""
    print(setup_script.format(values=", ".join(rows)))

    # The table queries live in a set; sorting makes the output order stable.
    for statement in sorted(table_queries):
        print(statement)

    print("""DROP TABLE ht""")
|
|
|
|
# Generate the test script only when this file is executed directly.
if __name__ == "__main__":
    main()
|