ClickHouse/dbms/tests/queries/0_stateless/00990_hasToken.python

161 lines
5.5 KiB
Plaintext
Raw Normal View History

#!/usr/bin/env python
# encoding: utf-8
import re
from string import Template
# Token that every generated test case searches for.
NEEDLE = "needle"

# Sample haystacks: space-separated "hay" tokens with a single needle token
# placed in the middle, at the end and at the start respectively.
HAYSTACKS = [
    "hay hay hay hay hay hay hay hay hay needle hay hay hay hay hay hay hay hay hay",
    "hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay needle",
    "needle hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay hay",
]

# Whole-word, case-insensitive matchers for the two kinds of token.
HAY_RE = re.compile(r"\bhay\b", re.I)
NEEDLE_RE = re.compile(r"\bneedle\b", re.I)
def replace_follow_case(replacement):
    """Build an ``re.sub`` callback that mirrors the case of each match.

    The returned function takes a match object and returns ``replacement``
    lower-cased, title-cased or upper-cased when the matched text is
    entirely lower-case, title-case or upper-case respectively.  For any
    other casing ``replacement`` is returned unchanged.
    """
    def func(match):
        g = match.group()
        # The checks are ordered; the first casing that applies wins.
        if g.islower():
            return replacement.lower()
        if g.istitle():
            return replacement.title()
        if g.isupper():
            return replacement.upper()
        return replacement
    return func
def replace_separators(query, new_sep):
    """Return ``query`` with every run of whitespace replaced by ``new_sep``.

    ``new_sep`` is handed to ``re.sub`` as the replacement template, so
    backslash escapes inside it are processed: a doubled backslash in
    ``new_sep`` produces a single backslash in the result.
    """
    return re.sub(r'\s+', new_sep, query)
def enlarge_haystack(query, times, separator=''):
    """Repeat every whole-word "hay" in ``query`` ``times`` times.

    Each match of ``HAY_RE`` is replaced by ``times`` copies of
    ``'hay' + separator`` (so the expansion also ends with ``separator``),
    with the letter case adjusted by ``replace_follow_case``.
    """
    return HAY_RE.sub(replace_follow_case(('hay' + separator) * times), query)
def small_needle(query):
    """Shrink every whole-word "needle" in ``query`` to the single letter "n".

    The replacement letter is case-adjusted by ``replace_follow_case``.
    """
    return NEEDLE_RE.sub(replace_follow_case('n'), query)
def remove_needle(query):
    """Return ``query`` with every match of ``NEEDLE_RE`` deleted.

    Only the matched word is removed; surrounding separators are kept.
    """
    return NEEDLE_RE.sub('', query)
def replace_needle(query, new_needle):
    """Return ``query`` with every match of ``NEEDLE_RE`` replaced.

    ``new_needle`` is passed to ``re.sub`` as the replacement string.
    """
    return NEEDLE_RE.sub(new_needle, query)
def transform_needle(query, string_transformation_func):
    """Apply a string transformation to every needle found in ``query``.

    ``string_transformation_func`` is called with the text of each
    ``NEEDLE_RE`` match and its return value replaces that match.  Intended
    for use with ``str.lower``, ``str.upper``, ``str.title`` and the like.
    """
    def replace_with_transformation(match):
        g = match.group()
        return string_transformation_func(g)
    return NEEDLE_RE.sub(replace_with_transformation, query)
def create_cases(case_sensitive_func, case_insensitive_func, table_row_template, table_query_template, const_query_template):
    """Generate the rendered test cases for a pair of token-search functions.

    Args:
        case_sensitive_func: name substituted as ``func`` for the
            case-sensitive cases.
        case_insensitive_func: name substituted as ``func`` for the
            case-insensitive cases.
        table_row_template: template rendered once per case into a table row.
        table_query_template: template rendered once per case into a table
            query.
        const_query_template: template rendered once per case into a
            constant query.

    Every template must provide ``substitute(mapping)``; the mapping holds
    the keys ``func``, ``haystack``, ``needle`` and ``match`` (0 or 1).

    Returns:
        A tuple ``(table_rows, table_queries, const_queries)``: the rows and
        constant queries are lists in generation order, the table queries
        are a set (duplicates collapse).
    """
    # NOTE(review): the block nesting below (loop bodies and the extent of
    # the ``if match:`` branches) was reconstructed from the data flow --
    # confirm against the reference output of the test.
    const_queries = []
    table_rows = []
    table_queries = set()

    def add_case(func, haystack, needle, match):
        # Render one case through all three templates.
        match = int(match)
        args = {
            'func': func,
            'haystack': haystack,
            'needle': needle,
            'match': match,
        }
        const_queries.append(const_query_template.substitute(args))
        table_queries.add(table_query_template.substitute(args))
        table_rows.append(table_row_template.substitute(args))

    def add_case_sensitive(haystack, needle, match):
        add_case(case_sensitive_func, haystack, needle, match)
        if match:
            # Swapping the case on both sides keeps haystack and needle equal.
            add_case(case_sensitive_func, transform_needle(haystack, str.swapcase), transform_needle(needle, str.swapcase), match)

    def add_case_insensitive(haystack, needle, match):
        add_case(case_insensitive_func, haystack, needle, match)
        if match:
            # Swapping the case on one side only must not affect the result.
            add_case(case_insensitive_func, transform_needle(haystack, str.swapcase), needle, match)
            add_case(case_insensitive_func, haystack, transform_needle(needle, str.swapcase), match)

    # Negative cases
    add_case_sensitive(remove_needle(HAYSTACKS[0]), NEEDLE, False)
    add_case_insensitive(remove_needle(HAYSTACKS[0]), NEEDLE, False)

    for haystack in HAYSTACKS:
        add_case_sensitive(transform_needle(haystack, str.swapcase), NEEDLE, False)

        # With an empty separator all tokens are glued together.
        sep = ''
        h = replace_separators(haystack, sep)

        add_case_sensitive(h, NEEDLE, False)
        add_case_insensitive(h, NEEDLE, False)

        add_case_sensitive(small_needle(h), small_needle(NEEDLE), False)
        add_case_insensitive(small_needle(h), small_needle(NEEDLE), False)

        add_case_sensitive(enlarge_haystack(h, 10, sep), NEEDLE, False)
        add_case_insensitive(enlarge_haystack(h, 10, sep), NEEDLE, False)

    # Positive cases
    for haystack in HAYSTACKS:
        add_case_sensitive(haystack, NEEDLE, True)
        add_case_insensitive(haystack, NEEDLE, True)

        # Separators: a space and a comma.
        for sep in ' ,':
            h = replace_separators(haystack, sep)

            add_case_sensitive(h, NEEDLE, True)
            add_case_sensitive(small_needle(h), small_needle(NEEDLE), True)
            add_case_sensitive(enlarge_haystack(h, 200, sep), NEEDLE, True)

            add_case_insensitive(h, NEEDLE, True)
            add_case_insensitive(small_needle(h), small_needle(NEEDLE), True)
            add_case_insensitive(enlarge_haystack(h, 200, sep), NEEDLE, True)

            # Case insensitivity works only on ASCII strings
            add_case_sensitive(replace_needle(h, 'иголка'), replace_needle(NEEDLE, 'иголка'), True)
            add_case_sensitive(replace_needle(h, '指针'), replace_needle(NEEDLE, '指针'), True)

    # Punctuation and tab separators, plus a backslash sequence.
    for sep in list('''~!@$%^&*()-=+|]}[{";:/?.><\t''') + [r'\\\\']:
        h = replace_separators(HAYSTACKS[0], sep)
        add_case(case_sensitive_func, h, NEEDLE, True)

    return table_rows, table_queries, const_queries
def main():
    """Print the generated SQL test script to standard output.

    The constant queries are emitted first, then the statements that create
    and fill the ``ht`` table, then the table queries in sorted order, and
    finally the statement that drops the table.
    """
    def query(x):
        # Single-argument print(...) gives the same output on Python 2 and 3.
        print(x)

    CONST_QUERY = Template("""SELECT ${func}('${haystack}', '${needle}'), ' expecting ', ${match};""")
    TABLE_QUERY = Template("""WITH '${needle}' as n
SELECT haystack, needle, ${func}(haystack, n) as result
FROM ht
WHERE func = '${func}' AND needle = n AND result != match;""")
    TABLE_ROW = Template("""('${haystack}', '${needle}', ${match}, '${func}')""")

    rows, table_queries, const_queries = create_cases('hasToken', 'hasTokenCaseInsensitive', TABLE_ROW, TABLE_QUERY, CONST_QUERY)

    for q in const_queries:
        query(q)

    query("""DROP TABLE IF EXISTS ht;
CREATE TABLE IF NOT EXISTS
ht
(
haystack String,
needle String,
match UInt8,
func String
)
ENGINE MergeTree()
ORDER BY haystack;
INSERT INTO ht VALUES {values};""".format(values=", ".join(rows)))

    # table_queries is a set; sorting makes the output order deterministic.
    for q in sorted(table_queries):
        query(q)

    query("""DROP TABLE ht""")
# Run the generator only when the file is executed as a script.
if __name__ == '__main__':
    main()