ClickHouse/tests/queries/0_stateless/01674_htm_xml_coarse_parse.sql
zlx19950903 c92e613b82
Add a function htmlOrXmlCoarseParse to extract content from html or xml format string. (#19600)
* add html and xml coarse parse

* add test file

* add conditional check: hyperscan

* fix style error

* add conditional check

* bug fix

* delete unit

* typos check fix

* add unit test

* style check fix

* fix build error: case style

* arcadia_skip test fix

* LINT error fix

* Remove comments

Co-authored-by: guojiantao <guojiantao15@mails.ucas.ac.cn>
Co-authored-by: Ivan <5627721+abyss7@users.noreply.github.com>
Co-authored-by: Ivan Lezhankin <ilezhankin@yandex-team.ru>
2021-02-18 15:05:55 +03:00

16 lines
1008 B
SQL

-- Constant-argument cases: every statement passes one string literal to
-- htmlOrXmlCoarseParse and prints the single resulting value.

-- Input is a lone <script> element wrapping plain text.
SELECT
    htmlOrXmlCoarseParse('<script>Here is script.</script>');

-- Input is a lone <style> element wrapping plain text.
SELECT
    htmlOrXmlCoarseParse('<style>Here is style.</style>');

-- Input is a lone CDATA section.
SELECT
    htmlOrXmlCoarseParse('<![CDATA[Here is CDTATA.]]>');

-- Input is plain text without any markup.
-- NOTE(review): the literal holds only single spaces, so whitespace collapsing
-- is not visibly exercised here - confirm whether runs of spaces were lost.
SELECT
    htmlOrXmlCoarseParse('This is a white space test.');

-- Mixed document: leading text, a DOCTYPE spanning an escaped newline, an <html>
-- root, CDATA sections that contain script/style tag text, a self-closing tag,
-- and a <script> whose CDATA section contains a closing script tag.
SELECT
    htmlOrXmlCoarseParse('This is a complex test. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"\n "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><![CDATA[<script type="text/javascript">Hello, world</script> ]]><hello />world<![CDATA[ <style> ]]> hello</style>\n<script><![CDATA[</script>]]>hello</script>\n</html>');
-- Column-argument case: apply htmlOrXmlCoarseParse to a String column read from
-- a table rather than to a constant literal.

-- Start from a clean slate in case an earlier run left the table behind.
DROP TABLE IF EXISTS defaults;

CREATE TABLE defaults
(
    stringColumn String
) ENGINE = Memory();

-- Four rows: an ordinary tag pair, a <script> element with an attribute, a CDATA
-- section, and plain text. The target column is named explicitly so the
-- statement does not depend on the column order of the table.
-- The whole statement is kept on one line on purpose: inline INSERT data may be
-- treated as ending at the line break when the script is run statement by statement.
-- NOTE(review): 'white space collapse' holds only single spaces - confirm
-- whether runs of spaces were intended in this fixture.
INSERT INTO defaults (stringColumn) VALUES ('<common tag>hello, world<tag>'), ('<script desc=content> some content </script>'), ('<![CDATA[hello, world]]>'), ('white space collapse');

-- No ORDER BY here: the rows are expected back in insertion order.
-- NOTE(review): this relies on the Memory engine returning the single inserted
-- block as written - confirm before adding more INSERT statements.
SELECT htmlOrXmlCoarseParse(stringColumn) FROM defaults;

-- Remove the fixture table so later tests do not see it.
DROP TABLE defaults;