mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-03 13:02:00 +00:00
Tests and documentation
This commit is contained in:
parent
2ac673b12a
commit
0ab4afeeed
@ -2,7 +2,7 @@
|
||||
|
||||
Here is CDTATA.
|
||||
This is a white space test.
|
||||
This is a complex test. <script type="text/javascript">Hello, world</script> world <style> hello
|
||||
This is a complex test.<script type="text/javascript">Hello, world</script> world <style> hello
|
||||
hello, world
|
||||
|
||||
hello, world
|
||||
|
@ -1,53 +1,120 @@
|
||||
-- { echo }
|
||||
|
||||
SELECT extractTextFromHTML('');
|
||||
|
||||
SELECT extractTextFromHTML(' ');
|
||||
|
||||
SELECT extractTextFromHTML(' ');
|
||||
|
||||
SELECT extractTextFromHTML('Hello');
|
||||
Hello
|
||||
SELECT extractTextFromHTML('Hello, world');
|
||||
Hello, world
|
||||
SELECT extractTextFromHTML('Hello, world');
|
||||
Hello, world
|
||||
Hello, world
|
||||
Hello, world
|
||||
Hello, world
|
||||
SELECT extractTextFromHTML(' Hello, world');
|
||||
Hello, world
|
||||
SELECT extractTextFromHTML(' Hello, world ');
|
||||
Hello, world
|
||||
SELECT extractTextFromHTML(' \t Hello,\rworld \n ');
|
||||
Hello, world
|
||||
SELECT extractTextFromHTML('Hello<world');
|
||||
Hello
|
||||
SELECT extractTextFromHTML('Hello < world');
|
||||
Hello
|
||||
SELECT extractTextFromHTML('Hello > world');
|
||||
Hello > world
|
||||
SELECT extractTextFromHTML('Hello<world>');
|
||||
Hello
|
||||
Helloworld
|
||||
Helloworld
|
||||
Helloworld
|
||||
Helloworld
|
||||
Helloworld
|
||||
Helloworld
|
||||
SELECT extractTextFromHTML('Hello<>world');
|
||||
Hello world
|
||||
SELECT extractTextFromHTML('Hello<!>world');
|
||||
Hello world
|
||||
SELECT extractTextFromHTML('Hello<!->world');
|
||||
Hello world
|
||||
SELECT extractTextFromHTML('Hello<!-->world');
|
||||
Hello world
|
||||
SELECT extractTextFromHTML('Hello<!--->world');
|
||||
Hello world
|
||||
SELECT extractTextFromHTML('Hello<!---->world');
|
||||
Hello world
|
||||
SELECT extractTextFromHTML('Hello <!-- --> World');
|
||||
Hello World
|
||||
SELECT extractTextFromHTML('Hello<!-- --> World');
|
||||
Hello World
|
||||
HelloWorld
|
||||
SELECT extractTextFromHTML('Hello<!-- -->World');
|
||||
Hello World
|
||||
SELECT extractTextFromHTML('Hello <!-- -->World');
|
||||
Hello World
|
||||
SELECT extractTextFromHTML('Hello <u> World</u>');
|
||||
Hello World
|
||||
HelloWorld
|
||||
SELECT extractTextFromHTML('Hello <u>World</u>');
|
||||
Hello World
|
||||
SELECT extractTextFromHTML('Hello<u>World</u>');
|
||||
Hello World
|
||||
SELECT extractTextFromHTML('Hello<u> World</u>');
|
||||
Hello World
|
||||
SELECT extractTextFromHTML('<![CDATA[ \t Hello,\rworld \n ]]>');
|
||||
\t Hello,\rworld \n
|
||||
Hello Hello\tworld world!
|
||||
HelloHello\tworldworld!
|
||||
Hello Hello <b>world</b> world!
|
||||
SELECT extractTextFromHTML('Hello <![CDATA[Hello\tworld]]> world!');
|
||||
HelloHello\tworld world!
|
||||
SELECT extractTextFromHTML('Hello<![CDATA[Hello\tworld]]>world!');
|
||||
HelloHello\tworld world!
|
||||
SELECT extractTextFromHTML('Hello <![CDATA[Hello <b>world</b>]]> world!');
|
||||
HelloHello <b>world</b> world!
|
||||
SELECT extractTextFromHTML('<![CDATA[<sender>John Smith</sender>]]>');
|
||||
<sender>John Smith</sender>
|
||||
SELECT extractTextFromHTML('<![CDATA[<sender>John <![CDATA[Smith</sender>]]>');
|
||||
<sender>John <![CDATA[Smith</sender>
|
||||
<sender>John <![CDATA[Smith]]>
|
||||
SELECT extractTextFromHTML('<![CDATA[<sender>John <![CDATA[]]>Smith</sender>]]>');
|
||||
<sender>John <![CDATA[ Smith ]]>
|
||||
SELECT extractTextFromHTML('<![CDATA[<sender>John ]]><![CDATA[Smith</sender>]]>');
|
||||
<sender>John Smith</sender>
|
||||
<sender>John Smith</sender>
|
||||
SELECT extractTextFromHTML('<![CDATA[<sender>John ]]> <![CDATA[Smith</sender>]]>');
|
||||
<sender>John Smith</sender>
|
||||
SELECT extractTextFromHTML('<![CDATA[<sender>John]]> <![CDATA[Smith</sender>]]>');
|
||||
<sender>JohnSmith</sender>
|
||||
SELECT extractTextFromHTML('<![CDATA[<sender>John ]]>]]><![CDATA[Smith</sender>]]>');
|
||||
<sender>John ]]>Smith</sender>
|
||||
SELECT extractTextFromHTML('Hello<script>World</script> goodbye');
|
||||
Hello goodbye
|
||||
SELECT extractTextFromHTML('Hello<script >World</script> goodbye');
|
||||
Hello goodbye
|
||||
HelloWorld goodbye
|
||||
SELECT extractTextFromHTML('Hello<scripta>World</scripta> goodbye');
|
||||
Hello World goodbye
|
||||
SELECT extractTextFromHTML('Hello<script type="text/javascript">World</script> goodbye');
|
||||
Hello goodbye
|
||||
SELECT extractTextFromHTML('Hello<style type="text/css">World</style> goodbye');
|
||||
Hello goodbye
|
||||
HelloWorld goodbye
|
||||
HelloWorld goodbye
|
||||
SELECT extractTextFromHTML('Hello<script:p>World</script:p> goodbye');
|
||||
Hello World goodbye
|
||||
SELECT extractTextFromHTML('Hello<script:p type="text/javascript">World</script:p> goodbye');
|
||||
Hello World goodbye
|
||||
SELECT extractTextFromHTML('Hello<style type="text/css">World <!-- abc --> </style> goodbye');
|
||||
Hello goodbye
|
||||
SELECT extractTextFromHTML('Hello<style type="text/css">World <!-- abc --> </style \n > goodbye');
|
||||
Hello goodbye
|
||||
SELECT extractTextFromHTML('Hello<style type="text/css">World <!-- abc --> </ style> goodbye');
|
||||
Hello
|
||||
SELECT extractTextFromHTML('Hello<style type="text/css">World <!-- abc --> </stylea> goodbye');
|
||||
Hello
|
||||
SELECT extractTextFromHTML('Hello<style type="text/css">World <![CDATA[</style>]]> </stylea> goodbye');
|
||||
Hello
|
||||
SELECT extractTextFromHTML('Hello<style type="text/css">World <![CDATA[</style>]]> </style> goodbye');
|
||||
Hello goodbye
|
||||
Hello
|
||||
SELECT extractTextFromHTML('Hello<style type="text/css">World <![CDAT[</style>]]> </style> goodbye');
|
||||
Hello ]]> goodbye
|
||||
SELECT extractTextFromHTML('Hello<style type="text/css">World <![endif]--> </style> goodbye');
|
||||
Hello goodbye
|
||||
SELECT extractTextFromHTML('Hello<style type="text/css">World <script>abc</script> </stylea> goodbye');
|
||||
Hello
|
||||
SELECT extractTextFromHTML('Hello<style type="text/css">World <script>abc</script> </style> goodbye');
|
||||
Hello goodbye
|
||||
SELECT extractTextFromHTML('<![CDATA[]]]]><![CDATA[>]]>');
|
||||
]]>
|
||||
SELECT extractTextFromHTML('
|
||||
<img src="pictures/power.png" style="margin-bottom: -30px;" />
|
||||
<br><span style="padding-right: 10px; font-size: 10px;">xkcd.com</span>
|
||||
</div>
|
||||
');
|
||||
xkcd.com
|
||||
|
@ -58,5 +58,15 @@ SELECT extractTextFromHTML('Hello<style type="text/css">World <!-- abc --> </sty
|
||||
|
||||
SELECT extractTextFromHTML('Hello<style type="text/css">World <![CDATA[</style>]]> </stylea> goodbye');
|
||||
SELECT extractTextFromHTML('Hello<style type="text/css">World <![CDATA[</style>]]> </style> goodbye');
|
||||
SELECT extractTextFromHTML('Hello<style type="text/css">World <![CDAT[</style>]]> </style> goodbye');
|
||||
SELECT extractTextFromHTML('Hello<style type="text/css">World <![endif]--> </style> goodbye');
|
||||
SELECT extractTextFromHTML('Hello<style type="text/css">World <script>abc</script> </stylea> goodbye');
|
||||
SELECT extractTextFromHTML('Hello<style type="text/css">World <script>abc</script> </style> goodbye');
|
||||
|
||||
SELECT extractTextFromHTML('<![CDATA[]]]]><![CDATA[>]]>');
|
||||
|
||||
SELECT extractTextFromHTML('
|
||||
<img src="pictures/power.png" style="margin-bottom: -30px;" />
|
||||
<br><span style="padding-right: 10px; font-size: 10px;">xkcd.com</span>
|
||||
</div>
|
||||
');
|
||||
|
Loading…
Reference in New Issue
Block a user