ClickHouse/dbms/include/DB/Common/StringUtils.h

#pragma once

#include <DB/Common/Exception.h>
#include <cstring>
#include <string>
#include <type_traits>

/// Declaration of the LOGICAL_ERROR error code. It is only declared here
/// (extern); its definition is not part of this header.
namespace DB { namespace ErrorCodes {

extern const int LOGICAL_ERROR;

}}

/// Lower-level forms of startsWith / endsWith that take the prefix or suffix
/// as a pointer plus an explicit length. They are only declared here; the
/// definitions are not in this header. The inline overloads in this header
/// forward to them.
/// NOTE(review): presumably they compare the first / last `*_size` bytes of
/// `s` against the given buffer - confirm against the implementation.
namespace detail
{
	bool startsWith(const std::string & s, const char * prefix, size_t prefix_size);
	bool endsWith(const std::string & s, const char * suffix, size_t suffix_size);
}


/// std::string overload: forwards the bytes and length of `prefix`
/// to detail::startsWith and returns its result.
inline bool startsWith(const std::string & s, const std::string & prefix)
{
	const char * const prefix_begin = prefix.data();
	const size_t prefix_length = prefix.size();
	return detail::startsWith(s, prefix_begin, prefix_length);
}

/// std::string overload: forwards the bytes and length of `suffix`
/// to detail::endsWith and returns its result.
inline bool endsWith(const std::string & s, const std::string & suffix)
{
	const char * const suffix_begin = suffix.data();
	const size_t suffix_length = suffix.size();
	return detail::endsWith(s, suffix_begin, suffix_length);
}


/// Overload for a NUL-terminated prefix. The length is taken with strlen;
/// with GCC, strlen is evaluated at compile time when it is passed a
/// constant string that is known at compile time.
inline bool startsWith(const std::string & s, const char * prefix)
{
	const size_t prefix_length = strlen(prefix);
	return detail::startsWith(s, prefix, prefix_length);
}

/// Overload for a NUL-terminated suffix: measures it with strlen and
/// forwards to detail::endsWith.
inline bool endsWith(const std::string & s, const char * suffix)
{
	const size_t suffix_length = strlen(suffix);
	return detail::endsWith(s, suffix, suffix_length);
}

/// Given an unsigned integer, return the English suffix used when printing
/// it as an ordinal number: "st", "nd", "rd" or "th".
/// Examples: 1 -> "st", 2 -> "nd", 3 -> "rd", 4 -> "th", 11 -> "th",
/// 21 -> "st", 112 -> "th".
/// T must be an unsigned integral type; this is checked at compile time.
template <typename T>
std::string getOrdinalSuffix(T n)
{
	static_assert(std::is_integral<T>::value && std::is_unsigned<T>::value,
		"Unsigned integer value required");

	/// Numbers whose tens digit is 1 (11, 12, 13, 111, ...) always take "th",
	/// whatever their last digit is.
	if (((n / 10) % 10) == 1)
		return "th";

	/// Otherwise the last digit alone decides; every case returns, so no
	/// unreachable error branch is needed.
	switch (n % 10)
	{
		case 1: return "st";
		case 2: return "nd";
		case 3: return "rd";
		default: return "th";
	}
}

/// More efficient than libc, because these functions don't respect the locale.

/// True when the byte value of c lies in the 7-bit range 0x00..0x7F.
inline bool isASCII(char c)
{
	const unsigned char byte = static_cast<unsigned char>(c);
	return byte <= 0x7F;
}

/// True when c is one of 'a'..'z' or 'A'..'Z'.
inline bool isAlphaASCII(char c)
{
	if ('a' <= c && c <= 'z')
		return true;

	return 'A' <= c && c <= 'Z';
}

/// True when c is one of the decimal digits '0'..'9'.
inline bool isNumericASCII(char c)
{
	const bool below_zero = c < '0';
	const bool above_nine = c > '9';
	return !(below_zero || above_nine);
}

/// True when c satisfies isAlphaASCII or isNumericASCII.
inline bool isAlphaNumericASCII(char c)
{
	if (isAlphaASCII(c))
		return true;

	return isNumericASCII(c);
}

/// True when c is an underscore or satisfies isAlphaNumericASCII.
inline bool isWordCharASCII(char c)
{
	if (c == '_')
		return true;

	return isAlphaNumericASCII(c);
}

/// True when c is a space, horizontal tab, line feed, carriage return,
/// form feed or vertical tab.
inline bool isWhitespaceASCII(char c)
{
	switch (c)
	{
		case ' ':
		case '\t':
		case '\n':
		case '\r':
		case '\f':
		case '\v':
			return true;
		default:
			return false;
	}
}

/// Sets the 0x20 bit of c, which maps an ASCII upper-case letter to its
/// lower-case form. Only meaningful when isAlphaASCII(c) holds.
inline char toLowerIfAlphaASCII(char c)
{
	const int case_bit = 0x20;
	return static_cast<char>(c | case_bit);
}

/// Clears the 0x20 bit of c, which maps an ASCII lower-case letter to its
/// upper-case form. Only meaningful when c is an ASCII letter.
inline char toUpperIfAlphaASCII(char c)
{
	const int case_bit = 0x20;
	return static_cast<char>(c & ~case_bit);
}

/// Flips the 0x20 bit of c, which switches an ASCII letter between upper
/// and lower case. Only meaningful when c is an ASCII letter.
inline char alternateCaseIfAlphaASCII(char c)
{
	const int case_bit = 0x20;
	return static_cast<char>(c ^ case_bit);
}

/// True when a and b are the same character, or when a is an ASCII letter
/// and b is that same letter in the opposite case.
inline bool equalsCaseInsensitive(char a, char b)
{
	if (a == b)
		return true;

	if (!isAlphaASCII(a))
		return false;

	return alternateCaseIfAlphaASCII(a) == b;
}
Allowed for certain aggregate functions to have case insensitive names for compatibility [#METR-22087]. 2016-07-14 05:22:09 +00:00			`#pragma once`

dbms: Server: Adding nullable type support to functions manipulating arrays. [#METR-19266] 2016-09-15 21:27:34 +00:00			`#include <DB/Common/Exception.h>`
Allowed for certain aggregate functions to have case insensitive names for compatibility [#METR-22087]. 2016-07-14 05:22:09 +00:00			`#include <string>`
			`#include <cstring>`

dbms: Server: Adding nullable type support to functions manipulating arrays. [#METR-19266] 2016-09-15 21:27:34 +00:00			`namespace DB { namespace ErrorCodes {`

			`extern const int LOGICAL_ERROR;`

			`}}`
Allowed for certain aggregate functions to have case insensitive names for compatibility [#METR-22087]. 2016-07-14 05:22:09 +00:00
			`namespace detail`
			`{`
			`bool startsWith(const std::string & s, const char * prefix, size_t prefix_size);`
			`bool endsWith(const std::string & s, const char * suffix, size_t suffix_size);`
			`}`


			`inline bool startsWith(const std::string & s, const std::string & prefix)`
			`{`
			`return detail::startsWith(s, prefix.data(), prefix.size());`
			`}`

			`inline bool endsWith(const std::string & s, const std::string & suffix)`
			`{`
			`return detail::endsWith(s, suffix.data(), suffix.size());`
			`}`


dbms: Server: Adding nullable type support to functions manipulating arrays. [#METR-19266] 2016-09-15 21:27:34 +00:00			`/// With GCC, strlen is evaluated compile time if we pass it a constant`
			`/// string that is known at compile time.`
Allowed for certain aggregate functions to have case insensitive names for compatibility [#METR-22087]. 2016-07-14 05:22:09 +00:00			`inline bool startsWith(const std::string & s, const char * prefix)`
			`{`
			`return detail::startsWith(s, prefix, strlen(prefix));`
			`}`

			`inline bool endsWith(const std::string & s, const char * suffix)`
			`{`
			`return detail::endsWith(s, suffix, strlen(suffix));`
			`}`
dbms: Server: Adding nullable type support to functions manipulating arrays. [#METR-19266] 2016-09-15 21:27:34 +00:00
			`/// Given an integer, return the adequate suffix for`
			`/// printing an ordinal number.`
			`template <typename T>`
			`std::string getOrdinalSuffix(T n)`
			`{`
dbms: Server: merged from master [#METR-19266] 2016-10-19 15:00:56 +00:00			`static_assert(std::is_integral<T>::value && std::is_unsigned<T>::value,`
			`"Unsigned integer value required");`
dbms: Server: Adding nullable type support to functions manipulating arrays. [#METR-19266] 2016-09-15 21:27:34 +00:00
dbms: Server: merged from master [#METR-19266] 2016-10-19 15:00:56 +00:00			`const auto val = n % 10;`
dbms: Server: Adding nullable type support to functions manipulating arrays. [#METR-19266] 2016-09-15 21:27:34 +00:00
			`bool is_th;`
			`if ((val >= 1) && (val <= 3))`
			`is_th = (n > 10) && (((n / 10) % 10) == 1);`
			`else`
			`is_th = true;`

			`if (is_th)`
			`return "th";`
			`else`
			`{`
			`switch (val)`
			`{`
			`case 1: return "st";`
			`case 2: return "nd";`
			`case 3: return "rd";`
dbms: Server: merged from master [#METR-19266] 2016-10-19 15:00:56 +00:00			`default: throw DB::Exception{"getOrdinalSuffix: internal error",`
			`DB::ErrorCodes::LOGICAL_ERROR};`
dbms: Server: Adding nullable type support to functions manipulating arrays. [#METR-19266] 2016-09-15 21:27:34 +00:00			`};`
			`}`
			`}`
Allow to parse Java-style floating point denormals 'NaN' and 'Infinity' [#METR-22428]. 2016-08-16 21:23:53 +00:00
			`/// More efficient than libc, because doesn't respect locale.`

			`inline bool isASCII(char c)`
			`{`
			`return static_cast<unsigned char>(c) < 0x80;`
			`}`

			`inline bool isAlphaASCII(char c)`
			`{`
			`return (c >= 'a' && c <= 'z')`
			`\|\| (c >= 'A' && c <= 'Z');`
			`}`

			`inline bool isNumericASCII(char c)`
			`{`
			`return (c >= '0' && c <= '9');`
			`}`

			`inline bool isAlphaNumericASCII(char c)`
			`{`
			`return isAlphaASCII(c)`
			`\|\| isNumericASCII(c);`
			`}`

			`inline bool isWordCharASCII(char c)`
			`{`
			`return isAlphaNumericASCII(c)`
			`\|\| c == '_';`
			`}`

Throw exception when multi-statements are disabled [#METR-22252]. 2016-08-17 05:38:51 +00:00			`inline bool isWhitespaceASCII(char c)`
			`{`
			`return c == ' ' \|\| c == '\t' \|\| c == '\n' \|\| c == '\r' \|\| c == '\f' \|\| c == '\v';`
			`}`

Allow to parse Java-style floating point denormals 'NaN' and 'Infinity' [#METR-22428]. 2016-08-16 21:23:53 +00:00			`/// Works assuming isAlphaASCII.`
			`inline char toLowerIfAlphaASCII(char c)`
			`{`
			`return c \| 0x20;`
			`}`

			`inline char toUpperIfAlphaASCII(char c)`
			`{`
			`return c & (~0x20);`
			`}`

			`inline char alternateCaseIfAlphaASCII(char c)`
			`{`
			`return c ^ 0x20;`
			`}`
NULLs support: fixed totally wrong code [#METR-19266]. 2016-12-30 05:13:14 +00:00
			`inline bool equalsCaseInsensitive(char a, char b)`
			`{`
			`return a == b \|\| (isAlphaASCII(a) && alternateCaseIfAlphaASCII(a) == b);`
			`}`