#include #include #include #include namespace DB { namespace { struct ExtractImpl { static void vector( const ColumnString::Chars & data, const ColumnString::Offsets & offsets, const std::string & pattern, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) { res_data.reserve(data.size() / 5); res_offsets.resize(offsets.size()); const OptimizedRegularExpression regexp = Regexps::createRegexp(pattern); unsigned capture = regexp.getNumberOfSubpatterns() > 0 ? 1 : 0; OptimizedRegularExpression::MatchVec matches; matches.reserve(capture + 1); size_t prev_offset = 0; size_t res_offset = 0; for (size_t i = 0; i < offsets.size(); ++i) { size_t cur_offset = offsets[i]; unsigned count = regexp.match(reinterpret_cast(&data[prev_offset]), cur_offset - prev_offset - 1, matches, capture + 1); if (count > capture && matches[capture].offset != std::string::npos) { const auto & match = matches[capture]; res_data.resize(res_offset + match.length + 1); memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &data[prev_offset + match.offset], match.length); res_offset += match.length; } else { res_data.resize(res_offset + 1); } res_data[res_offset] = 0; ++res_offset; res_offsets[i] = res_offset; prev_offset = cur_offset; } } }; struct NameExtract { static constexpr auto name = "extract"; }; using FunctionExtract = FunctionsStringSearchToString; } REGISTER_FUNCTION(Extract) { factory.registerFunction(); } }