rewrite SinkMeiliSearch using JSONRowOutputFormat

This commit is contained in:
Mikhail Artemenko 2022-01-08 14:08:17 +03:00
parent 5a9ad15df7
commit e24d2b4d34
6 changed files with 34 additions and 87 deletions

View File

@ -38,9 +38,7 @@ String MeiliSearchConnection::searchQuery(const std::unordered_map<String, Strin
std::string post_fields = "{";
for (const auto & q_attr : query_params)
{
post_fields += q_attr.first + ":" + q_attr.second + ",";
}
post_fields.back() = '}';
@ -65,14 +63,12 @@ String MeiliSearchConnection::searchQuery(const std::unordered_map<String, Strin
curl_slist_free_all(slist1);
if (ret_code != 0)
{
throw Exception(ErrorCodes::NETWORK_ERROR, curl_easy_strerror(ret_code));
}
return response_buffer;
}
String MeiliSearchConnection::updateQuery(const String & data) const
String MeiliSearchConnection::updateQuery(std::string_view data) const
{
CURLcode ret_code;
CURL * hnd;
@ -89,7 +85,7 @@ String MeiliSearchConnection::updateQuery(const String & data) const
curl_easy_setopt(hnd, CURLOPT_BUFFERSIZE, 102400L);
curl_easy_setopt(hnd, CURLOPT_URL, url.c_str());
curl_easy_setopt(hnd, CURLOPT_NOPROGRESS, 1L);
curl_easy_setopt(hnd, CURLOPT_POSTFIELDS, data.c_str());
curl_easy_setopt(hnd, CURLOPT_POSTFIELDS, data.data());
curl_easy_setopt(hnd, CURLOPT_POSTFIELDSIZE_LARGE, data.size());
curl_easy_setopt(hnd, CURLOPT_HTTPHEADER, slist1);
curl_easy_setopt(hnd, CURLOPT_MAXREDIRS, 50L);
@ -104,9 +100,7 @@ String MeiliSearchConnection::updateQuery(const String & data) const
curl_slist_free_all(slist1);
if (ret_code != 0)
{
throw Exception(ErrorCodes::NETWORK_ERROR, curl_easy_strerror(ret_code));
}
return response_buffer;
}

View File

@ -4,6 +4,7 @@
#include <iostream>
#include <memory>
#include <string>
#include <string_view>
#include <unordered_map>
#include <utility>
#include <vector>
@ -35,7 +36,7 @@ public:
String searchQuery(const std::unordered_map<String, String> & query_params) const;
String updateQuery(const String & data) const;
String updateQuery(std::string_view data) const;
private:
MeiliConfig config;

View File

@ -1,5 +1,6 @@
#include <Storages/MeiliSearch/SinkMeiliSearch.h>
#include "Core/Field.h"
#include "Formats/FormatFactory.h"
#include "IO/WriteBufferFromString.h"
#include "Processors/Formats/Impl/JSONRowOutputFormat.h"
#include "base/JSON.h"
@ -22,64 +23,35 @@ SinkMeiliSearch::SinkMeiliSearch(
{
}
String getStringRepresentation(const ColumnWithTypeAndName & col, size_t row)
{
Field elem;
if (col.column->size() <= row)
{
return "";
}
col.column->get(row, elem);
if (elem.getType() == Field::Types::Int64)
{
return std::to_string(elem.get<Int64>());
}
else if (elem.getType() == Field::Types::UInt64)
{
return std::to_string(elem.get<UInt64>());
}
else if (elem.getType() == Field::Types::String)
{
return doubleQuoteString(elem.get<String>());
}
else if (elem.getType() == Field::Types::Float64)
{
return std::to_string(elem.get<Float64>());
}
return "";
}
String SinkMeiliSearch::getOneElement(const Block & block, int ind) const
{
String ans = "{";
int id = 0;
for (const auto & col : block)
{
ans += doubleQuoteString(sample_block.getByPosition(id++).name) + ":" + getStringRepresentation(col, ind) + ",";
}
ans.back() = '}';
return ans;
void extractData(std::string_view& view) {
int ind = view.find("\"data\":") + 9;
view.remove_prefix(ind);
int bal = ind = 1;
while (bal > 0) {
if (view[ind] == '[') ++bal;
else if (view[ind] == ']') --bal;
++ind;
}
view.remove_suffix(view.size() - ind);
}
void SinkMeiliSearch::writeBlockData(const Block & block) const
{
size_t max_col_size = 0;
for (const auto & col : block)
{
max_col_size = std::max(max_col_size, col.column->size());
}
String json_array = "[";
for (size_t i = 0; i < max_col_size; ++i)
{
json_array += getOneElement(block, i) + ",";
}
json_array.back() = ']';
auto response = connection.updateQuery(json_array);
JSON jres = JSON(response).begin();
FormatSettings settings = getFormatSettings(local_context);
settings.json.quote_64bit_integers = false;
WriteBufferFromOwnString buf;
auto writer = FormatFactory::instance().getOutputFormat("JSON", buf, sample_block, local_context, {}, settings);
writer->write(block);
writer->flush();
writer->finalize();
std::string_view vbuf(buf.str());
extractData(vbuf);
auto response = connection.updateQuery(vbuf);
auto jres = JSON(response).begin();
if (jres.getName() == "message")
{
throw Exception(ErrorCodes::MEILISEARCH_EXCEPTION, jres.getValue().toString());
}
}
Blocks SinkMeiliSearch::splitBlocks(const Block & block, const size_t & max_rows) const
@ -104,9 +76,8 @@ Blocks SinkMeiliSearch::splitBlocks(const Block & block, const size_t & max_rows
if (idx == split_block_size - 1)
limits = rows - offsets;
for (size_t col_idx = 0; col_idx < columns; ++col_idx)
{
split_blocks[idx].getByPosition(col_idx).column = block.getByPosition(col_idx).column->cut(offsets, limits);
}
offsets += max_block_size;
}
@ -118,9 +89,7 @@ void SinkMeiliSearch::consume(Chunk chunk)
auto block = getHeader().cloneWithColumns(chunk.detachColumns());
auto blocks = splitBlocks(block, max_block_size);
for (const auto & b : blocks)
{
writeBlockData(b);
}
}

View File

@ -22,8 +22,6 @@ public:
Blocks splitBlocks(const Block & block, const size_t & max_rows) const;
private:
String getOneElement(const Block & block, int ind) const;
MeiliSearchConnection connection;
ContextPtr local_context;
const UInt64 max_block_size;

View File

@ -32,9 +32,8 @@ MeiliSearchSource::MeiliSearchSource(
String columns_to_get = "[";
for (const auto & col : description.sample_block)
{
columns_to_get += doubleQuoteString(col.name) + ",";
}
columns_to_get.back() = ']';
query_params[doubleQuoteString("attributesToRetrieve")] = columns_to_get;
@ -71,9 +70,7 @@ void insertWithTypeId(MutableColumnPtr & column, JSON kv_pair, int type_id)
Chunk MeiliSearchSource::generate()
{
if (all_read)
{
return {};
}
MutableColumns columns(description.sample_block.columns());
const size_t size = columns.size();
@ -87,9 +84,7 @@ Chunk MeiliSearchSource::generate()
JSON jres = JSON(response).begin();
if (jres.getName() == "message")
{
throw Exception(ErrorCodes::MEILISEARCH_EXCEPTION, jres.getValue().toString());
}
size_t cnt_match = 0;
String def;

View File

@ -62,28 +62,21 @@ std::string convertASTtoStr(ASTPtr ptr)
ASTPtr getFunctionParams(ASTPtr node, const std::string & name)
{
if (!node)
{
return nullptr;
}
const auto * ptr = node->as<ASTFunction>();
if (ptr && ptr->name == name)
{
if (node->children.size() == 1)
{
if (node->children.size() == 1)
return node->children[0];
}
else
{
else
return nullptr;
}
}
for (const auto & next : node->children)
{
auto res = getFunctionParams(next, name);
if (res != nullptr)
{
return res;
}
}
return nullptr;
}
@ -112,9 +105,8 @@ Pipe StorageMeiliSearch::read(
auto str = el->getColumnName();
auto it = find(str.begin(), str.end(), '=');
if (it == str.end())
{
throw Exception("meiliMatch function must have parameters of the form \'key=value\'", ErrorCodes::BAD_QUERY_PARAMETER);
}
String key(str.begin() + 1, it);
String value(it + 1, str.end() - 1);
kv_pairs_params[key] = value;
@ -126,9 +118,7 @@ Pipe StorageMeiliSearch::read(
}
for (const auto & el : kv_pairs_params)
{
LOG_TRACE(log, "Parsed parameter: key = " + el.first + ", value = " + el.second);
}
Block sample_block;
for (const String & column_name : column_names)