ClickHouse/dbms/IO/WriteBufferValidUTF8.h

42 lines
1019 B
C++
Raw Normal View History

#pragma once
#include <IO/WriteBuffer.h>
#include <IO/BufferWithOwnMemory.h>
namespace DB
{
2017-05-28 14:29:40 +00:00
/** Writes data to another buffer, replacing invalid UTF-8 sequences with the specified replacement sequence.
* If the data being written is already valid UTF-8, it works faster.
* Note: destroy this object before using the resulting string.
*/
2020-03-18 18:26:40 +00:00
class WriteBufferValidUTF8 final : public BufferWithOwnMemory<WriteBuffer>
{
private:
    /// Destination buffer: the data written through this object ends up here.
    WriteBuffer & output_buffer;

    /// NOTE(review): presumably, when set, a run of adjacent invalid sequences produces a single
    /// replacement rather than one per invalid sequence - confirm against the implementation.
    bool group_replacements;

    /// The last recorded character was `replacement`.
    bool just_put_replacement = false;

    /// Sequence written in place of an invalid UTF-8 sequence.
    std::string replacement;

    /// NOTE(review): defined out of line; presumably emits `replacement` to `output_buffer`
    /// (honouring `group_replacements`) - confirm against the implementation.
    void putReplacement();

    /// NOTE(review): defined out of line; presumably forwards `len` already-validated bytes
    /// starting at `data` to `output_buffer` - confirm against the implementation.
    void putValid(char * data, size_t len);

    /// Overrides the base-class hook of the same name; defined out of line.
    void nextImpl() override;

    /// NOTE(review): defined out of line; presumably handles whatever is still pending when
    /// writing ends (see the destructor) - confirm against the implementation.
    void finish();

public:
    /// Default value of the constructor's `size` argument; defined out of line.
    static const size_t DEFAULT_SIZE;

    /** @param output_buffer_      Buffer that receives the resulting data.
      * @param group_replacements_ Initial value for `group_replacements`.
      * @param replacement_        Sequence to write instead of invalid UTF-8. The default bytes
      *                            EF BF BD are the UTF-8 encoding of U+FFFD REPLACEMENT CHARACTER.
      * @param size                NOTE(review): presumably the size of this object's own
      *                            memory - confirm against the implementation.
      *
      * Declared `explicit` because every argument except the first has a default, so the
      * constructor would otherwise allow a silent conversion from any `WriteBuffer &`.
      */
    explicit WriteBufferValidUTF8(
        WriteBuffer & output_buffer_,
        bool group_replacements_ = true,
        const char * replacement_ = "\xEF\xBF\xBD",
        size_t size = DEFAULT_SIZE);

    /// The resulting data must only be used after this object has been destroyed.
    ~WriteBufferValidUTF8() override;
};
}