ClickHouse/src/IO/WriteBufferValidUTF8.h
2020-04-03 18:14:31 +03:00

42 lines
1019 B
C++

#pragma once
#include <IO/WriteBuffer.h>
#include <IO/BufferWithOwnMemory.h>
namespace DB
{
/** Writes data to another buffer, replacing invalid UTF-8 sequences with the specified replacement sequence.
  * If the incoming data is already valid UTF-8, it works faster.
  * Note: destroy this object before using the resulting string.
  */
class WriteBufferValidUTF8 final : public BufferWithOwnMemory<WriteBuffer>
{
private:
    /// Destination buffer that receives the data written through this object.
    WriteBuffer & output_buffer;

    /// NOTE(review): presumably, when true, a run of consecutive invalid sequences
    /// yields a single `replacement` instead of one per sequence - confirm in the .cpp.
    bool group_replacements;

    /// The last recorded character was `replacement`.
    bool just_put_replacement = false;

    /// Byte sequence emitted in place of an invalid UTF-8 sequence.
    std::string replacement;

    /// Emits `replacement`; defined out of line.
    /// NOTE(review): expected to consult `group_replacements` / `just_put_replacement` - confirm.
    void putReplacement();

    /// Emits `len` bytes starting at `data`; defined out of line.
    /// NOTE(review): callers are expected to pass only already-validated bytes - confirm.
    void putValid(char * data, size_t len);

    /// Overrides the base-class hook; defined out of line.
    /// NOTE(review): presumably validates the buffered bytes and forwards them to `output_buffer`.
    void nextImpl() override;

    /// Defined out of line.
    /// NOTE(review): presumably handles whatever is still buffered when writing ends - confirm.
    void finish();

public:
    /// Default value of the constructor's `size` argument; defined out of line.
    static const size_t DEFAULT_SIZE;

    /** @param output_buffer_      buffer the resulting data is written to; held by reference,
      *                            so it must outlive this object.
      * @param group_replacements_ initial value of `group_replacements`.
      * @param replacement_        sequence substituted for invalid input; the default is the
      *                            UTF-8 encoding of U+FFFD REPLACEMENT CHARACTER.
      * @param size                NOTE(review): presumably the size of this object's own
      *                            working memory - confirm against BufferWithOwnMemory.
      *
      * The constructor is `explicit` because it is callable with a single argument:
      * a plain WriteBuffer must never be converted into a validating wrapper implicitly.
      */
    explicit WriteBufferValidUTF8(
        WriteBuffer & output_buffer_,
        bool group_replacements_ = true,
        const char * replacement_ = "\xEF\xBF\xBD",
        size_t size = DEFAULT_SIZE);

    /// Per the class note, the result is complete only after this destructor has run.
    ~WriteBufferValidUTF8() override;
};
}