2012-11-14 09:49:51 +00:00
|
|
|
#pragma once
|
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <IO/WriteBuffer.h>
|
|
|
|
#include <IO/BufferWithOwnMemory.h>
|
2016-08-19 01:54:23 +00:00
|
|
|
|
2012-11-14 09:49:51 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
2016-02-14 04:07:55 +00:00
|
|
|
|
2017-05-28 14:29:40 +00:00
|
|
|
/** Writes the data to another buffer, replacing the invalid UTF-8 sequences with the specified sequence.
|
|
|
|
* If the valid UTF-8 is already written, it works faster.
|
|
|
|
* Note: before using the resulting string, destroy this object.
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
2020-03-18 18:26:40 +00:00
|
|
|
class WriteBufferValidUTF8 final : public BufferWithOwnMemory<WriteBuffer>
|
2016-02-14 04:07:55 +00:00
|
|
|
{
|
|
|
|
private:
|
2017-04-01 07:20:54 +00:00
|
|
|
WriteBuffer & output_buffer;
|
|
|
|
bool group_replacements;
|
2017-05-28 14:29:40 +00:00
|
|
|
/// The last recorded character was `replacement`.
|
2017-04-01 07:20:54 +00:00
|
|
|
bool just_put_replacement = false;
|
|
|
|
std::string replacement;
|
2016-02-14 04:07:55 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
void putReplacement();
|
|
|
|
void putValid(char * data, size_t len);
|
2016-02-14 04:07:55 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
void nextImpl() override;
|
|
|
|
void finish();
|
2016-02-14 04:07:55 +00:00
|
|
|
|
|
|
|
public:
|
2017-04-01 07:20:54 +00:00
|
|
|
static const size_t DEFAULT_SIZE;
|
|
|
|
|
|
|
|
WriteBufferValidUTF8(
|
2019-08-03 11:02:40 +00:00
|
|
|
WriteBuffer & output_buffer_,
|
|
|
|
bool group_replacements_ = true,
|
|
|
|
const char * replacement_ = "\xEF\xBF\xBD",
|
2017-04-01 07:20:54 +00:00
|
|
|
size_t size = DEFAULT_SIZE);
|
|
|
|
|
2019-12-24 17:12:08 +00:00
|
|
|
~WriteBufferValidUTF8() override;
|
2016-02-14 04:07:55 +00:00
|
|
|
};
|
|
|
|
|
2012-11-14 09:49:51 +00:00
|
|
|
}
|