ClickHouse/dbms/src/IO/ReadBuffer.h

#pragma once

#include <cstring>
#include <algorithm>
#include <memory>

#include <Common/Exception.h>
#include <IO/BufferBase.h>


namespace DB
{

/// Error codes used by ReadBuffer when throwing Exception.
/// They are only declared here (`extern`); the definitions are not in this header.
namespace ErrorCodes
{
    /// Thrown by ignore() and ignore(n) when the data ends before the requested bytes could be skipped.
    extern const int ATTEMPT_TO_READ_AFTER_EOF;
    /// Thrown by readStrict() when fewer bytes than requested could be read.
    extern const int CANNOT_READ_ALL_DATA;
}

/** A simple abstract class for buffered reading of data (char sequences) from somewhere.
  * Unlike std::istream, it provides access to the internal buffer
  *  and also allows the position inside the buffer to be managed manually.
  *
  * Note! `char *`, not `const char *`, is used
  *  (so that the common code can be kept in BufferBase, and so that the buffer can be filled with new data).
  * This causes some inconvenience - for example, when a ReadBuffer is used to read from a `const char *` chunk of memory,
  *  you have to use const_cast.
  *
  * Derived classes must implement the nextImpl() method.
  */
class ReadBuffer : public BufferBase
{
public:
    /** Creates a buffer and sets the piece of data available for reading to zero size,
      *  so that next() is called to load a new portion of data into the buffer on the first read attempt.
      */
    ReadBuffer(Position ptr, size_t size) : BufferBase(ptr, size, 0) { working_buffer.resize(0); }

    /** Used when the buffer is already full of data that can be read
      *  (in this case, pass 0 as the offset).
      */
    ReadBuffer(Position ptr, size_t size, size_t offset) : BufferBase(ptr, size, offset) {}

    /// Re-points the buffer at new memory and, like the two-argument constructor,
    ///  leaves the piece of data available for reading empty.
    void set(Position ptr, size_t size) { BufferBase::set(ptr, size, 0); working_buffer.resize(0); }

    /** Reads the next portion of data and fills the buffer with it; sets the position to the beginning
      *  (plus `working_buffer_offset`, if a derived class has set it).
      * Returns `false` at the end of data, `true` otherwise; throws an exception if something is wrong.
      */
    bool next()
    {
        /// Add the current offset() to the `bytes` counter before the buffer is refilled.
        bytes += offset();
        const bool res = nextImpl();

        if (res)
        {
            pos = working_buffer.begin() + working_buffer_offset;
        }
        else
        {
            /// Nothing was read: expose an empty buffer. The offset must not be applied here,
            ///  otherwise `pos` would end up past working_buffer.end().
            working_buffer.resize(0);
            pos = working_buffer.begin();
        }

        /// The offset is one-shot: it applies to a single call of next() only.
        working_buffer_offset = 0;
        return res;
    }


    /// Calls next() if there is no pending data in the buffer. The result of next() is ignored.
    void nextIfAtEnd()
    {
        if (!hasPendingData())
            next();
    }

    virtual ~ReadBuffer() = default;


    /** Unlike std::istream, it returns true if all data has been read
      *  (and not only after an attempt to read past the end).
      * If the position is currently at the end of the buffer, it calls next().
      * That is, it has a side effect - if the buffer is exhausted, it is refilled and the position is set to the beginning.
      *
      * An attempt to read after the end should throw an exception.
      */
    bool ALWAYS_INLINE eof()
    {
        return !hasPendingData() && !next();
    }

    /// Skips exactly one byte. Throws ATTEMPT_TO_READ_AFTER_EOF if there is no more data.
    void ignore()
    {
        if (eof())
            throw Exception("Attempt to read after eof", ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF);

        ++pos;
    }

    /// Skips exactly n bytes. Throws ATTEMPT_TO_READ_AFTER_EOF if the data ends earlier.
    void ignore(size_t n)
    {
        while (n != 0 && !eof())
        {
            /// Skip no more than what is left in the current buffer; eof() refills it on the next iteration.
            size_t bytes_to_ignore = std::min(static_cast<size_t>(working_buffer.end() - pos), n);
            pos += bytes_to_ignore;
            n -= bytes_to_ignore;
        }

        if (n != 0)
            throw Exception("Attempt to read after eof", ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF);
    }

    /** Skips up to n bytes and returns the number of bytes actually skipped; does not throw at the end of data.
      * (This method could have been called `ignore`, and the throwing `ignore` could have been called `ignoreStrict`.)
      */
    size_t tryIgnore(size_t n)
    {
        size_t bytes_ignored = 0;

        while (bytes_ignored < n && !eof())
        {
            size_t bytes_to_ignore = std::min(static_cast<size_t>(working_buffer.end() - pos), n - bytes_ignored);
            pos += bytes_to_ignore;
            bytes_ignored += bytes_to_ignore;
        }

        return bytes_ignored;
    }

    /** Reads as many bytes as there are, but no more than n, into `to`.
      * Returns the number of bytes copied, which is less than n only if the data has ended.
      */
    size_t read(char * to, size_t n)
    {
        size_t bytes_copied = 0;

        while (bytes_copied < n && !eof())
        {
            /// Copy no more than what is left in the current buffer; eof() refills it on the next iteration.
            size_t bytes_to_copy = std::min(static_cast<size_t>(working_buffer.end() - pos), n - bytes_copied);
            ::memcpy(to + bytes_copied, pos, bytes_to_copy);
            pos += bytes_to_copy;
            bytes_copied += bytes_to_copy;
        }

        return bytes_copied;
    }

    /** Reads exactly n bytes into `to`; if there are fewer, throws CANNOT_READ_ALL_DATA.
      * The bytes that were available have already been copied into `to` by the time the exception is thrown.
      */
    void readStrict(char * to, size_t n)
    {
        const size_t read_bytes = read(to, n);
        if (n != read_bytes)
            throw Exception("Cannot read all data. Bytes read: " + std::to_string(read_bytes) + ". Bytes expected: " + std::to_string(n) + ".", ErrorCodes::CANNOT_READ_ALL_DATA);
    }

    /** A method that can be implemented more efficiently in derived classes when reading large enough blocks.
      * The implementation can read data directly into `to`, without superfluous copying, if there is enough space in `to`.
      * For example, a CompressedReadBuffer can decompress the data directly into `to`, if the entire decompressed block fits there.
      * By default - the same as read().
      * Don't use it for small reads.
      */
    virtual size_t readBig(char * to, size_t n)
    {
        return read(to, n);
    }

protected:
    /// The number of bytes to skip from the beginning of `working_buffer` after the next successful refill.
    /// It is reset to zero by every call of next().
    size_t working_buffer_offset = 0;

private:
    /** Reads the next portion of data and fills the buffer with it.
      * Returns `false` at the end of data, `true` otherwise.
      * Throws an exception if something is wrong.
      * The default implementation reads nothing and always reports the end of data.
      */
    virtual bool nextImpl() { return false; }
};


/// Shared-ownership pointer to a ReadBuffer (or to an object of a class derived from it).
using ReadBufferPtr = std::shared_ptr<ReadBuffer>;


}
dbms: development [#CONV-2944]. 2011-08-09 15:57:33 +00:00			`#pragma once`
dbms: development 2010-06-04 18:25:25 +00:00
			`#include <cstring>`
dbms: development [#CONV-2944]. 2011-08-09 15:57:33 +00:00			`#include <algorithm>`
Implemented re-readable buffers, add unit tests. [#CLICKHOUSE-2070] 2017-02-09 10:10:13 +00:00			`#include <memory>`
dbms: development 2010-06-04 18:25:25 +00:00
Moved headers and sources to same place [#CLICKHOUSE-3]. 2017-04-01 09:19:00 +00:00			`#include <Common/Exception.h>`
			`#include <IO/BufferBase.h>`
dbms: development 2010-06-04 18:25:25 +00:00

			`namespace DB`
			`{`

dbms: cutting dependency [#METR-2944].. 2016-01-11 21:46:36 +00:00			`namespace ErrorCodes`
			`{`
Changed tabs to spaces in code [#CLICKHOUSE-3]. 2017-04-01 07:20:54 +00:00			`extern const int ATTEMPT_TO_READ_AFTER_EOF;`
			`extern const int CANNOT_READ_ALL_DATA;`
dbms: cutting dependency [#METR-2944].. 2016-01-11 21:46:36 +00:00			`}`

translate comments: IO 2017-05-28 14:29:40 +00:00			`/** A simple abstract class for buffered data reading (char sequences) from somewhere.`
			`* Unlike std::istream, it provides access to the internal buffer,`
			`* and also allows you to manually manage the position inside the buffer.`
dbms: development 2010-06-04 18:25:25 +00:00			`*`
translate comments: IO 2017-05-28 14:29:40 +00:00			* Note! `char `, not `const char ` is used
			`* (so that you can take out the common code into BufferBase, and also so that you can fill the buffer in with new data).`
			`* This causes inconveniences - for example, when using ReadBuffer to read from a chunk of memory const char *,`
			`* you have to use const_cast.`
dbms: development [#CONV-2944]. 2011-10-16 03:05:15 +00:00			`*`
translate comments: IO 2017-05-28 14:29:40 +00:00			`* successors must implement the nextImpl() method.`
dbms: development 2010-06-04 18:25:25 +00:00			`*/`
DB: IO: allowed to use not own memory piece [#CONV-2546]. 2011-06-27 18:22:14 +00:00			`class ReadBuffer : public BufferBase`
dbms: development 2010-06-04 18:25:25 +00:00			`{`
			`public:`
translate comments: IO 2017-05-28 14:29:40 +00:00			`/** Creates a buffer and sets a piece of available data to read to zero size,`
			`* so that the next() function is called to load the new data portion into the buffer at the first try.`
Changed tabs to spaces in code [#CLICKHOUSE-3]. 2017-04-01 07:20:54 +00:00			`*/`
			`ReadBuffer(Position ptr, size_t size) : BufferBase(ptr, size, 0) { working_buffer.resize(0); }`

translate comments: IO 2017-05-28 14:29:40 +00:00			`/** Used when the buffer is already full of data that can be read.`
			`* (in this case, pass 0 as an offset)`
Changed tabs to spaces in code [#CLICKHOUSE-3]. 2017-04-01 07:20:54 +00:00			`*/`
			`ReadBuffer(Position ptr, size_t size, size_t offset) : BufferBase(ptr, size, offset) {}`

			`void set(Position ptr, size_t size) { BufferBase::set(ptr, size, 0); working_buffer.resize(0); }`

translate comments: IO 2017-05-28 14:29:40 +00:00			`/** read next data and fill a buffer with it; set position to the beginning;`
			* return `false` in case of end, `true` otherwise; throw an exception, if something is wrong
Changed tabs to spaces in code [#CLICKHOUSE-3]. 2017-04-01 07:20:54 +00:00			`*/`
			`bool next()`
			`{`
			`bytes += offset();`
			`bool res = nextImpl();`
			`if (!res)`
			`working_buffer.resize(0);`

			`pos = working_buffer.begin() + working_buffer_offset;`
			`working_buffer_offset = 0;`
			`return res;`
			`}`


			`inline void nextIfAtEnd()`
			`{`
			`if (!hasPendingData())`
			`next();`
			`}`

			`virtual ~ReadBuffer() {}`


translate comments: IO 2017-05-28 14:29:40 +00:00			`/** Unlike std::istream, it returns true if all data was read`
			`* (and not in case there was an attempt to read after the end).`
			`* If at the moment the position is at the end of the buffer, it calls the next() method.`
			`* That is, it has a side effect - if the buffer is over, then it updates it and set the position to the beginning.`
Changed tabs to spaces in code [#CLICKHOUSE-3]. 2017-04-01 07:20:54 +00:00			`*`
translate comments: IO 2017-05-28 14:29:40 +00:00			`* Try to read after the end should throw an exception.`
Changed tabs to spaces in code [#CLICKHOUSE-3]. 2017-04-01 07:20:54 +00:00			`*/`
			`bool ALWAYS_INLINE eof()`
			`{`
			`return !hasPendingData() && !next();`
			`}`

			`void ignore()`
			`{`
			`if (!eof())`
			`++pos;`
			`else`
			`throw Exception("Attempt to read after eof", ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF);`
			`}`

			`void ignore(size_t n)`
			`{`
			`while (n != 0 && !eof())`
			`{`
			`size_t bytes_to_ignore = std::min(static_cast<size_t>(working_buffer.end() - pos), n);`
			`pos += bytes_to_ignore;`
			`n -= bytes_to_ignore;`
			`}`

			`if (n)`
			`throw Exception("Attempt to read after eof", ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF);`
			`}`

translate comments: IO 2017-05-28 14:29:40 +00:00			/// You could call this method `ignore`, and `ignore` call `ignoreStrict`.
Changed tabs to spaces in code [#CLICKHOUSE-3]. 2017-04-01 07:20:54 +00:00			`size_t tryIgnore(size_t n)`
			`{`
			`size_t bytes_ignored = 0;`

			`while (bytes_ignored < n && !eof())`
			`{`
			`size_t bytes_to_ignore = std::min(static_cast<size_t>(working_buffer.end() - pos), n - bytes_ignored);`
			`pos += bytes_to_ignore;`
			`bytes_ignored += bytes_to_ignore;`
			`}`

			`return bytes_ignored;`
			`}`

translate comments: IO 2017-05-28 14:29:40 +00:00			`/** Reads as many as there are, no more than n bytes. */`
Changed tabs to spaces in code [#CLICKHOUSE-3]. 2017-04-01 07:20:54 +00:00			`size_t read(char * to, size_t n)`
			`{`
			`size_t bytes_copied = 0;`

			`while (bytes_copied < n && !eof())`
			`{`
			`size_t bytes_to_copy = std::min(static_cast<size_t>(working_buffer.end() - pos), n - bytes_copied);`
			`::memcpy(to + bytes_copied, pos, bytes_to_copy);`
			`pos += bytes_to_copy;`
			`bytes_copied += bytes_to_copy;`
			`}`

			`return bytes_copied;`
			`}`

translate comments: IO 2017-05-28 14:29:40 +00:00			`/** Reads n bytes, if there are less - throws an exception. */`
Changed tabs to spaces in code [#CLICKHOUSE-3]. 2017-04-01 07:20:54 +00:00			`void readStrict(char * to, size_t n)`
			`{`
More information in 'Cannot read all data' errors 2018-07-13 14:42:30 +00:00			`auto read_bytes = read(to, n);`
			`if (n != read_bytes)`
			`throw Exception("Cannot read all data. Bytes read: " + std::to_string(read_bytes) + ". Bytes expected: " + std::to_string(n) + ".", ErrorCodes::CANNOT_READ_ALL_DATA);`
Changed tabs to spaces in code [#CLICKHOUSE-3]. 2017-04-01 07:20:54 +00:00			`}`

translate comments: IO 2017-05-28 14:29:40 +00:00			`/** A method that can be more efficiently implemented in successors, in the case of reading large enough blocks.`
			* The implementation can read data directly into `to`, without superfluous copying, if in `to` there is enough space for work.
			* For example, a CompressedReadBuffer can decompress the data directly into `to`, if the entire decompressed block fits there.
			`* By default - the same as read.`
			`* Don't use for small reads.`
Changed tabs to spaces in code [#CLICKHOUSE-3]. 2017-04-01 07:20:54 +00:00			`*/`
			`virtual size_t readBig(char * to, size_t n)`
			`{`
			`return read(to, n);`
			`}`
dbms: less copying when reading fixed columns in large blocks [#CONV-2944]. 2013-09-08 00:00:25 +00:00
dbms: Server: feature development. [#METR-15090] 2015-04-06 15:10:09 +00:00			`protected:`
translate comments: IO 2017-05-28 14:29:40 +00:00			/// The number of bytes to ignore from the initial position of `working_buffer` buffer.
Changed tabs to spaces in code [#CLICKHOUSE-3]. 2017-04-01 07:20:54 +00:00			`size_t working_buffer_offset = 0;`
dbms: Server: feature development. [#METR-15090] 2015-04-06 15:10:09 +00:00
dbms: fixed error. 2011-05-13 19:40:56 +00:00			`private:`
translate comments: IO 2017-05-28 14:29:40 +00:00			`/** Read the next data and fill a buffer with it.`
			* Return `false` in case of the end, `true` otherwise.
			`* Throw an exception if something is wrong.`
Changed tabs to spaces in code [#CLICKHOUSE-3]. 2017-04-01 07:20:54 +00:00			`*/`
Non significant changes according to clang's -Weverything, part 1 [#CLICKHOUSE-2] 2018-06-03 16:51:31 +00:00			`virtual bool nextImpl() { return false; }`
dbms: development 2010-06-04 18:25:25 +00:00			`};`


Implemented re-readable buffers, add unit tests. [#CLICKHOUSE-2070] 2017-02-09 10:10:13 +00:00			`using ReadBufferPtr = std::shared_ptr<ReadBuffer>;`


dbms: development 2010-06-04 18:25:25 +00:00			`}`