mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 07:31:57 +00:00
dbms: Server: queries with several replicas: development [#METR-14410]
This commit is contained in:
parent
15eb461e16
commit
a2f93f6326
@ -47,41 +47,16 @@ namespace DB
|
||||
void sendExternalTablesData(std::vector<ExternalTablesData> & data);
|
||||
|
||||
private:
|
||||
/// Описание реплики.
|
||||
struct Replica
|
||||
{
|
||||
Replica(Connection * connection_) : connection(connection_) {}
|
||||
|
||||
/// Соединение к реплике
|
||||
Connection * connection;
|
||||
|
||||
/// Номер следующего ожидаемого пакета.
|
||||
int next_packet_number = 0;
|
||||
|
||||
/// Есть ли данные, которые можно прочитать?
|
||||
bool can_read = false;
|
||||
|
||||
/// Является ли реплика валидной для чтения?
|
||||
bool is_valid = true;
|
||||
};
|
||||
|
||||
/// Реплики хэшированные по id сокета
|
||||
using ReplicaHash = std::unordered_map<int, Replica>;
|
||||
/// Проверить, есть ли данные, которые можно прочитать на каких-нибудь репликах.
|
||||
/// Возвращает соединение на реплику, с которой можно прочитать данные, если такая есть.
|
||||
Connection * waitForReadEvent();
|
||||
|
||||
private:
|
||||
/// Выбрать реплику, на которой можно прочитать данные.
|
||||
Replica & pickReplica();
|
||||
|
||||
/// Проверить, есть ли данные, которые можно прочитать на каких-нибудь репликах.
|
||||
int waitForReadEvent();
|
||||
/// Реплики хэшированные по id сокета
|
||||
using ReplicaHash = std::unordered_map<int, Connection *>;
|
||||
|
||||
private:
|
||||
const Settings & settings;
|
||||
|
||||
ReplicaHash replica_hash;
|
||||
size_t valid_replicas_count;
|
||||
|
||||
/// Номер следующего ожидаемого пакета.
|
||||
int next_packet_number = 0;
|
||||
};
|
||||
}
|
||||
|
@ -5,84 +5,95 @@ namespace DB
|
||||
ShardReplicas::ShardReplicas(std::vector<ConnectionPool::Entry> & entries, const Settings & settings_) :
|
||||
settings(settings_)
|
||||
{
|
||||
valid_replicas_count = entries.size();
|
||||
replica_hash.reserve(valid_replicas_count);
|
||||
replica_hash.reserve(entries.size());
|
||||
|
||||
for (auto & entry : entries)
|
||||
{
|
||||
Connection * connection = &*entry;
|
||||
replica_hash.insert(std::make_pair(connection->socket.impl()->sockfd(), Replica(connection)));
|
||||
replica_hash.insert(std::make_pair(connection->socket.impl()->sockfd(), connection));
|
||||
}
|
||||
}
|
||||
|
||||
int ShardReplicas::waitForReadEvent()
|
||||
Connection * ShardReplicas::waitForReadEvent()
|
||||
{
|
||||
if (valid_replicas_count == 0)
|
||||
return 0;
|
||||
|
||||
Poco::Net::Socket::SocketList write_list;
|
||||
Poco::Net::Socket::SocketList except_list;
|
||||
|
||||
Poco::Net::Socket::SocketList read_list;
|
||||
read_list.reserve(valid_replicas_count);
|
||||
read_list.reserve(replica_hash.size());
|
||||
|
||||
for (auto & e : replica_hash)
|
||||
{
|
||||
Replica & replica = e.second;
|
||||
replica.can_read = false;
|
||||
if (replica.is_valid)
|
||||
read_list.push_back(replica.connection->socket);
|
||||
Connection * connection = e.second;
|
||||
read_list.push_back(connection->socket);
|
||||
}
|
||||
|
||||
int n = Poco::Net::Socket::select(read_list, write_list, except_list, settings.poll_interval * 1000000);
|
||||
if (n == 0)
|
||||
return nullptr;
|
||||
|
||||
for (const auto & socket : read_list)
|
||||
{
|
||||
auto it = replica_hash.find(socket.impl()->sockfd());
|
||||
if (it == replica_hash.end())
|
||||
throw Exception("Unexpected replica", ErrorCodes::UNEXPECTED_REPLICA);
|
||||
Replica & replica = it->second;
|
||||
replica.can_read = true;
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
ShardReplicas::Replica & ShardReplicas::pickReplica()
|
||||
{
|
||||
Replica * res = nullptr;
|
||||
|
||||
int n = waitForReadEvent();
|
||||
if (n > 0)
|
||||
{
|
||||
int max_packet_number = -1;
|
||||
for (auto & e : replica_hash)
|
||||
{
|
||||
Replica & replica = e.second;
|
||||
if (replica.can_read && (replica.next_packet_number > max_packet_number))
|
||||
{
|
||||
max_packet_number = replica.next_packet_number;
|
||||
res = &replica;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (res == nullptr)
|
||||
throw Exception("No available replica", ErrorCodes::NO_AVAILABLE_REPLICA);
|
||||
|
||||
return *res;
|
||||
auto & socket = read_list[rand() % n];
|
||||
auto it = replica_hash.find(socket.impl()->sockfd());
|
||||
if (it == replica_hash.end())
|
||||
throw Exception("Unexpected replica", ErrorCodes::UNEXPECTED_REPLICA);
|
||||
return it->second;
|
||||
}
|
||||
|
||||
Connection::Packet ShardReplicas::receivePacket()
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
Replica & replica = pickReplica();
|
||||
bool retry = false;
|
||||
Connection * connection = waitForReadEvent();
|
||||
if (connection == nullptr)
|
||||
throw Exception("No available replica", ErrorCodes::NO_AVAILABLE_REPLICA);
|
||||
|
||||
while (replica.is_valid)
|
||||
Connection::Packet packet = connection->receivePacket();
|
||||
return packet;
|
||||
}
|
||||
|
||||
void ShardReplicas::sendQuery(const String & query, const String & query_id, UInt64 stage, bool with_pending_data)
|
||||
{
|
||||
Settings query_settings = settings;
|
||||
query_settings.parallel_replicas_count = replica_hash.size();
|
||||
UInt64 offset = 0;
|
||||
|
||||
for (auto & e : replica_hash)
|
||||
{
|
||||
Connection * connection = e.second;
|
||||
connection->sendQuery(query, query_id, stage, &query_settings, with_pending_data);
|
||||
query_settings.parallel_replica_offset = offset;
|
||||
++offset;
|
||||
}
|
||||
}
|
||||
|
||||
void ShardReplicas::disconnect()
|
||||
{
|
||||
for (auto & e : replica_hash)
|
||||
{
|
||||
Connection * connection = e.second;
|
||||
connection->disconnect();
|
||||
}
|
||||
}
|
||||
|
||||
void ShardReplicas::sendCancel()
|
||||
{
|
||||
for (auto & e : replica_hash)
|
||||
{
|
||||
Connection * connection = e.second;
|
||||
connection->sendCancel();
|
||||
}
|
||||
}
|
||||
|
||||
void ShardReplicas::drainResidualPackets()
|
||||
{
|
||||
bool caught_exceptions = false;
|
||||
|
||||
for (auto & e : replica_hash)
|
||||
{
|
||||
Connection * connection = e.second;
|
||||
bool again = true;
|
||||
|
||||
while (again)
|
||||
{
|
||||
Connection::Packet packet = replica.connection->receivePacket();
|
||||
Connection::Packet packet = connection->receivePacket();
|
||||
|
||||
switch (packet.type)
|
||||
{
|
||||
@ -94,123 +105,20 @@ namespace DB
|
||||
break;
|
||||
|
||||
case Protocol::Server::EndOfStream:
|
||||
again = false;
|
||||
continue;
|
||||
|
||||
case Protocol::Server::Exception:
|
||||
replica.is_valid = false;
|
||||
--valid_replicas_count;
|
||||
/// Больше ничего не читаем. Отменяем выполнение всех оставшихся запросов,
|
||||
/// затем получаем оставшиеся пакеты, чтобы не было рассинхронизации с
|
||||
/// репликами.
|
||||
sendCancel();
|
||||
drainResidualPackets();
|
||||
break;
|
||||
// XXX Что делать?
|
||||
caught_exceptions = true;
|
||||
again = false;
|
||||
continue;
|
||||
|
||||
default:
|
||||
/// Мы получили инвалидный пакет от реплики. Повторим попытку
|
||||
/// c другой реплики, если такая найдется.
|
||||
replica.is_valid = false;
|
||||
--valid_replicas_count;
|
||||
if (valid_replicas_count > 0)
|
||||
retry = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if ((replica.next_packet_number == next_packet_number) && !retry)
|
||||
{
|
||||
++replica.next_packet_number;
|
||||
++next_packet_number;
|
||||
return packet;
|
||||
}
|
||||
else
|
||||
{
|
||||
++replica.next_packet_number;
|
||||
retry = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ShardReplicas::sendQuery(const String & query, const String & query_id, UInt64 stage, bool with_pending_data)
|
||||
{
|
||||
Settings query_settings = settings;
|
||||
query_settings.parallel_replicas_count = replica_hash.size();
|
||||
UInt64 offset = 0;
|
||||
|
||||
for (auto & e : replica_hash)
|
||||
{
|
||||
Connection * connection = e.second.connection;
|
||||
connection->sendQuery(query, query_id, stage, &query_settings, with_pending_data);
|
||||
query_settings.parallel_replica_offset = offset;
|
||||
++offset;
|
||||
}
|
||||
}
|
||||
|
||||
void ShardReplicas::disconnect()
|
||||
{
|
||||
for (auto & e : replica_hash)
|
||||
{
|
||||
Replica & replica = e.second;
|
||||
if (replica.is_valid)
|
||||
{
|
||||
Connection * connection = replica.connection;
|
||||
connection->disconnect();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ShardReplicas::sendCancel()
|
||||
{
|
||||
for (auto & e : replica_hash)
|
||||
{
|
||||
Replica & replica = e.second;
|
||||
if (replica.is_valid)
|
||||
{
|
||||
Connection * connection = replica.connection;
|
||||
connection->sendCancel();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ShardReplicas::drainResidualPackets()
|
||||
{
|
||||
bool caught_exceptions = false;
|
||||
|
||||
for (auto & e : replica_hash)
|
||||
{
|
||||
Replica & replica = e.second;
|
||||
if (replica.is_valid)
|
||||
{
|
||||
Connection * connection = replica.connection;
|
||||
bool again = true;
|
||||
|
||||
while (again)
|
||||
{
|
||||
Connection::Packet packet = connection->receivePacket();
|
||||
|
||||
switch (packet.type)
|
||||
{
|
||||
case Protocol::Server::Data:
|
||||
case Protocol::Server::Progress:
|
||||
case Protocol::Server::ProfileInfo:
|
||||
case Protocol::Server::Totals:
|
||||
case Protocol::Server::Extremes:
|
||||
break;
|
||||
|
||||
case Protocol::Server::EndOfStream:
|
||||
again = false;
|
||||
continue;
|
||||
|
||||
case Protocol::Server::Exception:
|
||||
// XXX Что делать?
|
||||
caught_exceptions = true;
|
||||
again = false;
|
||||
continue;
|
||||
|
||||
default:
|
||||
// XXX Что делать?
|
||||
caught_exceptions = true;
|
||||
again = false;
|
||||
continue;
|
||||
}
|
||||
// XXX Что делать?
|
||||
caught_exceptions = true;
|
||||
again = false;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -223,21 +131,14 @@ namespace DB
|
||||
|
||||
std::string ShardReplicas::dumpAddresses() const
|
||||
{
|
||||
if (valid_replicas_count == 0)
|
||||
return "";
|
||||
|
||||
std::ostringstream os;
|
||||
for (auto & e : replica_hash)
|
||||
{
|
||||
char prefix = '\0';
|
||||
const Replica & replica = e.second;
|
||||
if (replica.is_valid)
|
||||
{
|
||||
const Connection * connection = replica.connection;
|
||||
os << prefix << connection->getServerAddress();
|
||||
if (prefix == '\0')
|
||||
prefix = ';';
|
||||
}
|
||||
const Connection * connection = e.second;
|
||||
os << prefix << connection->getServerAddress();
|
||||
if (prefix == '\0')
|
||||
prefix = ';';
|
||||
}
|
||||
|
||||
return os.str();
|
||||
@ -251,7 +152,7 @@ namespace DB
|
||||
auto it = data.begin();
|
||||
for (auto & e : replica_hash)
|
||||
{
|
||||
Connection * connection = e.second.connection;
|
||||
Connection * connection = e.second;
|
||||
connection->sendExternalTablesData(*it);
|
||||
++it;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user