Removed hashtable use ternary search tree

This commit is contained in:
morty 2018-06-04 17:50:16 +03:00
parent be0f0ecef9
commit 3ad80a6a0f
4 changed files with 115 additions and 484 deletions

View File

@ -1,126 +1,105 @@
#include <readline/readline.h>
#include <cstdlib>
#include "Completion.h" #include "Completion.h"
namespace Completion namespace Completion
{ {
static uint hashpjw(const char *arKey, uint nKeyLength) void TSTNode::add_word(char *word)
{ {
uint h = 0; insert(word, word);
uint g;
uint i;
for (i = 0; i < nKeyLength; i++) {
h = (h << 4) + arKey[i];
if ((g = (h & 0xF0000000))) {
h = h ^ (g >> 24);
h = h ^ g;
}
}
return h;
} }
int init_hash_table(HashTable *ht, size_t size) void TSTNode::insert(char *word, char *remainder)
{ {
ht->hashFunction = hashpjw; if (!*remainder) {
ht->tableSize = size; return;
ht->buckets = (Bucket **)calloc(size, sizeof(Bucket));
if (!ht->buckets) {
ht->initialized = false;
return HASH_FAILURE;
}
ht->initialized = true;
return HASH_SUCCESS;
} }
void hash_add_word(HashTable *ht, char *word) if (!token) {
token = *remainder;
}
if (token > *remainder) {
if (!left) {
left = (TSTNode *) calloc(1, sizeof(TSTNode));
}
return left->insert(word, remainder);
}
if (token < *remainder) {
if (!right) {
right = (TSTNode *) calloc(1, sizeof(TSTNode));
}
return right->insert(word, remainder);
}
auto newEntry = (Entry *) calloc(1, sizeof(Entry));
newEntry->text = word;
newEntry->next = entry;
entry = newEntry;
if (!middle) {
middle = (TSTNode *) calloc(1, sizeof(TSTNode));
}
return middle->insert(word, ++remainder);
}
Entry * TSTNode::find_all(const char *word)
{ {
uint i; if (!word) {
char *pos = word; return (Entry *) nullptr;
for (i = 1; *pos; i++, pos++) {
hash_insert_key(ht, word, i, word);
};
} }
int hash_insert_key(HashTable *ht, char *key, uint keyLength, char* word) return find(word, word);
}
Entry * TSTNode::find(const char *word, const char *remainder)
{ {
uint hash; if (token > *remainder) {
size_t bucketIndex; if (!left) {
Bucket *bucket; return (Entry *) nullptr;
if (keyLength <= 0) {
return HASH_FAILURE;
} }
hash = ht->hashFunction(key, keyLength); return left->find(word, remainder);
bucketIndex = hash % ht->tableSize;
bucket = ht->buckets[bucketIndex];
while (bucket) {
if ( (bucket->hash == hash) && (bucket->keyLength == keyLength)) {
if (!memcmp(bucket->key, key, keyLength)) {
auto *entry = (HashEntry *) calloc(1, sizeof(HashEntry));
if (entry == nullptr) {
return HASH_FAILURE;
}
entry->text = word;
entry->next = bucket->entry;
bucket->entry = entry;
return HASH_SUCCESS;
}
}
bucket = bucket->next;
}
bucket = (Bucket *) calloc(1, sizeof(Bucket));
if (bucket == nullptr) {
return HASH_FAILURE;
}
bucket->key = key;
bucket->keyLength = keyLength;
bucket->hash = hash;
bucket->entry = (HashEntry *) calloc(1, sizeof(HashEntry));
if (bucket->entry == nullptr) {
return HASH_FAILURE;
} }
bucket->entry->text = word; if (token < *remainder) {
bucket->entry->next = nullptr; if (!right) {
return (Entry *) nullptr;
bucket->next = ht->buckets[bucketIndex]; }
return right->find(word, remainder);
ht->buckets[bucketIndex] = bucket;
return HASH_SUCCESS;
} }
Bucket * hash_find_all_matches(HashTable *ht, const char *word, uint length, uint *res_length) if (!middle) {
return (Entry *) nullptr;
}
if (strlen(remainder) == 1) {
return entry;
}
return middle->find(word, ++remainder);
}
void TSTNode::free()
{ {
Bucket *bucket; if (left) {
uint hash; left->free();
size_t bucketIndex; std::free(left);
hash = ht->hashFunction(word, length);
bucketIndex = hash % ht->tableSize;
bucket = ht->buckets[bucketIndex];
while (bucket) {
if (
(bucket->hash == hash)
&& (bucket->keyLength == length)
&& (!memcmp(bucket->key, word, length))
) {
*res_length = length;
return bucket;
}
bucket = bucket->next;
} }
*res_length = 0; if (right) {
right->free();
return (Bucket *) nullptr; std::free(right);
} }
void hash_free(HashTable *ht) if (middle) {
{ middle->free();
free(ht->buckets); std::free(middle);
}
if (entry) {
entry->free();
std::free(entry);
}
} }
} }

View File

@ -1,39 +1,41 @@
#ifndef CLICKHOUSE_COMPLETION_H #ifndef CLICKHOUSE_COMPLETION_H
#define CLICKHOUSE_COMPLETION_H #define CLICKHOUSE_COMPLETION_H
#define HASH_SUCCESS 0 #include <iostream>
#define HASH_FAILURE 1 #include <cstring>
#include <readline/readline.h>
#include <sys/types.h>
//All of functionality for hash was taken from mysql-server project from completion_hash.cpp file
namespace Completion namespace Completion
{ {
struct HashEntry {
struct Entry {
char *text; char *text;
struct HashEntry *next; struct Entry *next;
}; void free()
{
if (next) {
next->free();
std::free(next);
}
std::free(text);
}
};
struct Bucket { class TSTNode {
uint hash; private:
char *key; TSTNode *left;
uint keyLength; TSTNode *right;
HashEntry *entry; TSTNode *middle;
struct Bucket *next; char token;
}; Entry *entry;
void insert(char *word, char *remainder);
Entry * find(const char *word, const char *remainder);
public:
void add_word(char *word);
Entry * find_all(const char *word);
void free();
};
struct HashTable {
bool initialized;
size_t tableSize;
uint (*hashFunction)(const char *key, uint keyLength);
Bucket **buckets;
};
int init_hash_table(HashTable *ht, size_t size);
void hash_add_word(HashTable *ht, char *word);
int hash_insert_key(HashTable *ht, char *key, uint keyLength, char* word);
void hash_free(HashTable *ht);
Bucket * hash_find_all_matches(HashTable *ht, const char *word, uint length, uint *res_length);
} }
#endif //CLICKHOUSE_COMPLETION_H #endif //CLICKHOUSE_COMPLETION_H

View File

@ -2,7 +2,7 @@
#define CLICKHOUSE_QUERY_PARTS_H #define CLICKHOUSE_QUERY_PARTS_H
typedef struct { typedef struct {
char *name; /* User printable name of the function. */ char *name; /* User printable name of the query part. */
} QUERYPART; } QUERYPART;
QUERYPART queryParts[] = { QUERYPART queryParts[] = {
@ -173,348 +173,6 @@ QUERYPART queryParts[] = {
{(char *)"Nested"}, {(char *)"Nested"},
{(char *)"Expression"}, {(char *)"Expression"},
{(char *)"Set"}, {(char *)"Set"},
//FUNCTIONS
{(char *)"plus"},
{(char *)"minus"},
{(char *)"multiply"},
{(char *)"divide"},
{(char *)"intDiv"},
{(char *)"intDivOrZero"},
{(char *)"modulo"},
{(char *)"negate"},
{(char *)"abs"},
{(char *)"gcd"},
{(char *)"lcm"},
{(char *)"equals"},
{(char *)"notEquals"},
{(char *)"less"},
{(char *)"greater"},
{(char *)"lessOrEquals"},
{(char *)"greaterOrEquals"},
{(char *)"and"},
{(char *)"or"},
{(char *)"not"},
{(char *)"xor"},
{(char *)"toUInt8"},
{(char *)"toUInt16"},
{(char *)"toUInt32"},
{(char *)"toUInt64"},
{(char *)"toInt8"},
{(char *)"toInt16"},
{(char *)"toInt32"},
{(char *)"toInt64"},
{(char *)"toFloat32"},
{(char *)"toFloat64"},
{(char *)"toUInt8OrZero"},
{(char *)"toUInt16OrZero"},
{(char *)"toUInt32OrZero"},
{(char *)"toUInt64OrZero"},
{(char *)"toInt8OrZero"},
{(char *)"toInt16OrZero"},
{(char *)"toInt32OrZero"},
{(char *)"toInt64OrZero"},
{(char *)"toFloat32OrZero"},
{(char *)"toFloat64OrZero"},
{(char *)"toDate"},
{(char *)"toDateTime"},
{(char *)"toString"},
{(char *)"toFixedString"},
{(char *)"toStringCutToZero"},
{(char *)"reinterpretAsUInt8"},
{(char *)"reinterpretAsUInt16"},
{(char *)"reinterpretAsUInt32"},
{(char *)"reinterpretAsUInt64"},
{(char *)"reinterpretAsInt8"},
{(char *)"reinterpretAsInt16"},
{(char *)"reinterpretAsInt32"},
{(char *)"reinterpretAsInt64"},
{(char *)"reinterpretAsFloat32"},
{(char *)"reinterpretAsFloat64"},
{(char *)"reinterpretAsDate"},
{(char *)"reinterpretAsDateTime"},
{(char *)"reinterpretAsString"},
{(char *)"CAST"},
{(char *)"toYear"},
{(char *)"toMonth"},
{(char *)"toDayOfMonth"},
{(char *)"toDayOfWeek"},
{(char *)"toHour"},
{(char *)"toMinute"},
{(char *)"toSecond"},
{(char *)"toMonday"},
{(char *)"toStartOfMonth"},
{(char *)"toStartOfQuarter"},
{(char *)"toStartOfYear"},
{(char *)"toStartOfMinute"},
{(char *)"toStartOfFiveMinute"},
{(char *)"toStartOfFifteenMinutes"},
{(char *)"toStartOfHour"},
{(char *)"toStartOfDay"},
{(char *)"toTime"},
{(char *)"toRelativeYearNum"},
{(char *)"toRelativeMonthNum"},
{(char *)"toRelativeWeekNum"},
{(char *)"toRelativeDayNum"},
{(char *)"toRelativeHourNum"},
{(char *)"toRelativeMinuteNum"},
{(char *)"toRelativeSecondNum"},
{(char *)"now"},
{(char *)"today"},
{(char *)"yesterday"},
{(char *)"timeSlot"},
{(char *)"empty"},
{(char *)"notEmpty"},
{(char *)"length"},
{(char *)"lengthUTF8"},
{(char *)"lower"},
{(char *)"upper"},
{(char *)"lowerUTF8"},
{(char *)"upperUTF8"},
{(char *)"reverse"},
{(char *)"reverseUTF8"},
{(char *)"concat"},
{(char *)"substringUTF8"},
{(char *)"appendTrailingCharIfAbsent"},
{(char *)"convertCharset"},
{(char *)"position"},
{(char *)"positionUTF8"},
{(char *)"match"},
{(char *)"extract"},
{(char *)"extractAll"},
{(char *)"like"},
{(char *)"notLike"},
{(char *)"replaceOne"},
{(char *)"replaceAll"},
{(char *)"replaceRegexpOne"},
{(char *)"replaceRegexpAll"},
{(char *)"if"},
{(char *)"e"},
{(char *)"pi"},
{(char *)"exp"},
{(char *)"log"},
{(char *)"exp2"},
{(char *)"log2"},
{(char *)"exp10"},
{(char *)"log10"},
{(char *)"sqrt"},
{(char *)"cbrt"},
{(char *)"erf"},
{(char *)"erfc"},
{(char *)"lgamma"},
{(char *)"tgamma"},
{(char *)"sin"},
{(char *)"cos"},
{(char *)"tan"},
{(char *)"asin"},
{(char *)"acos"},
{(char *)"atan"},
{(char *)"pow"},
{(char *)"floor"},
{(char *)"ceil"},
{(char *)"round"},
{(char *)"roundToExp2"},
{(char *)"roundDuration"},
{(char *)"roundAge"},
{(char *)"emptyArrayUInt8"},
{(char *)"emptyArrayUInt16"},
{(char *)"emptyArrayUInt32"},
{(char *)"emptyArrayUInt64"},
{(char *)"emptyArrayInt8"},
{(char *)"emptyArrayInt16"},
{(char *)"emptyArrayInt32"},
{(char *)"emptyArrayInt64"},
{(char *)"emptyArrayFloat32"},
{(char *)"emptyArrayFloat64"},
{(char *)"emptyArrayDate"},
{(char *)"emptyArrayDateTime"},
{(char *)"emptyArrayString"},
{(char *)"emptyArrayToSingle"},
{(char *)"range"},
{(char *)"array"},
{(char *)"arrayConcat"},
{(char *)"arrayElement"},
{(char *)"has"},
{(char *)"indexOf"},
{(char *)"countEqual"},
{(char *)"arrayEnumerate"},
{(char *)"arrayEnumerateUniq"},
{(char *)"arrayPopBack"},
{(char *)"arrayPopFront"},
{(char *)"arrayPushBack"},
{(char *)"arrayPushFront"},
{(char *)"arraySlice"},
{(char *)"arrayUniq"},
{(char *)"arrayJoin"},
{(char *)"splitByChar"},
{(char *)"splitByString"},
{(char *)"arrayStringConcat"},
{(char *)"alphaTokens"},
{(char *)"bitAnd"},
{(char *)"bitOr"},
{(char *)"bitXor"},
{(char *)"bitNot"},
{(char *)"bitShiftLeft"},
{(char *)"bitShiftRight"},
{(char *)"halfMD5"},
{(char *)"MD5"},
{(char *)"sipHash64"},
{(char *)"sipHash128"},
{(char *)"cityHash64"},
{(char *)"intHash32"},
{(char *)"intHash64"},
{(char *)"SHA1"},
{(char *)"SHA224"},
{(char *)"SHA256"},
{(char *)"URLHash"},
{(char *)"rand"},
{(char *)"rand64"},
{(char *)"hex"},
{(char *)"unhex"},
{(char *)"UUIDStringToNum"},
{(char *)"UUIDNumToString"},
{(char *)"bitmaskToList"},
{(char *)"bitmaskToArray"},
{(char *)"protocol"},
{(char *)"domain"},
{(char *)"domainWithoutWWW"},
{(char *)"topLevelDomain"},
{(char *)"firstSignificantSubdomain"},
{(char *)"cutToFirstSignificantSubdomain"},
{(char *)"path"},
{(char *)"pathFull"},
{(char *)"queryString"},
{(char *)"fragment"},
{(char *)"queryStringAndFragment"},
{(char *)"extractURLParameter"},
{(char *)"extractURLParameters"},
{(char *)"extractURLParameterNames"},
{(char *)"URLHierarchy"},
{(char *)"URLPathHierarchy"},
{(char *)"decodeURLComponent"},
{(char *)"cutWWW"},
{(char *)"cutQueryString"},
{(char *)"cutFragment"},
{(char *)"cutQueryStringAndFragment"},
{(char *)"cutURLParameter"},
{(char *)"IPv4NumToString"},
{(char *)"IPv4StringToNum"},
{(char *)"IPv6NumToString"},
{(char *)"IPv6StringToNum"},
{(char *)"visitParamHas"},
{(char *)"visitParamExtractUInt"},
{(char *)"visitParamExtractInt"},
{(char *)"visitParamExtractFloat"},
{(char *)"visitParamExtractBool"},
{(char *)"visitParamExtractRaw"},
{(char *)"visitParamExtractString"},
{(char *)"arrayMap"},
{(char *)"arrayFilter"},
{(char *)"arrayCount"},
{(char *)"arrayExists"},
{(char *)"arrayAll"},
{(char *)"arraySum"},
{(char *)"arrayFirst"},
{(char *)"arrayFirstIndex"},
{(char *)"arrayCumSum"},
{(char *)"arraySort"},
{(char *)"arrayReverseSort"},
{(char *)"hostName"},
{(char *)"visibleWidth"},
{(char *)"toTypeName"},
{(char *)"blockSize"},
{(char *)"materialize"},
{(char *)"ignore"},
{(char *)"sleep"},
{(char *)"currentDatabase"},
{(char *)"isFinite"},
{(char *)"isInfinite"},
{(char *)"isNaN"},
{(char *)"hasColumnInTable"},
{(char *)"bar"},
{(char *)"transform"},
{(char *)"formatReadableSize"},
{(char *)"least"},
{(char *)"greatest"},
{(char *)"uptime"},
{(char *)"version"},
{(char *)"rowNumberInAllBlocks"},
{(char *)"runningDifference"},
{(char *)"MACNumToString"},
{(char *)"MACStringToNum"},
{(char *)"MACStringToOUI"},
{(char *)"dictGetUInt8"},
{(char *)"dictGetUInt16"},
{(char *)"dictGetUInt32"},
{(char *)"dictGetUInt64"},
{(char *)"dictGetInt8"},
{(char *)"dictGetInt16"},
{(char *)"dictGetInt32"},
{(char *)"dictGetInt64"},
{(char *)"dictGetFloat32"},
{(char *)"dictGetFloat64"},
{(char *)"dictGetDate"},
{(char *)"dictGetDateTime"},
{(char *)"dictGetUUID"},
{(char *)"dictGetString"},
{(char *)"dictGetTOrDefault"},
{(char *)"dictIsIn"},
{(char *)"dictGetHierarchy"},
{(char *)"dictHas"},
{(char *)"regionToCity"},
{(char *)"regionToArea"},
{(char *)"regionToDistrict"},
{(char *)"regionToCountry"},
{(char *)"regionToContinent"},
{(char *)"regionToPopulation"},
{(char *)"regionIn"},
{(char *)"regionHierarchy"},
{(char *)"regionToName"},
{(char *)"globalIn"},
{(char *)"in"},
{(char *)"notIn"},
{(char *)"globalNotIn"},
{(char *)"tuple"},
{(char *)"tupleElement"},
{(char *)"count"},
{(char *)"any"},
{(char *)"anyHeavy"},
{(char *)"anyLast"},
{(char *)"min"},
{(char *)"max"},
{(char *)"argMin"},
{(char *)"argMax"},
{(char *)"sum"},
{(char *)"sumWithOverflow"},
{(char *)"sumMap"},
{(char *)"avg"},
{(char *)"uniq"},
{(char *)"uniqCombined"},
{(char *)"uniqHLL12"},
{(char *)"uniqExact"},
{(char *)"groupArray"},
{(char *)"groupArrayInsertAt"},
{(char *)"groupUniqArray"},
{(char *)"quantile"},
{(char *)"quantileDeterministic"},
{(char *)"quantileTiming"},
{(char *)"quantileTimingWeighted"},
{(char *)"quantileExact"},
{(char *)"quantileExactWeighted"},
{(char *)"quantileTDigest"},
{(char *)"median"},
{(char *)"quantiles"},
{(char *)"varSamp"},
{(char *)"varPop"},
{(char *)"stddevSamp"},
{(char *)"stddevPop"},
{(char *)"topK"},
{(char *)"covarSamp"},
{(char *)"covarPop"},
{(char *)"corr"},
{(char *)"sequenceMatch"},
{(char *)"sequenceCount"},
{(char *)"windowFunnel"},
{(char *)"uniqUpTo"},
//END OF LIST //END OF LIST
{(char *)nullptr}, {(char *)nullptr},
}; };

View File

@ -57,7 +57,7 @@
#include <AggregateFunctions/registerAggregateFunctions.h> #include <AggregateFunctions/registerAggregateFunctions.h>
#include <ext/scope_guard.h> #include <ext/scope_guard.h>
static Completion::HashTable ht; static Completion::TSTNode completionNode;
char ** query_parts_completion(const char *, int, int); char ** query_parts_completion(const char *, int, int);
char * query_parts_generator(const char *, int); char * query_parts_generator(const char *, int);
@ -340,12 +340,11 @@ private:
|| (now.month() == 1 && now.day() <= 5); || (now.month() == 1 && now.day() <= 5);
} }
void init_suggestions(Completion::HashTable *ht) void init_suggestions()
{ {
Completion::init_hash_table(ht, 128);
QUERYPART *qP = queryParts; QUERYPART *qP = queryParts;
while (qP->name) { while (qP->name) {
Completion::hash_add_word(ht, qP->name); completionNode.add_word(qP->name);
qP++; qP++;
} }
rl_attempted_completion_function = query_parts_completion; rl_attempted_completion_function = query_parts_completion;
@ -462,7 +461,7 @@ private:
if (print_time_to_stderr) if (print_time_to_stderr)
throw Exception("time option could be specified only in non-interactive mode", ErrorCodes::BAD_ARGUMENTS); throw Exception("time option could be specified only in non-interactive mode", ErrorCodes::BAD_ARGUMENTS);
#if USE_READLINE #if USE_READLINE
init_suggestions(&ht); init_suggestions();
#endif #endif
/// Turn tab completion off. /// Turn tab completion off.
@ -489,9 +488,6 @@ private:
} }
loop(); loop();
#if USE_READLINE
Completion::hash_free(&ht);
#endif
std::cout << (isNewYearMode() ? "Happy new year." : "Bye.") << std::endl; std::cout << (isNewYearMode() ? "Happy new year." : "Bye.") << std::endl;
@ -1580,19 +1576,15 @@ char ** query_parts_completion(const char *text, int start, int end)
char * query_parts_generator(const char *text, int state) char * query_parts_generator(const char *text, int state)
{ {
static int text_length; static int text_length;
static Completion::Bucket *bucket; static Completion::Entry *entry;
static Completion::HashEntry *entry;
char * found; char * found;
if (!state) text_length = (uint) strlen(text); if (!state) text_length = (uint) strlen(text);
if (text_length > 0) { if (text_length > 0) {
if (!state) { if (!state) {
uint length; entry = completionNode.find_all(text);
if (!entry) return (char *) nullptr;
bucket = Completion::hash_find_all_matches(&ht, text, (uint)strlen(text), &length);
if (!bucket) return (char *) nullptr;
entry = bucket->entry;
} }
if (entry) { if (entry) {
found = strdup(entry->text); found = strdup(entry->text);