mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Change cmake files
This commit is contained in:
parent
e4f9dc32e3
commit
51fab4c669
@ -1,3 +1,9 @@
|
||||
# Optional CLD2 support; the option defaults to the value of ENABLE_LIBRARIES.
option(USE_CLD2 "Enable cld2" ${ENABLE_LIBRARIES})

# Nothing further to check when CLD2 is switched off.
if (NOT USE_CLD2)
    return()
endif()

# The CLD2 sources are expected under contrib/cld2 in the source tree.
# Abort the configure step with an actionable hint when that directory is absent.
# FATAL_ERROR is required here: "ERROR" is not a message() mode keyword, so CMake
# would print it as part of the text and continue configuring.
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cld2")
    message (FATAL_ERROR "submodule contrib/cld2 is missing. to fix try run: \n git submodule update --init --recursive")
endif()
|
||||
|
4
contrib/CMakeLists.txt
vendored
4
contrib/CMakeLists.txt
vendored
@ -48,7 +48,9 @@ add_subdirectory (murmurhash)
|
||||
add_subdirectory (replxx-cmake)
|
||||
add_subdirectory (unixodbc-cmake)
|
||||
add_subdirectory (nanodbc-cmake)
|
||||
# cld2 is optional: descend into its build directory only when USE_CLD2 is on.
# The directory is added exactly once; adding the same subdirectory a second
# time is rejected by CMake because its binary directory is already in use.
if (USE_CLD2)
    add_subdirectory (cld2-cmake)
endif()
|
||||
|
||||
if (USE_YAML_CPP)
|
||||
add_subdirectory (yaml-cpp-cmake)
|
||||
|
@ -29,4 +29,4 @@ set (SRCS
|
||||
# Build the cld2 library from the source list collected in SRCS.
add_library(cld2 ${SRCS})

# Compile the library as position-independent code.
set_property(TARGET cld2 PROPERTY POSITION_INDEPENDENT_CODE ON)

# Disable the reserved-id-macro and c++11-narrowing warnings for this target only.
target_compile_options (cld2 PRIVATE -Wno-reserved-id-macro -Wno-c++11-narrowing)

# Publish the public headers once, as a SYSTEM include directory placed BEFORE
# the existing ones. Registering the same directory a second time without SYSTEM
# added nothing, so only this registration is kept.
target_include_directories(cld2 SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}/public")
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -33,22 +33,22 @@ public:
|
||||
void parseEncodingFrequencies(const String & pt)
|
||||
{
|
||||
path_to_enc_freq = pt;
|
||||
loadEncodingsFrequency(pt);
|
||||
///loadEncodingsFrequency("/home/sergey/ClickHouse/programs/server/charset_freq.txt");
|
||||
///loadEncodingsFrequency(pt);
|
||||
loadEncodingsFrequency("/home/sergey/ClickHouse/programs/server/charset_freq.txt");
|
||||
}
|
||||
|
||||
void parseEmotionalDict(const String & pt)
|
||||
{
|
||||
path_to_emo_dict = pt;
|
||||
loadEmotionalDict(pt);
|
||||
///loadEmotionalDict("/home/sergey/ClickHouse/programs/server/emotional_dictionary_rus.txt");
|
||||
///loadEmotionalDict(pt);
|
||||
loadEmotionalDict("/home/sergey/ClickHouse/programs/server/emotional_dictionary_rus.txt");
|
||||
}
|
||||
|
||||
void parseProgrammingFrequency(const String & pt)
|
||||
{
|
||||
path_to_prog_freq = pt;
|
||||
loadProgrammingFrequency(pt);
|
||||
///loadProgrammingFrequency("/home/sergey/ClickHouse/programs/server/prog_freq.txt");
|
||||
///loadProgrammingFrequency(pt);
|
||||
loadProgrammingFrequency("/home/sergey/ClickHouse/programs/server/prog_freq.txt");
|
||||
}
|
||||
|
||||
|
||||
|
@ -80,8 +80,11 @@ if(USE_BASE64)
|
||||
endif()
|
||||
|
||||
target_link_libraries(clickhouse_functions PRIVATE lz4)
|
||||
# cld2 is optional: link it and add its public headers only when USE_CLD2 is on.
# Linking unconditionally would reference a `cld2` target that is never created
# when the option is off.
if (USE_CLD2)
    target_link_libraries(clickhouse_functions PRIVATE cld2)
    target_include_directories(clickhouse_functions SYSTEM PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/cld2/public")
endif()
|
||||
|
||||
if (USE_H3)
|
||||
target_link_libraries(clickhouse_functions PRIVATE ${H3_LIBRARY})
|
||||
|
@ -4,8 +4,7 @@
|
||||
#include <Common/UTF8Helpers.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include "/home/sergey/ClickHouse/contrib/cld2/public/compact_lang_det.h"
|
||||
//#include <cld2/compact_lang_det.h>
|
||||
|
||||
#include <cstring>
|
||||
#include <cmath>
|
||||
#include <unordered_map>
|
||||
|
@ -1,7 +1,7 @@
|
||||
#include <Functions/FunctionsTextClassification.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include "/home/sergey/ClickHouse/contrib/cld2/public/compact_lang_det.h"
|
||||
|
||||
//#include <cld2/compact_lang_det.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
@ -101,7 +101,6 @@ struct ProgrammingClassificationImpl
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
static std::unordered_map<String, std::unordered_map<String, Float64>> programming_freq = FrequencyHolder::getInstance().getProgrammingFrequency();
|
||||
std::unordered_map<String, Float64> data_freq;
|
||||
|
||||
res_data.reserve(1024);
|
||||
res_offsets.resize(offsets.size());
|
||||
@ -112,6 +111,7 @@ struct ProgrammingClassificationImpl
|
||||
for (size_t i = 0; i < offsets.size(); ++i)
|
||||
{
|
||||
const char * haystack = reinterpret_cast<const char *>(&data[prev_offset]);
|
||||
std::unordered_map<String, Float64> data_freq;
|
||||
String str_data = haystack;
|
||||
|
||||
String prev_command;
|
||||
|
@ -23,8 +23,8 @@ struct TonalityClassificationImpl
|
||||
|
||||
/// Maps a tonality level to a label:
///   below 0.15  -> "NEG"
///   above 0.45  -> "POS"
///   otherwise   -> "NEUT"
/// Exactly one pair of thresholds is checked, so no comparison is shadowed by
/// an earlier, wider one.
static String get_tonality(const Float64 & tonality_level)
{
    if (tonality_level < 0.15)
        return "NEG";
    if (tonality_level > 0.45)
        return "POS";
    return "NEUT";
}
|
||||
|
||||
@ -55,7 +55,8 @@ struct TonalityClassificationImpl
|
||||
if (emotional_dict.find(word) != emotional_dict.cend())
|
||||
{
|
||||
count_words += 1;
|
||||
weight += emotional_dict[word];
|
||||
Float64 cur_weight = emotional_dict[word];
|
||||
weight += cur_weight;
|
||||
}
|
||||
word = "";
|
||||
}
|
||||
@ -114,7 +115,8 @@ struct TonalityClassificationImpl
|
||||
if (emotional_dict.find(word) != emotional_dict.cend())
|
||||
{
|
||||
count_words += 1;
|
||||
weight += emotional_dict[word];
|
||||
Float64 cur_weight = emotional_dict[word];
|
||||
weight += cur_weight;
|
||||
}
|
||||
word = "";
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user