mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-28 02:21:59 +00:00
Test data generator: preparation [#CLICKHOUSE-2].
This commit is contained in:
parent
d64bac8487
commit
3106bccaff
@ -21,33 +21,8 @@ using namespace DB;
|
|||||||
struct Models
|
struct Models
|
||||||
{
|
{
|
||||||
MarkovModel Title;
|
MarkovModel Title;
|
||||||
MarkovModel URLDomain;
|
MarkovModel URL;
|
||||||
MarkovModel URLPath;
|
MarkovModel SearchPhrase;
|
||||||
MarkovModel RefererDomain;
|
|
||||||
MarkovModel RefererPath;
|
|
||||||
MarkovModel SearchRefererDomain;
|
|
||||||
MarkovModel SearchRefererPath;
|
|
||||||
MarkovModel MobilePhoneModel;
|
|
||||||
MarkovModel Params;
|
|
||||||
MarkovModel NotEmptySearchPhrase;
|
|
||||||
MarkovModel PageCharset;
|
|
||||||
MarkovModel OriginalURL;
|
|
||||||
MarkovModel BrowserLanguage;
|
|
||||||
MarkovModel BrowserCountry;
|
|
||||||
MarkovModel SocialNetwork;
|
|
||||||
MarkovModel SocialAction;
|
|
||||||
MarkovModel SocialSourcePage;
|
|
||||||
MarkovModel ParamCurrency;
|
|
||||||
MarkovModel OpenstatServiceName;
|
|
||||||
MarkovModel OpenstatCampaignID;
|
|
||||||
MarkovModel OpenstatAdID;
|
|
||||||
MarkovModel OpenstatSourceID;
|
|
||||||
MarkovModel UTMSource;
|
|
||||||
MarkovModel UTMMedium;
|
|
||||||
MarkovModel UTMCampaign;
|
|
||||||
MarkovModel UTMContent;
|
|
||||||
MarkovModel UTMTerm;
|
|
||||||
MarkovModel FromTag;
|
|
||||||
|
|
||||||
static void read(MarkovModel & model, const String & path)
|
static void read(MarkovModel & model, const String & path)
|
||||||
{
|
{
|
||||||
@ -58,33 +33,8 @@ struct Models
|
|||||||
Models()
|
Models()
|
||||||
{
|
{
|
||||||
read(Title, "Title.model");
|
read(Title, "Title.model");
|
||||||
read(URLDomain, "URLDomain.model");
|
read(URL, "URL.model");
|
||||||
read(URLPath, "URLPath.model");
|
read(SearchPhrase, "SearchPhrase.model");
|
||||||
read(RefererDomain, "RefererDomain.model");
|
|
||||||
read(RefererPath, "RefererPath.model");
|
|
||||||
read(SearchRefererDomain, "SearchRefererDomain.model");
|
|
||||||
read(SearchRefererPath, "SearchRefererPath.model");
|
|
||||||
read(MobilePhoneModel, "MobilePhoneModel.model");
|
|
||||||
read(Params, "Params.model");
|
|
||||||
read(NotEmptySearchPhrase, "NotEmptySearchPhrase.model");
|
|
||||||
read(PageCharset, "PageCharset.model");
|
|
||||||
read(OriginalURL, "OriginalURL.model");
|
|
||||||
read(BrowserLanguage, "BrowserLanguage.model");
|
|
||||||
read(BrowserCountry, "BrowserCountry.model");
|
|
||||||
read(SocialNetwork, "SocialNetwork.model");
|
|
||||||
read(SocialAction, "SocialAction.model");
|
|
||||||
read(SocialSourcePage, "SocialSourcePage.model");
|
|
||||||
read(ParamCurrency, "ParamCurrency.model");
|
|
||||||
read(OpenstatServiceName, "OpenstatServiceName.model");
|
|
||||||
read(OpenstatCampaignID, "OpenstatCampaignID.model");
|
|
||||||
read(OpenstatAdID, "OpenstatAdID.model");
|
|
||||||
read(OpenstatSourceID, "OpenstatSourceID.model");
|
|
||||||
read(UTMSource, "UTMSource.model");
|
|
||||||
read(UTMMedium, "UTMMedium.model");
|
|
||||||
read(UTMCampaign, "UTMCampaign.model");
|
|
||||||
read(UTMContent, "UTMContent.model");
|
|
||||||
read(UTMTerm, "UTMTerm.model");
|
|
||||||
read(FromTag, "FromTag.model");
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -95,16 +45,16 @@ struct Generator
|
|||||||
std::mt19937_64 random;
|
std::mt19937_64 random;
|
||||||
Models models;
|
Models models;
|
||||||
|
|
||||||
UInt64 WatchID = random();
|
// UInt64 WatchID = random();
|
||||||
String Title;
|
String Title;
|
||||||
String URL;
|
String URL;
|
||||||
String Referer;
|
/* String Referer;
|
||||||
String FlashMinor2;
|
String FlashMinor2;
|
||||||
String UserAgentMinor;
|
String UserAgentMinor;
|
||||||
String MobilePhoneModel;
|
String MobilePhoneModel;
|
||||||
String Params;
|
String Params;*/
|
||||||
String SearchPhrase;
|
String SearchPhrase;
|
||||||
String PageCharset;
|
/* String PageCharset;
|
||||||
String OriginalURL;
|
String OriginalURL;
|
||||||
String BrowserLanguage;
|
String BrowserLanguage;
|
||||||
String BrowserCountry;
|
String BrowserCountry;
|
||||||
@ -121,7 +71,7 @@ struct Generator
|
|||||||
String UTMCampaign;
|
String UTMCampaign;
|
||||||
String UTMContent;
|
String UTMContent;
|
||||||
String UTMTerm;
|
String UTMTerm;
|
||||||
String FromTag;
|
String FromTag;*/
|
||||||
|
|
||||||
Generator() : out(STDOUT_FILENO) {}
|
Generator() : out(STDOUT_FILENO) {}
|
||||||
|
|
||||||
@ -133,22 +83,22 @@ struct Generator
|
|||||||
auto gen_random64 = [&]{ return random(); };
|
auto gen_random64 = [&]{ return random(); };
|
||||||
|
|
||||||
/// Unique identifier of event.
|
/// Unique identifier of event.
|
||||||
WatchID += std::uniform_int_distribution<UInt64>(0, 10000000000)(random);
|
/* WatchID += std::uniform_int_distribution<UInt64>(0, 10000000000)(random);
|
||||||
writeText(WatchID, out);
|
writeText(WatchID, out);
|
||||||
writeChar('\t', out);
|
writeChar('\t', out);
|
||||||
|
|
||||||
bool JavaEnable = std::bernoulli_distribution(0.6)(random);
|
bool JavaEnable = std::bernoulli_distribution(0.6)(random);
|
||||||
writeText(JavaEnable, out);
|
writeText(JavaEnable, out);
|
||||||
writeChar('\t', out);
|
writeChar('\t', out);*/
|
||||||
|
|
||||||
Title.resize(10000);
|
Title.resize(10000);
|
||||||
Title.resize(models.Title.generate(&Title[0], Title.size(), gen_random64));
|
Title.resize(models.Title.generate(&Title[0], Title.size(), gen_random64));
|
||||||
writeText(Title, out);
|
writeText(Title, out);
|
||||||
writeChar('\t', out);
|
writeChar('\t', out);
|
||||||
|
|
||||||
bool GoodEvent = 1;
|
/* bool GoodEvent = 1;
|
||||||
writeText(GoodEvent, out);
|
writeText(GoodEvent, out);
|
||||||
writeChar('\t', out);
|
writeChar('\t', out);*/
|
||||||
|
|
||||||
LocalDateTime EventTime;
|
LocalDateTime EventTime;
|
||||||
EventTime.year(2013);
|
EventTime.year(2013);
|
||||||
@ -173,7 +123,7 @@ struct Generator
|
|||||||
writeText(CounterID, out);
|
writeText(CounterID, out);
|
||||||
writeChar('\t', out);
|
writeChar('\t', out);
|
||||||
|
|
||||||
UInt32 ClientIP = hash(2, powerLaw(5000, 1.1));
|
/* UInt32 ClientIP = hash(2, powerLaw(5000, 1.1));
|
||||||
writeText(ClientIP, out);
|
writeText(ClientIP, out);
|
||||||
writeChar('\t', out);
|
writeChar('\t', out);
|
||||||
|
|
||||||
@ -195,16 +145,15 @@ struct Generator
|
|||||||
UInt8 UserAgent = hash(7, powerLaw(10, 4)) % 100;
|
UInt8 UserAgent = hash(7, powerLaw(10, 4)) % 100;
|
||||||
writeText(UserAgent, out);
|
writeText(UserAgent, out);
|
||||||
writeChar('\t', out);
|
writeChar('\t', out);
|
||||||
|
*/
|
||||||
URL.resize(10000);
|
URL.resize(10000);
|
||||||
size_t protocol_size = models.URLProtocol.generate(&URL[0], 100, gen_random64);
|
URL.resize(models.URL.generate(&URL[0], URL.size(), gen_random64));
|
||||||
URL[protocol_size]
|
writeText(URL, out);
|
||||||
writeText(MobilePhoneModel, out);
|
|
||||||
writeChar('\t', out);
|
writeChar('\t', out);
|
||||||
|
|
||||||
/// Referer
|
/// Referer
|
||||||
|
|
||||||
bool Refresh = std::bernoulli_distribution(0.1)(random);
|
/* bool Refresh = std::bernoulli_distribution(0.1)(random);
|
||||||
writeText(Refresh, out);
|
writeText(Refresh, out);
|
||||||
writeChar('\t', out);
|
writeChar('\t', out);
|
||||||
|
|
||||||
@ -317,8 +266,8 @@ struct Generator
|
|||||||
UInt8 MobilePhone = IsMobile ? hash(16, powerLaw(10, 4)) % 100 : 0;
|
UInt8 MobilePhone = IsMobile ? hash(16, powerLaw(10, 4)) % 100 : 0;
|
||||||
writeText(MobilePhone, out);
|
writeText(MobilePhone, out);
|
||||||
writeChar('\t', out);
|
writeChar('\t', out);
|
||||||
|
*/
|
||||||
MobilePhoneModel.resize(100);
|
/* MobilePhoneModel.resize(100);
|
||||||
MobilePhoneModel.resize(models.MobilePhoneModel.generate(&MobilePhoneModel[0], MobilePhoneModel.size(), gen_random64));
|
MobilePhoneModel.resize(models.MobilePhoneModel.generate(&MobilePhoneModel[0], MobilePhoneModel.size(), gen_random64));
|
||||||
writeText(MobilePhoneModel, out);
|
writeText(MobilePhoneModel, out);
|
||||||
writeChar('\t', out);
|
writeChar('\t', out);
|
||||||
@ -348,12 +297,12 @@ struct Generator
|
|||||||
if (!SearchEngineID)
|
if (!SearchEngineID)
|
||||||
SearchPhrase.clear();
|
SearchPhrase.clear();
|
||||||
else
|
else
|
||||||
{
|
{*/
|
||||||
SearchPhrase.resize(1000);
|
SearchPhrase.resize(1000);
|
||||||
SearchPhrase.resize(models.NotEmptySearchPhrase.generate(&SearchPhrase[0], SearchPhrase.size(), gen_random64));
|
SearchPhrase.resize(models.SearchPhrase.generate(&SearchPhrase[0], SearchPhrase.size(), gen_random64));
|
||||||
}
|
// }
|
||||||
writeText(SearchPhrase, out);
|
writeText(SearchPhrase, out);
|
||||||
writeChar('\t', out);
|
/* writeChar('\t', out);
|
||||||
|
|
||||||
UInt8 AdvEngineID = weightedSelect<UInt8>(
|
UInt8 AdvEngineID = weightedSelect<UInt8>(
|
||||||
{0, 2, 12, 17, 18, 27, 34, 36}, {3000000, 30000, 3000, 30000, 1, 100, 40, 30});
|
{0, 2, 12, 17, 18, 27, 34, 36}, {3000000, 30000, 3000, 30000, 1, 100, 40, 30});
|
||||||
@ -370,11 +319,7 @@ struct Generator
|
|||||||
writeText(WindowClientWidth, out);
|
writeText(WindowClientWidth, out);
|
||||||
writeChar('\t', out);
|
writeChar('\t', out);
|
||||||
|
|
||||||
writeText(WindowClientHeight, out);
|
writeText(WindowClientHeight, out);*/
|
||||||
writeChar('\t', out);
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
writeChar('\n', out);
|
writeChar('\n', out);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user