diff --git a/docs/en/getting-started/example-datasets/ontime.md b/docs/en/getting-started/example-datasets/ontime.md index 83673cdceb6..f18acc6fd50 100644 --- a/docs/en/getting-started/example-datasets/ontime.md +++ b/docs/en/getting-started/example-datasets/ontime.md @@ -21,120 +21,121 @@ echo https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performa Creating a table: ``` sql -CREATE TABLE `ontime` ( - `Year` UInt16, - `Quarter` UInt8, - `Month` UInt8, - `DayofMonth` UInt8, - `DayOfWeek` UInt8, - `FlightDate` Date, - `UniqueCarrier` FixedString(7), - `AirlineID` Int32, - `Carrier` FixedString(2), - `TailNum` String, - `FlightNum` String, - `OriginAirportID` Int32, - `OriginAirportSeqID` Int32, - `OriginCityMarketID` Int32, - `Origin` FixedString(5), - `OriginCityName` String, - `OriginState` FixedString(2), - `OriginStateFips` String, - `OriginStateName` String, - `OriginWac` Int32, - `DestAirportID` Int32, - `DestAirportSeqID` Int32, - `DestCityMarketID` Int32, - `Dest` FixedString(5), - `DestCityName` String, - `DestState` FixedString(2), - `DestStateFips` String, - `DestStateName` String, - `DestWac` Int32, - `CRSDepTime` Int32, - `DepTime` Int32, - `DepDelay` Int32, - `DepDelayMinutes` Int32, - `DepDel15` Int32, - `DepartureDelayGroups` String, - `DepTimeBlk` String, - `TaxiOut` Int32, - `WheelsOff` Int32, - `WheelsOn` Int32, - `TaxiIn` Int32, - `CRSArrTime` Int32, - `ArrTime` Int32, - `ArrDelay` Int32, - `ArrDelayMinutes` Int32, - `ArrDel15` Int32, - `ArrivalDelayGroups` Int32, - `ArrTimeBlk` String, - `Cancelled` UInt8, - `CancellationCode` FixedString(1), - `Diverted` UInt8, - `CRSElapsedTime` Int32, - `ActualElapsedTime` Int32, - `AirTime` Int32, - `Flights` Int32, - `Distance` Int32, - `DistanceGroup` UInt8, - `CarrierDelay` Int32, - `WeatherDelay` Int32, - `NASDelay` Int32, - `SecurityDelay` Int32, - `LateAircraftDelay` Int32, - `FirstDepTime` String, - `TotalAddGTime` String, - `LongestAddGTime` String, - `DivAirportLandings` String, - `DivReachedDest` String, - `DivActualElapsedTime` String, - `DivArrDelay` String, - `DivDistance` String, - `Div1Airport` String, - `Div1AirportID` Int32, - `Div1AirportSeqID` Int32, - `Div1WheelsOn` String, - `Div1TotalGTime` String, - `Div1LongestGTime` String, - `Div1WheelsOff` String, - `Div1TailNum` String, - `Div2Airport` String, - `Div2AirportID` Int32, - `Div2AirportSeqID` Int32, - `Div2WheelsOn` String, - `Div2TotalGTime` String, - `Div2LongestGTime` String, - `Div2WheelsOff` String, - `Div2TailNum` String, - `Div3Airport` String, - `Div3AirportID` Int32, - `Div3AirportSeqID` Int32, - `Div3WheelsOn` String, - `Div3TotalGTime` String, - `Div3LongestGTime` String, - `Div3WheelsOff` String, - `Div3TailNum` String, - `Div4Airport` String, - `Div4AirportID` Int32, - `Div4AirportSeqID` Int32, - `Div4WheelsOn` String, - `Div4TotalGTime` String, - `Div4LongestGTime` String, - `Div4WheelsOff` String, - `Div4TailNum` String, - `Div5Airport` String, - `Div5AirportID` Int32, - `Div5AirportSeqID` Int32, - `Div5WheelsOn` String, - `Div5TotalGTime` String, - `Div5LongestGTime` String, - `Div5WheelsOff` String, - `Div5TailNum` String +CREATE TABLE `ontime` +( + `Year` UInt16, + `Quarter` UInt8, + `Month` UInt8, + `DayofMonth` UInt8, + `DayOfWeek` UInt8, + `FlightDate` Date, + `Reporting_Airline` String, + `DOT_ID_Reporting_Airline` Int32, + `IATA_CODE_Reporting_Airline` String, + `Tail_Number` Int32, + `Flight_Number_Reporting_Airline` String, + `OriginAirportID` Int32, + `OriginAirportSeqID` Int32, + `OriginCityMarketID` Int32, + `Origin` FixedString(5), + `OriginCityName` String, + `OriginState` FixedString(2), + `OriginStateFips` String, + `OriginStateName` String, + `OriginWac` Int32, + `DestAirportID` Int32, + `DestAirportSeqID` Int32, + `DestCityMarketID` Int32, + `Dest` FixedString(5), + `DestCityName` String, + `DestState` FixedString(2), + `DestStateFips` String, + `DestStateName` String, + `DestWac` Int32, + `CRSDepTime` Int32, + `DepTime` Int32, + `DepDelay` Int32, + `DepDelayMinutes` Int32, + `DepDel15` Int32, + `DepartureDelayGroups` String, + `DepTimeBlk` String, + `TaxiOut` Int32, + `WheelsOff` Int32, + `WheelsOn` Int32, + `TaxiIn` Int32, + `CRSArrTime` Int32, + `ArrTime` Int32, + `ArrDelay` Int32, + `ArrDelayMinutes` Int32, + `ArrDel15` Int32, + `ArrivalDelayGroups` Int32, + `ArrTimeBlk` String, + `Cancelled` UInt8, + `CancellationCode` FixedString(1), + `Diverted` UInt8, + `CRSElapsedTime` Int32, + `ActualElapsedTime` Int32, + `AirTime` Nullable(Int32), + `Flights` Int32, + `Distance` Int32, + `DistanceGroup` UInt8, + `CarrierDelay` Int32, + `WeatherDelay` Int32, + `NASDelay` Int32, + `SecurityDelay` Int32, + `LateAircraftDelay` Int32, + `FirstDepTime` String, + `TotalAddGTime` String, + `LongestAddGTime` String, + `DivAirportLandings` String, + `DivReachedDest` String, + `DivActualElapsedTime` String, + `DivArrDelay` String, + `DivDistance` String, + `Div1Airport` String, + `Div1AirportID` Int32, + `Div1AirportSeqID` Int32, + `Div1WheelsOn` String, + `Div1TotalGTime` String, + `Div1LongestGTime` String, + `Div1WheelsOff` String, + `Div1TailNum` String, + `Div2Airport` String, + `Div2AirportID` Int32, + `Div2AirportSeqID` Int32, + `Div2WheelsOn` String, + `Div2TotalGTime` String, + `Div2LongestGTime` String, + `Div2WheelsOff` String, + `Div2TailNum` String, + `Div3Airport` String, + `Div3AirportID` Int32, + `Div3AirportSeqID` Int32, + `Div3WheelsOn` String, + `Div3TotalGTime` String, + `Div3LongestGTime` String, + `Div3WheelsOff` String, + `Div3TailNum` String, + `Div4Airport` String, + `Div4AirportID` Int32, + `Div4AirportSeqID` Int32, + `Div4WheelsOn` String, + `Div4TotalGTime` String, + `Div4LongestGTime` String, + `Div4WheelsOff` String, + `Div4TailNum` String, + `Div5Airport` String, + `Div5AirportID` Int32, + `Div5AirportSeqID` Int32, + `Div5WheelsOn` String, + `Div5TotalGTime` String, + `Div5LongestGTime` String, + `Div5WheelsOff` String, + `Div5TailNum` String ) ENGINE = MergeTree -PARTITION BY Year -ORDER BY (Carrier, FlightDate) -SETTINGS index_granularity = 8192; + PARTITION BY Year + ORDER BY (IATA_CODE_Reporting_Airline, FlightDate) + SETTINGS index_granularity = 8192; ``` Loading data with multiple threads: @@ -206,7 +207,7 @@ LIMIT 10; Q4. The number of delays by carrier for 2007 ``` sql -SELECT Carrier, count(*) +SELECT IATA_CODE_Reporting_Airline AS Carrier, count(*) FROM ontime WHERE DepDelay>10 AND Year=2007 GROUP BY Carrier @@ -220,29 +221,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3 FROM ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year=2007 GROUP BY Carrier -) +) q JOIN ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c2 FROM ontime WHERE Year=2007 GROUP BY Carrier -) USING Carrier +) qq USING Carrier ORDER BY c3 DESC; ``` Better version of the same query: ``` sql -SELECT Carrier, avg(DepDelay>10)*100 AS c3 +SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3 FROM ontime WHERE Year=2007 GROUP BY Carrier @@ -256,29 +257,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3 FROM ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year>=2000 AND Year<=2008 GROUP BY Carrier -) +) q JOIN ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c2 FROM ontime WHERE Year>=2000 AND Year<=2008 GROUP BY Carrier -) USING Carrier +) qq USING Carrier ORDER BY c3 DESC; ``` Better version of the same query: ``` sql -SELECT Carrier, avg(DepDelay>10)*100 AS c3 +SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3 FROM ontime WHERE Year>=2000 AND Year<=2008 GROUP BY Carrier @@ -297,7 +298,7 @@ FROM from ontime WHERE DepDelay>10 GROUP BY Year -) +) q JOIN ( select @@ -305,7 +306,7 @@ JOIN count(*) as c2 from ontime GROUP BY Year -) USING (Year) +) qq USING (Year) ORDER BY Year; ``` @@ -340,7 +341,7 @@ Q10. ``` sql SELECT - min(Year), max(Year), Carrier, count(*) AS cnt, + min(Year), max(Year), IATA_CODE_Reporting_Airline AS Carrier, count(*) AS cnt, sum(ArrDelayMinutes>30) AS flights_delayed, round(sum(ArrDelayMinutes>30)/count(*),2) AS rate FROM ontime diff --git a/docs/ja/getting-started/example-datasets/ontime.md b/docs/ja/getting-started/example-datasets/ontime.md index bd049e8caad..d12d8a36069 100644 --- a/docs/ja/getting-started/example-datasets/ontime.md +++ b/docs/ja/getting-started/example-datasets/ontime.md @@ -29,126 +29,127 @@ done テーブルの作成: ``` sql -CREATE TABLE `ontime` ( - `Year` UInt16, - `Quarter` UInt8, - `Month` UInt8, - `DayofMonth` UInt8, - `DayOfWeek` UInt8, - `FlightDate` Date, - `UniqueCarrier` FixedString(7), - `AirlineID` Int32, - `Carrier` FixedString(2), - `TailNum` String, - `FlightNum` String, - `OriginAirportID` Int32, - `OriginAirportSeqID` Int32, - `OriginCityMarketID` Int32, - `Origin` FixedString(5), - `OriginCityName` String, - `OriginState` FixedString(2), - `OriginStateFips` String, - `OriginStateName` String, - `OriginWac` Int32, - `DestAirportID` Int32, - `DestAirportSeqID` Int32, - `DestCityMarketID` Int32, - `Dest` FixedString(5), - `DestCityName` String, - `DestState` FixedString(2), - `DestStateFips` String, - `DestStateName` String, - `DestWac` Int32, - `CRSDepTime` Int32, - `DepTime` Int32, - `DepDelay` Int32, - `DepDelayMinutes` Int32, - `DepDel15` Int32, - `DepartureDelayGroups` String, - `DepTimeBlk` String, - `TaxiOut` Int32, - `WheelsOff` Int32, - `WheelsOn` Int32, - `TaxiIn` Int32, - `CRSArrTime` Int32, - `ArrTime` Int32, - `ArrDelay` Int32, - `ArrDelayMinutes` Int32, - `ArrDel15` Int32, - `ArrivalDelayGroups` Int32, - `ArrTimeBlk` String, - `Cancelled` UInt8, - `CancellationCode` FixedString(1), - `Diverted` UInt8, - `CRSElapsedTime` Int32, - `ActualElapsedTime` Int32, - `AirTime` Int32, - `Flights` Int32, - `Distance` Int32, - `DistanceGroup` UInt8, - `CarrierDelay` Int32, - `WeatherDelay` Int32, - `NASDelay` Int32, - `SecurityDelay` Int32, - `LateAircraftDelay` Int32, - `FirstDepTime` String, - `TotalAddGTime` String, - `LongestAddGTime` String, - `DivAirportLandings` String, - `DivReachedDest` String, - `DivActualElapsedTime` String, - `DivArrDelay` String, - `DivDistance` String, - `Div1Airport` String, - `Div1AirportID` Int32, - `Div1AirportSeqID` Int32, - `Div1WheelsOn` String, - `Div1TotalGTime` String, - `Div1LongestGTime` String, - `Div1WheelsOff` String, - `Div1TailNum` String, - `Div2Airport` String, - `Div2AirportID` Int32, - `Div2AirportSeqID` Int32, - `Div2WheelsOn` String, - `Div2TotalGTime` String, - `Div2LongestGTime` String, - `Div2WheelsOff` String, - `Div2TailNum` String, - `Div3Airport` String, - `Div3AirportID` Int32, - `Div3AirportSeqID` Int32, - `Div3WheelsOn` String, - `Div3TotalGTime` String, - `Div3LongestGTime` String, - `Div3WheelsOff` String, - `Div3TailNum` String, - `Div4Airport` String, - `Div4AirportID` Int32, - `Div4AirportSeqID` Int32, - `Div4WheelsOn` String, - `Div4TotalGTime` String, - `Div4LongestGTime` String, - `Div4WheelsOff` String, - `Div4TailNum` String, - `Div5Airport` String, - `Div5AirportID` Int32, - `Div5AirportSeqID` Int32, - `Div5WheelsOn` String, - `Div5TotalGTime` String, - `Div5LongestGTime` String, - `Div5WheelsOff` String, - `Div5TailNum` String +CREATE TABLE `ontime` +( + `Year` UInt16, + `Quarter` UInt8, + `Month` UInt8, + `DayofMonth` UInt8, + `DayOfWeek` UInt8, + `FlightDate` Date, + `Reporting_Airline` String, + `DOT_ID_Reporting_Airline` Int32, + `IATA_CODE_Reporting_Airline` String, + `Tail_Number` Int32, + `Flight_Number_Reporting_Airline` String, + `OriginAirportID` Int32, + `OriginAirportSeqID` Int32, + `OriginCityMarketID` Int32, + `Origin` FixedString(5), + `OriginCityName` String, + `OriginState` FixedString(2), + `OriginStateFips` String, + `OriginStateName` String, + `OriginWac` Int32, + `DestAirportID` Int32, + `DestAirportSeqID` Int32, + `DestCityMarketID` Int32, + `Dest` FixedString(5), + `DestCityName` String, + `DestState` FixedString(2), + `DestStateFips` String, + `DestStateName` String, + `DestWac` Int32, + `CRSDepTime` Int32, + `DepTime` Int32, + `DepDelay` Int32, + `DepDelayMinutes` Int32, + `DepDel15` Int32, + `DepartureDelayGroups` String, + `DepTimeBlk` String, + `TaxiOut` Int32, + `WheelsOff` Int32, + `WheelsOn` Int32, + `TaxiIn` Int32, + `CRSArrTime` Int32, + `ArrTime` Int32, + `ArrDelay` Int32, + `ArrDelayMinutes` Int32, + `ArrDel15` Int32, + `ArrivalDelayGroups` Int32, + `ArrTimeBlk` String, + `Cancelled` UInt8, + `CancellationCode` FixedString(1), + `Diverted` UInt8, + `CRSElapsedTime` Int32, + `ActualElapsedTime` Int32, + `AirTime` Nullable(Int32), + `Flights` Int32, + `Distance` Int32, + `DistanceGroup` UInt8, + `CarrierDelay` Int32, + `WeatherDelay` Int32, + `NASDelay` Int32, + `SecurityDelay` Int32, + `LateAircraftDelay` Int32, + `FirstDepTime` String, + `TotalAddGTime` String, + `LongestAddGTime` String, + `DivAirportLandings` String, + `DivReachedDest` String, + `DivActualElapsedTime` String, + `DivArrDelay` String, + `DivDistance` String, + `Div1Airport` String, + `Div1AirportID` Int32, + `Div1AirportSeqID` Int32, + `Div1WheelsOn` String, + `Div1TotalGTime` String, + `Div1LongestGTime` String, + `Div1WheelsOff` String, + `Div1TailNum` String, + `Div2Airport` String, + `Div2AirportID` Int32, + `Div2AirportSeqID` Int32, + `Div2WheelsOn` String, + `Div2TotalGTime` String, + `Div2LongestGTime` String, + `Div2WheelsOff` String, + `Div2TailNum` String, + `Div3Airport` String, + `Div3AirportID` Int32, + `Div3AirportSeqID` Int32, + `Div3WheelsOn` String, + `Div3TotalGTime` String, + `Div3LongestGTime` String, + `Div3WheelsOff` String, + `Div3TailNum` String, + `Div4Airport` String, + `Div4AirportID` Int32, + `Div4AirportSeqID` Int32, + `Div4WheelsOn` String, + `Div4TotalGTime` String, + `Div4LongestGTime` String, + `Div4WheelsOff` String, + `Div4TailNum` String, + `Div5Airport` String, + `Div5AirportID` Int32, + `Div5AirportSeqID` Int32, + `Div5WheelsOn` String, + `Div5TotalGTime` String, + `Div5LongestGTime` String, + `Div5WheelsOff` String, + `Div5TailNum` String ) ENGINE = MergeTree -PARTITION BY Year -ORDER BY (Carrier, FlightDate) -SETTINGS index_granularity = 8192; + PARTITION BY Year + ORDER BY (IATA_CODE_Reporting_Airline, FlightDate) + SETTINGS index_granularity = 8192; ``` データのロード: ``` bash -$ for i in *.zip; do echo $i; unzip -cq $i '*.csv' | sed 's/\.00//g' | clickhouse-client --host=example-perftest01j --query="INSERT INTO ontime FORMAT CSVWithNames"; done +ls -1 *.zip | xargs -I{} -P $(nproc) bash -c "echo {}; unzip -cq {} '*.csv' | sed 's/\.00//g' | clickhouse-client --input_format_with_names_use_header=0 --query='INSERT INTO ontime FORMAT CSVWithNames'" ``` ## パーティション済みデータのダウンロード {#download-of-prepared-partitions} @@ -212,10 +213,10 @@ LIMIT 10; Q4. 2007年のキャリア別の遅延の数 ``` sql -SELECT Carrier, count(*) +SELECT IATA_CODE_Reporting_Airline AS Carrier, count(*) FROM ontime WHERE DepDelay>10 AND Year=2007 -GROUP BY Carrier +GROUP BY IATA_CODE_Reporting_Airline ORDER BY count(*) DESC; ``` @@ -226,32 +227,32 @@ SELECT Carrier, c, c2, c*100/c2 as c3 FROM ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year=2007 GROUP BY Carrier -) +) q JOIN ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c2 FROM ontime WHERE Year=2007 GROUP BY Carrier -) USING Carrier +) qq USING Carrier ORDER BY c3 DESC; ``` 同じクエリのより良いバージョン: ``` sql -SELECT Carrier, avg(DepDelay>10)*100 AS c3 +SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3 FROM ontime WHERE Year=2007 -GROUP BY Carrier +GROUP BY IATA_CODE_Reporting_Airline ORDER BY c3 DESC ``` @@ -262,29 +263,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3 FROM ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year>=2000 AND Year<=2008 GROUP BY Carrier -) +) q JOIN ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c2 FROM ontime WHERE Year>=2000 AND Year<=2008 GROUP BY Carrier -) USING Carrier +) qq USING Carrier ORDER BY c3 DESC; ``` 同じクエリのより良いバージョン: ``` sql -SELECT Carrier, avg(DepDelay>10)*100 AS c3 +SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3 FROM ontime WHERE Year>=2000 AND Year<=2008 GROUP BY Carrier @@ -303,7 +304,7 @@ FROM from ontime WHERE DepDelay>10 GROUP BY Year -) +) q JOIN ( select @@ -311,7 +312,7 @@ JOIN count(*) as c2 from ontime GROUP BY Year -) USING (Year) +) qq USING (Year) ORDER BY Year; ``` @@ -346,7 +347,7 @@ Q10. ``` sql SELECT - min(Year), max(Year), Carrier, count(*) AS cnt, + min(Year), max(Year), IATA_CODE_Reporting_Airline AS Carrier, count(*) AS cnt, sum(ArrDelayMinutes>30) AS flights_delayed, round(sum(ArrDelayMinutes>30)/count(*),2) AS rate FROM ontime diff --git a/docs/ru/getting-started/example-datasets/ontime.md b/docs/ru/getting-started/example-datasets/ontime.md index be5b1cd1b70..d46b7e75e7f 100644 --- a/docs/ru/getting-started/example-datasets/ontime.md +++ b/docs/ru/getting-started/example-datasets/ontime.md @@ -27,126 +27,127 @@ done Создание таблицы: ``` sql -CREATE TABLE `ontime` ( - `Year` UInt16, - `Quarter` UInt8, - `Month` UInt8, - `DayofMonth` UInt8, - `DayOfWeek` UInt8, - `FlightDate` Date, - `UniqueCarrier` FixedString(7), - `AirlineID` Int32, - `Carrier` FixedString(2), - `TailNum` String, - `FlightNum` String, - `OriginAirportID` Int32, - `OriginAirportSeqID` Int32, - `OriginCityMarketID` Int32, - `Origin` FixedString(5), - `OriginCityName` String, - `OriginState` FixedString(2), - `OriginStateFips` String, - `OriginStateName` String, - `OriginWac` Int32, - `DestAirportID` Int32, - `DestAirportSeqID` Int32, - `DestCityMarketID` Int32, - `Dest` FixedString(5), - `DestCityName` String, - `DestState` FixedString(2), - `DestStateFips` String, - `DestStateName` String, - `DestWac` Int32, - `CRSDepTime` Int32, - `DepTime` Int32, - `DepDelay` Int32, - `DepDelayMinutes` Int32, - `DepDel15` Int32, - `DepartureDelayGroups` String, - `DepTimeBlk` String, - `TaxiOut` Int32, - `WheelsOff` Int32, - `WheelsOn` Int32, - `TaxiIn` Int32, - `CRSArrTime` Int32, - `ArrTime` Int32, - `ArrDelay` Int32, - `ArrDelayMinutes` Int32, - `ArrDel15` Int32, - `ArrivalDelayGroups` Int32, - `ArrTimeBlk` String, - `Cancelled` UInt8, - `CancellationCode` FixedString(1), - `Diverted` UInt8, - `CRSElapsedTime` Int32, - `ActualElapsedTime` Int32, - `AirTime` Int32, - `Flights` Int32, - `Distance` Int32, - `DistanceGroup` UInt8, - `CarrierDelay` Int32, - `WeatherDelay` Int32, - `NASDelay` Int32, - `SecurityDelay` Int32, - `LateAircraftDelay` Int32, - `FirstDepTime` String, - `TotalAddGTime` String, - `LongestAddGTime` String, - `DivAirportLandings` String, - `DivReachedDest` String, - `DivActualElapsedTime` String, - `DivArrDelay` String, - `DivDistance` String, - `Div1Airport` String, - `Div1AirportID` Int32, - `Div1AirportSeqID` Int32, - `Div1WheelsOn` String, - `Div1TotalGTime` String, - `Div1LongestGTime` String, - `Div1WheelsOff` String, - `Div1TailNum` String, - `Div2Airport` String, - `Div2AirportID` Int32, - `Div2AirportSeqID` Int32, - `Div2WheelsOn` String, - `Div2TotalGTime` String, - `Div2LongestGTime` String, - `Div2WheelsOff` String, - `Div2TailNum` String, - `Div3Airport` String, - `Div3AirportID` Int32, - `Div3AirportSeqID` Int32, - `Div3WheelsOn` String, - `Div3TotalGTime` String, - `Div3LongestGTime` String, - `Div3WheelsOff` String, - `Div3TailNum` String, - `Div4Airport` String, - `Div4AirportID` Int32, - `Div4AirportSeqID` Int32, - `Div4WheelsOn` String, - `Div4TotalGTime` String, - `Div4LongestGTime` String, - `Div4WheelsOff` String, - `Div4TailNum` String, - `Div5Airport` String, - `Div5AirportID` Int32, - `Div5AirportSeqID` Int32, - `Div5WheelsOn` String, - `Div5TotalGTime` String, - `Div5LongestGTime` String, - `Div5WheelsOff` String, - `Div5TailNum` String +CREATE TABLE `ontime` +( + `Year` UInt16, + `Quarter` UInt8, + `Month` UInt8, + `DayofMonth` UInt8, + `DayOfWeek` UInt8, + `FlightDate` Date, + `Reporting_Airline` String, + `DOT_ID_Reporting_Airline` Int32, + `IATA_CODE_Reporting_Airline` String, + `Tail_Number` Int32, + `Flight_Number_Reporting_Airline` String, + `OriginAirportID` Int32, + `OriginAirportSeqID` Int32, + `OriginCityMarketID` Int32, + `Origin` FixedString(5), + `OriginCityName` String, + `OriginState` FixedString(2), + `OriginStateFips` String, + `OriginStateName` String, + `OriginWac` Int32, + `DestAirportID` Int32, + `DestAirportSeqID` Int32, + `DestCityMarketID` Int32, + `Dest` FixedString(5), + `DestCityName` String, + `DestState` FixedString(2), + `DestStateFips` String, + `DestStateName` String, + `DestWac` Int32, + `CRSDepTime` Int32, + `DepTime` Int32, + `DepDelay` Int32, + `DepDelayMinutes` Int32, + `DepDel15` Int32, + `DepartureDelayGroups` String, + `DepTimeBlk` String, + `TaxiOut` Int32, + `WheelsOff` Int32, + `WheelsOn` Int32, + `TaxiIn` Int32, + `CRSArrTime` Int32, + `ArrTime` Int32, + `ArrDelay` Int32, + `ArrDelayMinutes` Int32, + `ArrDel15` Int32, + `ArrivalDelayGroups` Int32, + `ArrTimeBlk` String, + `Cancelled` UInt8, + `CancellationCode` FixedString(1), + `Diverted` UInt8, + `CRSElapsedTime` Int32, + `ActualElapsedTime` Int32, + `AirTime` Nullable(Int32), + `Flights` Int32, + `Distance` Int32, + `DistanceGroup` UInt8, + `CarrierDelay` Int32, + `WeatherDelay` Int32, + `NASDelay` Int32, + `SecurityDelay` Int32, + `LateAircraftDelay` Int32, + `FirstDepTime` String, + `TotalAddGTime` String, + `LongestAddGTime` String, + `DivAirportLandings` String, + `DivReachedDest` String, + `DivActualElapsedTime` String, + `DivArrDelay` String, + `DivDistance` String, + `Div1Airport` String, + `Div1AirportID` Int32, + `Div1AirportSeqID` Int32, + `Div1WheelsOn` String, + `Div1TotalGTime` String, + `Div1LongestGTime` String, + `Div1WheelsOff` String, + `Div1TailNum` String, + `Div2Airport` String, + `Div2AirportID` Int32, + `Div2AirportSeqID` Int32, + `Div2WheelsOn` String, + `Div2TotalGTime` String, + `Div2LongestGTime` String, + `Div2WheelsOff` String, + `Div2TailNum` String, + `Div3Airport` String, + `Div3AirportID` Int32, + `Div3AirportSeqID` Int32, + `Div3WheelsOn` String, + `Div3TotalGTime` String, + `Div3LongestGTime` String, + `Div3WheelsOff` String, + `Div3TailNum` String, + `Div4Airport` String, + `Div4AirportID` Int32, + `Div4AirportSeqID` Int32, + `Div4WheelsOn` String, + `Div4TotalGTime` String, + `Div4LongestGTime` String, + `Div4WheelsOff` String, + `Div4TailNum` String, + `Div5Airport` String, + `Div5AirportID` Int32, + `Div5AirportSeqID` Int32, + `Div5WheelsOn` String, + `Div5TotalGTime` String, + `Div5LongestGTime` String, + `Div5WheelsOff` String, + `Div5TailNum` String ) ENGINE = MergeTree -PARTITION BY Year -ORDER BY (Carrier, FlightDate) -SETTINGS index_granularity = 8192; + PARTITION BY Year + ORDER BY (IATA_CODE_Reporting_Airline, FlightDate) + SETTINGS index_granularity = 8192; ``` Загрузка данных: ``` bash -$ for i in *.zip; do echo $i; unzip -cq $i '*.csv' | sed 's/\.00//g' | clickhouse-client --host=example-perftest01j --query="INSERT INTO ontime FORMAT CSVWithNames"; done +ls -1 *.zip | xargs -I{} -P $(nproc) bash -c "echo {}; unzip -cq {} '*.csv' | sed 's/\.00//g' | clickhouse-client --input_format_with_names_use_header=0 --query='INSERT INTO ontime FORMAT CSVWithNames'" ``` ## Скачивание готовых партиций {#skachivanie-gotovykh-partitsii} @@ -211,7 +212,7 @@ LIMIT 10; Q4. Количество задержек по перевозчикам за 2007 год ``` sql -SELECT Carrier, count(*) +SELECT IATA_CODE_Reporting_Airline AS Carrier, count(*) FROM ontime WHERE DepDelay>10 AND Year=2007 GROUP BY Carrier @@ -225,29 +226,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3 FROM ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year=2007 GROUP BY Carrier -) +) q JOIN ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c2 FROM ontime WHERE Year=2007 GROUP BY Carrier -) USING Carrier +) qq USING Carrier ORDER BY c3 DESC; ``` Более оптимальная версия того же запроса: ``` sql -SELECT Carrier, avg(DepDelay>10)*100 AS c3 +SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3 FROM ontime WHERE Year=2007 GROUP BY Carrier @@ -261,29 +262,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3 FROM ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year>=2000 AND Year<=2008 GROUP BY Carrier -) +) q JOIN ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c2 FROM ontime WHERE Year>=2000 AND Year<=2008 GROUP BY Carrier -) USING Carrier +) qq USING Carrier ORDER BY c3 DESC; ``` Более оптимальная версия того же запроса: ``` sql -SELECT Carrier, avg(DepDelay>10)*100 AS c3 +SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3 FROM ontime WHERE Year>=2000 AND Year<=2008 GROUP BY Carrier @@ -302,7 +303,7 @@ FROM from ontime WHERE DepDelay>10 GROUP BY Year -) +) q JOIN ( select @@ -310,7 +311,7 @@ JOIN count(*) as c2 from ontime GROUP BY Year -) USING (Year) +) qq USING (Year) ORDER BY Year; ``` @@ -346,7 +347,7 @@ Q10. ``` sql SELECT - min(Year), max(Year), Carrier, count(*) AS cnt, + min(Year), max(Year), IATA_CODE_Reporting_Airline AS Carrier, count(*) AS cnt, sum(ArrDelayMinutes>30) AS flights_delayed, round(sum(ArrDelayMinutes>30)/count(*),2) AS rate FROM ontime diff --git a/docs/zh/getting-started/example-datasets/ontime.md b/docs/zh/getting-started/example-datasets/ontime.md index 3921f71fc7e..6d888b2196c 100644 --- a/docs/zh/getting-started/example-datasets/ontime.md +++ b/docs/zh/getting-started/example-datasets/ontime.md @@ -29,126 +29,127 @@ done 创建表结构: ``` sql -CREATE TABLE `ontime` ( - `Year` UInt16, - `Quarter` UInt8, - `Month` UInt8, - `DayofMonth` UInt8, - `DayOfWeek` UInt8, - `FlightDate` Date, - `UniqueCarrier` FixedString(7), - `AirlineID` Int32, - `Carrier` FixedString(2), - `TailNum` String, - `FlightNum` String, - `OriginAirportID` Int32, - `OriginAirportSeqID` Int32, - `OriginCityMarketID` Int32, - `Origin` FixedString(5), - `OriginCityName` String, - `OriginState` FixedString(2), - `OriginStateFips` String, - `OriginStateName` String, - `OriginWac` Int32, - `DestAirportID` Int32, - `DestAirportSeqID` Int32, - `DestCityMarketID` Int32, - `Dest` FixedString(5), - `DestCityName` String, - `DestState` FixedString(2), - `DestStateFips` String, - `DestStateName` String, - `DestWac` Int32, - `CRSDepTime` Int32, - `DepTime` Int32, - `DepDelay` Int32, - `DepDelayMinutes` Int32, - `DepDel15` Int32, - `DepartureDelayGroups` String, - `DepTimeBlk` String, - `TaxiOut` Int32, - `WheelsOff` Int32, - `WheelsOn` Int32, - `TaxiIn` Int32, - `CRSArrTime` Int32, - `ArrTime` Int32, - `ArrDelay` Int32, - `ArrDelayMinutes` Int32, - `ArrDel15` Int32, - `ArrivalDelayGroups` Int32, - `ArrTimeBlk` String, - `Cancelled` UInt8, - `CancellationCode` FixedString(1), - `Diverted` UInt8, - `CRSElapsedTime` Int32, - `ActualElapsedTime` Int32, - `AirTime` Int32, - `Flights` Int32, - `Distance` Int32, - `DistanceGroup` UInt8, - `CarrierDelay` Int32, - `WeatherDelay` Int32, - `NASDelay` Int32, - `SecurityDelay` Int32, - `LateAircraftDelay` Int32, - `FirstDepTime` String, - `TotalAddGTime` String, - `LongestAddGTime` String, - `DivAirportLandings` String, - `DivReachedDest` String, - `DivActualElapsedTime` String, - `DivArrDelay` String, - `DivDistance` String, - `Div1Airport` String, - `Div1AirportID` Int32, - `Div1AirportSeqID` Int32, - `Div1WheelsOn` String, - `Div1TotalGTime` String, - `Div1LongestGTime` String, - `Div1WheelsOff` String, - `Div1TailNum` String, - `Div2Airport` String, - `Div2AirportID` Int32, - `Div2AirportSeqID` Int32, - `Div2WheelsOn` String, - `Div2TotalGTime` String, - `Div2LongestGTime` String, - `Div2WheelsOff` String, - `Div2TailNum` String, - `Div3Airport` String, - `Div3AirportID` Int32, - `Div3AirportSeqID` Int32, - `Div3WheelsOn` String, - `Div3TotalGTime` String, - `Div3LongestGTime` String, - `Div3WheelsOff` String, - `Div3TailNum` String, - `Div4Airport` String, - `Div4AirportID` Int32, - `Div4AirportSeqID` Int32, - `Div4WheelsOn` String, - `Div4TotalGTime` String, - `Div4LongestGTime` String, - `Div4WheelsOff` String, - `Div4TailNum` String, - `Div5Airport` String, - `Div5AirportID` Int32, - `Div5AirportSeqID` Int32, - `Div5WheelsOn` String, - `Div5TotalGTime` String, - `Div5LongestGTime` String, - `Div5WheelsOff` String, - `Div5TailNum` String +CREATE TABLE `ontime` +( + `Year` UInt16, + `Quarter` UInt8, + `Month` UInt8, + `DayofMonth` UInt8, + `DayOfWeek` UInt8, + `FlightDate` Date, + `Reporting_Airline` String, + `DOT_ID_Reporting_Airline` Int32, + `IATA_CODE_Reporting_Airline` String, + `Tail_Number` Int32, + `Flight_Number_Reporting_Airline` String, + `OriginAirportID` Int32, + `OriginAirportSeqID` Int32, + `OriginCityMarketID` Int32, + `Origin` FixedString(5), + `OriginCityName` String, + `OriginState` FixedString(2), + `OriginStateFips` String, + `OriginStateName` String, + `OriginWac` Int32, + `DestAirportID` Int32, + `DestAirportSeqID` Int32, + `DestCityMarketID` Int32, + `Dest` FixedString(5), + `DestCityName` String, + `DestState` FixedString(2), + `DestStateFips` String, + `DestStateName` String, + `DestWac` Int32, + `CRSDepTime` Int32, + `DepTime` Int32, + `DepDelay` Int32, + `DepDelayMinutes` Int32, + `DepDel15` Int32, + `DepartureDelayGroups` String, + `DepTimeBlk` String, + `TaxiOut` Int32, + `WheelsOff` Int32, + `WheelsOn` Int32, + `TaxiIn` Int32, + `CRSArrTime` Int32, + `ArrTime` Int32, + `ArrDelay` Int32, + `ArrDelayMinutes` Int32, + `ArrDel15` Int32, + `ArrivalDelayGroups` Int32, + `ArrTimeBlk` String, + `Cancelled` UInt8, + `CancellationCode` FixedString(1), + `Diverted` UInt8, + `CRSElapsedTime` Int32, + `ActualElapsedTime` Int32, + `AirTime` Nullable(Int32), + `Flights` Int32, + `Distance` Int32, + `DistanceGroup` UInt8, + `CarrierDelay` Int32, + `WeatherDelay` Int32, + `NASDelay` Int32, + `SecurityDelay` Int32, + `LateAircraftDelay` Int32, + `FirstDepTime` String, + `TotalAddGTime` String, + `LongestAddGTime` String, + `DivAirportLandings` String, + `DivReachedDest` String, + `DivActualElapsedTime` String, + `DivArrDelay` String, + `DivDistance` String, + `Div1Airport` String, + `Div1AirportID` Int32, + `Div1AirportSeqID` Int32, + `Div1WheelsOn` String, + `Div1TotalGTime` String, + `Div1LongestGTime` String, + `Div1WheelsOff` String, + `Div1TailNum` String, + `Div2Airport` String, + `Div2AirportID` Int32, + `Div2AirportSeqID` Int32, + `Div2WheelsOn` String, + `Div2TotalGTime` String, + `Div2LongestGTime` String, + `Div2WheelsOff` String, + `Div2TailNum` String, + `Div3Airport` String, + `Div3AirportID` Int32, + `Div3AirportSeqID` Int32, + `Div3WheelsOn` String, + `Div3TotalGTime` String, + `Div3LongestGTime` String, + `Div3WheelsOff` String, + `Div3TailNum` String, + `Div4Airport` String, + `Div4AirportID` Int32, + `Div4AirportSeqID` Int32, + `Div4WheelsOn` String, + `Div4TotalGTime` String, + `Div4LongestGTime` String, + `Div4WheelsOff` String, + `Div4TailNum` String, + `Div5Airport` String, + `Div5AirportID` Int32, + `Div5AirportSeqID` Int32, + `Div5WheelsOn` String, + `Div5TotalGTime` String, + `Div5LongestGTime` String, + `Div5WheelsOff` String, + `Div5TailNum` String ) ENGINE = MergeTree -PARTITION BY Year -ORDER BY (Carrier, FlightDate) -SETTINGS index_granularity = 8192; + PARTITION BY Year + ORDER BY (IATA_CODE_Reporting_Airline, FlightDate) + SETTINGS index_granularity = 8192; ``` 加载数据: ``` bash -$ for i in *.zip; do echo $i; unzip -cq $i '*.csv' | sed 's/\.00//g' | clickhouse-client --host=example-perftest01j --query="INSERT INTO ontime FORMAT CSVWithNames"; done +ls -1 *.zip | xargs -I{} -P $(nproc) bash -c "echo {}; unzip -cq {} '*.csv' | sed 's/\.00//g' | clickhouse-client --input_format_with_names_use_header=0 --query='INSERT INTO ontime FORMAT CSVWithNames'" ``` ## 下载预处理好的分区数据 {#xia-zai-yu-chu-li-hao-de-fen-qu-shu-ju} @@ -212,7 +213,7 @@ LIMIT 10; Q4. 查询2007年各航空公司延误超过10分钟以上的次数 ``` sql -SELECT Carrier, count(*) +SELECT IATA_CODE_Reporting_Airline AS Carrier, count(*) FROM ontime WHERE DepDelay>10 AND Year=2007 GROUP BY Carrier @@ -226,29 +227,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3 FROM ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year=2007 GROUP BY Carrier -) +) q JOIN ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c2 FROM ontime WHERE Year=2007 GROUP BY Carrier -) USING Carrier +) qq USING Carrier ORDER BY c3 DESC; ``` 更好的查询版本: ``` sql -SELECT Carrier, avg(DepDelay>10)*100 AS c3 +SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3 FROM ontime WHERE Year=2007 GROUP BY Carrier @@ -262,29 +263,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3 FROM ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year>=2000 AND Year<=2008 GROUP BY Carrier -) +) q JOIN ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c2 FROM ontime WHERE Year>=2000 AND Year<=2008 GROUP BY Carrier -) USING Carrier +) qq USING Carrier ORDER BY c3 DESC; ``` 更好的查询版本: ``` sql -SELECT Carrier, avg(DepDelay>10)*100 AS c3 +SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3 FROM ontime WHERE Year>=2000 AND Year<=2008 GROUP BY Carrier @@ -303,7 +304,7 @@ FROM from ontime WHERE DepDelay>10 GROUP BY Year -) +) q JOIN ( select @@ -311,7 +312,7 @@ JOIN count(*) as c2 from ontime GROUP BY Year -) USING (Year) +) qq USING (Year) ORDER BY Year; ``` @@ -346,7 +347,7 @@ Q10. ``` sql SELECT - min(Year), max(Year), Carrier, count(*) AS cnt, + min(Year), max(Year), IATA_CODE_Reporting_Airline AS Carrier, count(*) AS cnt, sum(ArrDelayMinutes>30) AS flights_delayed, round(sum(ArrDelayMinutes>30)/count(*),2) AS rate FROM ontime