mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
commit
6f08f945e8
@ -21,120 +21,121 @@ echo https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performa
|
|||||||
Creating a table:
|
Creating a table:
|
||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
CREATE TABLE `ontime` (
|
CREATE TABLE `ontime`
|
||||||
`Year` UInt16,
|
(
|
||||||
`Quarter` UInt8,
|
`Year` UInt16,
|
||||||
`Month` UInt8,
|
`Quarter` UInt8,
|
||||||
`DayofMonth` UInt8,
|
`Month` UInt8,
|
||||||
`DayOfWeek` UInt8,
|
`DayofMonth` UInt8,
|
||||||
`FlightDate` Date,
|
`DayOfWeek` UInt8,
|
||||||
`UniqueCarrier` FixedString(7),
|
`FlightDate` Date,
|
||||||
`AirlineID` Int32,
|
`Reporting_Airline` String,
|
||||||
`Carrier` FixedString(2),
|
`DOT_ID_Reporting_Airline` Int32,
|
||||||
`TailNum` String,
|
`IATA_CODE_Reporting_Airline` String,
|
||||||
`FlightNum` String,
|
`Tail_Number` Int32,
|
||||||
`OriginAirportID` Int32,
|
`Flight_Number_Reporting_Airline` String,
|
||||||
`OriginAirportSeqID` Int32,
|
`OriginAirportID` Int32,
|
||||||
`OriginCityMarketID` Int32,
|
`OriginAirportSeqID` Int32,
|
||||||
`Origin` FixedString(5),
|
`OriginCityMarketID` Int32,
|
||||||
`OriginCityName` String,
|
`Origin` FixedString(5),
|
||||||
`OriginState` FixedString(2),
|
`OriginCityName` String,
|
||||||
`OriginStateFips` String,
|
`OriginState` FixedString(2),
|
||||||
`OriginStateName` String,
|
`OriginStateFips` String,
|
||||||
`OriginWac` Int32,
|
`OriginStateName` String,
|
||||||
`DestAirportID` Int32,
|
`OriginWac` Int32,
|
||||||
`DestAirportSeqID` Int32,
|
`DestAirportID` Int32,
|
||||||
`DestCityMarketID` Int32,
|
`DestAirportSeqID` Int32,
|
||||||
`Dest` FixedString(5),
|
`DestCityMarketID` Int32,
|
||||||
`DestCityName` String,
|
`Dest` FixedString(5),
|
||||||
`DestState` FixedString(2),
|
`DestCityName` String,
|
||||||
`DestStateFips` String,
|
`DestState` FixedString(2),
|
||||||
`DestStateName` String,
|
`DestStateFips` String,
|
||||||
`DestWac` Int32,
|
`DestStateName` String,
|
||||||
`CRSDepTime` Int32,
|
`DestWac` Int32,
|
||||||
`DepTime` Int32,
|
`CRSDepTime` Int32,
|
||||||
`DepDelay` Int32,
|
`DepTime` Int32,
|
||||||
`DepDelayMinutes` Int32,
|
`DepDelay` Int32,
|
||||||
`DepDel15` Int32,
|
`DepDelayMinutes` Int32,
|
||||||
`DepartureDelayGroups` String,
|
`DepDel15` Int32,
|
||||||
`DepTimeBlk` String,
|
`DepartureDelayGroups` String,
|
||||||
`TaxiOut` Int32,
|
`DepTimeBlk` String,
|
||||||
`WheelsOff` Int32,
|
`TaxiOut` Int32,
|
||||||
`WheelsOn` Int32,
|
`WheelsOff` Int32,
|
||||||
`TaxiIn` Int32,
|
`WheelsOn` Int32,
|
||||||
`CRSArrTime` Int32,
|
`TaxiIn` Int32,
|
||||||
`ArrTime` Int32,
|
`CRSArrTime` Int32,
|
||||||
`ArrDelay` Int32,
|
`ArrTime` Int32,
|
||||||
`ArrDelayMinutes` Int32,
|
`ArrDelay` Int32,
|
||||||
`ArrDel15` Int32,
|
`ArrDelayMinutes` Int32,
|
||||||
`ArrivalDelayGroups` Int32,
|
`ArrDel15` Int32,
|
||||||
`ArrTimeBlk` String,
|
`ArrivalDelayGroups` Int32,
|
||||||
`Cancelled` UInt8,
|
`ArrTimeBlk` String,
|
||||||
`CancellationCode` FixedString(1),
|
`Cancelled` UInt8,
|
||||||
`Diverted` UInt8,
|
`CancellationCode` FixedString(1),
|
||||||
`CRSElapsedTime` Int32,
|
`Diverted` UInt8,
|
||||||
`ActualElapsedTime` Int32,
|
`CRSElapsedTime` Int32,
|
||||||
`AirTime` Int32,
|
`ActualElapsedTime` Int32,
|
||||||
`Flights` Int32,
|
`AirTime` Nullable(Int32),
|
||||||
`Distance` Int32,
|
`Flights` Int32,
|
||||||
`DistanceGroup` UInt8,
|
`Distance` Int32,
|
||||||
`CarrierDelay` Int32,
|
`DistanceGroup` UInt8,
|
||||||
`WeatherDelay` Int32,
|
`CarrierDelay` Int32,
|
||||||
`NASDelay` Int32,
|
`WeatherDelay` Int32,
|
||||||
`SecurityDelay` Int32,
|
`NASDelay` Int32,
|
||||||
`LateAircraftDelay` Int32,
|
`SecurityDelay` Int32,
|
||||||
`FirstDepTime` String,
|
`LateAircraftDelay` Int32,
|
||||||
`TotalAddGTime` String,
|
`FirstDepTime` String,
|
||||||
`LongestAddGTime` String,
|
`TotalAddGTime` String,
|
||||||
`DivAirportLandings` String,
|
`LongestAddGTime` String,
|
||||||
`DivReachedDest` String,
|
`DivAirportLandings` String,
|
||||||
`DivActualElapsedTime` String,
|
`DivReachedDest` String,
|
||||||
`DivArrDelay` String,
|
`DivActualElapsedTime` String,
|
||||||
`DivDistance` String,
|
`DivArrDelay` String,
|
||||||
`Div1Airport` String,
|
`DivDistance` String,
|
||||||
`Div1AirportID` Int32,
|
`Div1Airport` String,
|
||||||
`Div1AirportSeqID` Int32,
|
`Div1AirportID` Int32,
|
||||||
`Div1WheelsOn` String,
|
`Div1AirportSeqID` Int32,
|
||||||
`Div1TotalGTime` String,
|
`Div1WheelsOn` String,
|
||||||
`Div1LongestGTime` String,
|
`Div1TotalGTime` String,
|
||||||
`Div1WheelsOff` String,
|
`Div1LongestGTime` String,
|
||||||
`Div1TailNum` String,
|
`Div1WheelsOff` String,
|
||||||
`Div2Airport` String,
|
`Div1TailNum` String,
|
||||||
`Div2AirportID` Int32,
|
`Div2Airport` String,
|
||||||
`Div2AirportSeqID` Int32,
|
`Div2AirportID` Int32,
|
||||||
`Div2WheelsOn` String,
|
`Div2AirportSeqID` Int32,
|
||||||
`Div2TotalGTime` String,
|
`Div2WheelsOn` String,
|
||||||
`Div2LongestGTime` String,
|
`Div2TotalGTime` String,
|
||||||
`Div2WheelsOff` String,
|
`Div2LongestGTime` String,
|
||||||
`Div2TailNum` String,
|
`Div2WheelsOff` String,
|
||||||
`Div3Airport` String,
|
`Div2TailNum` String,
|
||||||
`Div3AirportID` Int32,
|
`Div3Airport` String,
|
||||||
`Div3AirportSeqID` Int32,
|
`Div3AirportID` Int32,
|
||||||
`Div3WheelsOn` String,
|
`Div3AirportSeqID` Int32,
|
||||||
`Div3TotalGTime` String,
|
`Div3WheelsOn` String,
|
||||||
`Div3LongestGTime` String,
|
`Div3TotalGTime` String,
|
||||||
`Div3WheelsOff` String,
|
`Div3LongestGTime` String,
|
||||||
`Div3TailNum` String,
|
`Div3WheelsOff` String,
|
||||||
`Div4Airport` String,
|
`Div3TailNum` String,
|
||||||
`Div4AirportID` Int32,
|
`Div4Airport` String,
|
||||||
`Div4AirportSeqID` Int32,
|
`Div4AirportID` Int32,
|
||||||
`Div4WheelsOn` String,
|
`Div4AirportSeqID` Int32,
|
||||||
`Div4TotalGTime` String,
|
`Div4WheelsOn` String,
|
||||||
`Div4LongestGTime` String,
|
`Div4TotalGTime` String,
|
||||||
`Div4WheelsOff` String,
|
`Div4LongestGTime` String,
|
||||||
`Div4TailNum` String,
|
`Div4WheelsOff` String,
|
||||||
`Div5Airport` String,
|
`Div4TailNum` String,
|
||||||
`Div5AirportID` Int32,
|
`Div5Airport` String,
|
||||||
`Div5AirportSeqID` Int32,
|
`Div5AirportID` Int32,
|
||||||
`Div5WheelsOn` String,
|
`Div5AirportSeqID` Int32,
|
||||||
`Div5TotalGTime` String,
|
`Div5WheelsOn` String,
|
||||||
`Div5LongestGTime` String,
|
`Div5TotalGTime` String,
|
||||||
`Div5WheelsOff` String,
|
`Div5LongestGTime` String,
|
||||||
`Div5TailNum` String
|
`Div5WheelsOff` String,
|
||||||
|
`Div5TailNum` String
|
||||||
) ENGINE = MergeTree
|
) ENGINE = MergeTree
|
||||||
PARTITION BY Year
|
PARTITION BY Year
|
||||||
ORDER BY (Carrier, FlightDate)
|
ORDER BY (IATA_CODE_Reporting_Airline, FlightDate)
|
||||||
SETTINGS index_granularity = 8192;
|
SETTINGS index_granularity = 8192;
|
||||||
```
|
```
|
||||||
|
|
||||||
Loading data with multiple threads:
|
Loading data with multiple threads:
|
||||||
@ -206,7 +207,7 @@ LIMIT 10;
|
|||||||
Q4. The number of delays by carrier for 2007
|
Q4. The number of delays by carrier for 2007
|
||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
SELECT Carrier, count(*)
|
SELECT IATA_CODE_Reporting_Airline AS Carrier, count(*)
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE DepDelay>10 AND Year=2007
|
WHERE DepDelay>10 AND Year=2007
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
@ -220,29 +221,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3
|
|||||||
FROM
|
FROM
|
||||||
(
|
(
|
||||||
SELECT
|
SELECT
|
||||||
Carrier,
|
IATA_CODE_Reporting_Airline AS Carrier,
|
||||||
count(*) AS c
|
count(*) AS c
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE DepDelay>10
|
WHERE DepDelay>10
|
||||||
AND Year=2007
|
AND Year=2007
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
)
|
) q
|
||||||
JOIN
|
JOIN
|
||||||
(
|
(
|
||||||
SELECT
|
SELECT
|
||||||
Carrier,
|
IATA_CODE_Reporting_Airline AS Carrier,
|
||||||
count(*) AS c2
|
count(*) AS c2
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE Year=2007
|
WHERE Year=2007
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
) USING Carrier
|
) qq USING Carrier
|
||||||
ORDER BY c3 DESC;
|
ORDER BY c3 DESC;
|
||||||
```
|
```
|
||||||
|
|
||||||
Better version of the same query:
|
Better version of the same query:
|
||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
SELECT Carrier, avg(DepDelay>10)*100 AS c3
|
SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE Year=2007
|
WHERE Year=2007
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
@ -256,29 +257,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3
|
|||||||
FROM
|
FROM
|
||||||
(
|
(
|
||||||
SELECT
|
SELECT
|
||||||
Carrier,
|
IATA_CODE_Reporting_Airline AS Carrier,
|
||||||
count(*) AS c
|
count(*) AS c
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE DepDelay>10
|
WHERE DepDelay>10
|
||||||
AND Year>=2000 AND Year<=2008
|
AND Year>=2000 AND Year<=2008
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
)
|
) q
|
||||||
JOIN
|
JOIN
|
||||||
(
|
(
|
||||||
SELECT
|
SELECT
|
||||||
Carrier,
|
IATA_CODE_Reporting_Airline AS Carrier,
|
||||||
count(*) AS c2
|
count(*) AS c2
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE Year>=2000 AND Year<=2008
|
WHERE Year>=2000 AND Year<=2008
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
) USING Carrier
|
) qq USING Carrier
|
||||||
ORDER BY c3 DESC;
|
ORDER BY c3 DESC;
|
||||||
```
|
```
|
||||||
|
|
||||||
Better version of the same query:
|
Better version of the same query:
|
||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
SELECT Carrier, avg(DepDelay>10)*100 AS c3
|
SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE Year>=2000 AND Year<=2008
|
WHERE Year>=2000 AND Year<=2008
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
@ -297,7 +298,7 @@ FROM
|
|||||||
from ontime
|
from ontime
|
||||||
WHERE DepDelay>10
|
WHERE DepDelay>10
|
||||||
GROUP BY Year
|
GROUP BY Year
|
||||||
)
|
) q
|
||||||
JOIN
|
JOIN
|
||||||
(
|
(
|
||||||
select
|
select
|
||||||
@ -305,7 +306,7 @@ JOIN
|
|||||||
count(*) as c2
|
count(*) as c2
|
||||||
from ontime
|
from ontime
|
||||||
GROUP BY Year
|
GROUP BY Year
|
||||||
) USING (Year)
|
) qq USING (Year)
|
||||||
ORDER BY Year;
|
ORDER BY Year;
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -340,7 +341,7 @@ Q10.
|
|||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
SELECT
|
SELECT
|
||||||
min(Year), max(Year), Carrier, count(*) AS cnt,
|
min(Year), max(Year), IATA_CODE_Reporting_Airline AS Carrier, count(*) AS cnt,
|
||||||
sum(ArrDelayMinutes>30) AS flights_delayed,
|
sum(ArrDelayMinutes>30) AS flights_delayed,
|
||||||
round(sum(ArrDelayMinutes>30)/count(*),2) AS rate
|
round(sum(ArrDelayMinutes>30)/count(*),2) AS rate
|
||||||
FROM ontime
|
FROM ontime
|
||||||
|
@ -29,126 +29,127 @@ done
|
|||||||
テーブルの作成:
|
テーブルの作成:
|
||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
CREATE TABLE `ontime` (
|
CREATE TABLE `ontime`
|
||||||
`Year` UInt16,
|
(
|
||||||
`Quarter` UInt8,
|
`Year` UInt16,
|
||||||
`Month` UInt8,
|
`Quarter` UInt8,
|
||||||
`DayofMonth` UInt8,
|
`Month` UInt8,
|
||||||
`DayOfWeek` UInt8,
|
`DayofMonth` UInt8,
|
||||||
`FlightDate` Date,
|
`DayOfWeek` UInt8,
|
||||||
`UniqueCarrier` FixedString(7),
|
`FlightDate` Date,
|
||||||
`AirlineID` Int32,
|
`Reporting_Airline` String,
|
||||||
`Carrier` FixedString(2),
|
`DOT_ID_Reporting_Airline` Int32,
|
||||||
`TailNum` String,
|
`IATA_CODE_Reporting_Airline` String,
|
||||||
`FlightNum` String,
|
`Tail_Number` Int32,
|
||||||
`OriginAirportID` Int32,
|
`Flight_Number_Reporting_Airline` String,
|
||||||
`OriginAirportSeqID` Int32,
|
`OriginAirportID` Int32,
|
||||||
`OriginCityMarketID` Int32,
|
`OriginAirportSeqID` Int32,
|
||||||
`Origin` FixedString(5),
|
`OriginCityMarketID` Int32,
|
||||||
`OriginCityName` String,
|
`Origin` FixedString(5),
|
||||||
`OriginState` FixedString(2),
|
`OriginCityName` String,
|
||||||
`OriginStateFips` String,
|
`OriginState` FixedString(2),
|
||||||
`OriginStateName` String,
|
`OriginStateFips` String,
|
||||||
`OriginWac` Int32,
|
`OriginStateName` String,
|
||||||
`DestAirportID` Int32,
|
`OriginWac` Int32,
|
||||||
`DestAirportSeqID` Int32,
|
`DestAirportID` Int32,
|
||||||
`DestCityMarketID` Int32,
|
`DestAirportSeqID` Int32,
|
||||||
`Dest` FixedString(5),
|
`DestCityMarketID` Int32,
|
||||||
`DestCityName` String,
|
`Dest` FixedString(5),
|
||||||
`DestState` FixedString(2),
|
`DestCityName` String,
|
||||||
`DestStateFips` String,
|
`DestState` FixedString(2),
|
||||||
`DestStateName` String,
|
`DestStateFips` String,
|
||||||
`DestWac` Int32,
|
`DestStateName` String,
|
||||||
`CRSDepTime` Int32,
|
`DestWac` Int32,
|
||||||
`DepTime` Int32,
|
`CRSDepTime` Int32,
|
||||||
`DepDelay` Int32,
|
`DepTime` Int32,
|
||||||
`DepDelayMinutes` Int32,
|
`DepDelay` Int32,
|
||||||
`DepDel15` Int32,
|
`DepDelayMinutes` Int32,
|
||||||
`DepartureDelayGroups` String,
|
`DepDel15` Int32,
|
||||||
`DepTimeBlk` String,
|
`DepartureDelayGroups` String,
|
||||||
`TaxiOut` Int32,
|
`DepTimeBlk` String,
|
||||||
`WheelsOff` Int32,
|
`TaxiOut` Int32,
|
||||||
`WheelsOn` Int32,
|
`WheelsOff` Int32,
|
||||||
`TaxiIn` Int32,
|
`WheelsOn` Int32,
|
||||||
`CRSArrTime` Int32,
|
`TaxiIn` Int32,
|
||||||
`ArrTime` Int32,
|
`CRSArrTime` Int32,
|
||||||
`ArrDelay` Int32,
|
`ArrTime` Int32,
|
||||||
`ArrDelayMinutes` Int32,
|
`ArrDelay` Int32,
|
||||||
`ArrDel15` Int32,
|
`ArrDelayMinutes` Int32,
|
||||||
`ArrivalDelayGroups` Int32,
|
`ArrDel15` Int32,
|
||||||
`ArrTimeBlk` String,
|
`ArrivalDelayGroups` Int32,
|
||||||
`Cancelled` UInt8,
|
`ArrTimeBlk` String,
|
||||||
`CancellationCode` FixedString(1),
|
`Cancelled` UInt8,
|
||||||
`Diverted` UInt8,
|
`CancellationCode` FixedString(1),
|
||||||
`CRSElapsedTime` Int32,
|
`Diverted` UInt8,
|
||||||
`ActualElapsedTime` Int32,
|
`CRSElapsedTime` Int32,
|
||||||
`AirTime` Int32,
|
`ActualElapsedTime` Int32,
|
||||||
`Flights` Int32,
|
`AirTime` Nullable(Int32),
|
||||||
`Distance` Int32,
|
`Flights` Int32,
|
||||||
`DistanceGroup` UInt8,
|
`Distance` Int32,
|
||||||
`CarrierDelay` Int32,
|
`DistanceGroup` UInt8,
|
||||||
`WeatherDelay` Int32,
|
`CarrierDelay` Int32,
|
||||||
`NASDelay` Int32,
|
`WeatherDelay` Int32,
|
||||||
`SecurityDelay` Int32,
|
`NASDelay` Int32,
|
||||||
`LateAircraftDelay` Int32,
|
`SecurityDelay` Int32,
|
||||||
`FirstDepTime` String,
|
`LateAircraftDelay` Int32,
|
||||||
`TotalAddGTime` String,
|
`FirstDepTime` String,
|
||||||
`LongestAddGTime` String,
|
`TotalAddGTime` String,
|
||||||
`DivAirportLandings` String,
|
`LongestAddGTime` String,
|
||||||
`DivReachedDest` String,
|
`DivAirportLandings` String,
|
||||||
`DivActualElapsedTime` String,
|
`DivReachedDest` String,
|
||||||
`DivArrDelay` String,
|
`DivActualElapsedTime` String,
|
||||||
`DivDistance` String,
|
`DivArrDelay` String,
|
||||||
`Div1Airport` String,
|
`DivDistance` String,
|
||||||
`Div1AirportID` Int32,
|
`Div1Airport` String,
|
||||||
`Div1AirportSeqID` Int32,
|
`Div1AirportID` Int32,
|
||||||
`Div1WheelsOn` String,
|
`Div1AirportSeqID` Int32,
|
||||||
`Div1TotalGTime` String,
|
`Div1WheelsOn` String,
|
||||||
`Div1LongestGTime` String,
|
`Div1TotalGTime` String,
|
||||||
`Div1WheelsOff` String,
|
`Div1LongestGTime` String,
|
||||||
`Div1TailNum` String,
|
`Div1WheelsOff` String,
|
||||||
`Div2Airport` String,
|
`Div1TailNum` String,
|
||||||
`Div2AirportID` Int32,
|
`Div2Airport` String,
|
||||||
`Div2AirportSeqID` Int32,
|
`Div2AirportID` Int32,
|
||||||
`Div2WheelsOn` String,
|
`Div2AirportSeqID` Int32,
|
||||||
`Div2TotalGTime` String,
|
`Div2WheelsOn` String,
|
||||||
`Div2LongestGTime` String,
|
`Div2TotalGTime` String,
|
||||||
`Div2WheelsOff` String,
|
`Div2LongestGTime` String,
|
||||||
`Div2TailNum` String,
|
`Div2WheelsOff` String,
|
||||||
`Div3Airport` String,
|
`Div2TailNum` String,
|
||||||
`Div3AirportID` Int32,
|
`Div3Airport` String,
|
||||||
`Div3AirportSeqID` Int32,
|
`Div3AirportID` Int32,
|
||||||
`Div3WheelsOn` String,
|
`Div3AirportSeqID` Int32,
|
||||||
`Div3TotalGTime` String,
|
`Div3WheelsOn` String,
|
||||||
`Div3LongestGTime` String,
|
`Div3TotalGTime` String,
|
||||||
`Div3WheelsOff` String,
|
`Div3LongestGTime` String,
|
||||||
`Div3TailNum` String,
|
`Div3WheelsOff` String,
|
||||||
`Div4Airport` String,
|
`Div3TailNum` String,
|
||||||
`Div4AirportID` Int32,
|
`Div4Airport` String,
|
||||||
`Div4AirportSeqID` Int32,
|
`Div4AirportID` Int32,
|
||||||
`Div4WheelsOn` String,
|
`Div4AirportSeqID` Int32,
|
||||||
`Div4TotalGTime` String,
|
`Div4WheelsOn` String,
|
||||||
`Div4LongestGTime` String,
|
`Div4TotalGTime` String,
|
||||||
`Div4WheelsOff` String,
|
`Div4LongestGTime` String,
|
||||||
`Div4TailNum` String,
|
`Div4WheelsOff` String,
|
||||||
`Div5Airport` String,
|
`Div4TailNum` String,
|
||||||
`Div5AirportID` Int32,
|
`Div5Airport` String,
|
||||||
`Div5AirportSeqID` Int32,
|
`Div5AirportID` Int32,
|
||||||
`Div5WheelsOn` String,
|
`Div5AirportSeqID` Int32,
|
||||||
`Div5TotalGTime` String,
|
`Div5WheelsOn` String,
|
||||||
`Div5LongestGTime` String,
|
`Div5TotalGTime` String,
|
||||||
`Div5WheelsOff` String,
|
`Div5LongestGTime` String,
|
||||||
`Div5TailNum` String
|
`Div5WheelsOff` String,
|
||||||
|
`Div5TailNum` String
|
||||||
) ENGINE = MergeTree
|
) ENGINE = MergeTree
|
||||||
PARTITION BY Year
|
PARTITION BY Year
|
||||||
ORDER BY (Carrier, FlightDate)
|
ORDER BY (IATA_CODE_Reporting_Airline, FlightDate)
|
||||||
SETTINGS index_granularity = 8192;
|
SETTINGS index_granularity = 8192;
|
||||||
```
|
```
|
||||||
|
|
||||||
データのロード:
|
データのロード:
|
||||||
|
|
||||||
``` bash
|
``` bash
|
||||||
$ for i in *.zip; do echo $i; unzip -cq $i '*.csv' | sed 's/\.00//g' | clickhouse-client --host=example-perftest01j --query="INSERT INTO ontime FORMAT CSVWithNames"; done
|
ls -1 *.zip | xargs -I{} -P $(nproc) bash -c "echo {}; unzip -cq {} '*.csv' | sed 's/\.00//g' | clickhouse-client --input_format_with_names_use_header=0 --query='INSERT INTO ontime FORMAT CSVWithNames'"
|
||||||
```
|
```
|
||||||
|
|
||||||
## パーティション済みデータのダウンロード {#download-of-prepared-partitions}
|
## パーティション済みデータのダウンロード {#download-of-prepared-partitions}
|
||||||
@ -212,10 +213,10 @@ LIMIT 10;
|
|||||||
Q4. 2007年のキャリア別の遅延の数
|
Q4. 2007年のキャリア別の遅延の数
|
||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
SELECT Carrier, count(*)
|
SELECT IATA_CODE_Reporting_Airline AS Carrier, count(*)
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE DepDelay>10 AND Year=2007
|
WHERE DepDelay>10 AND Year=2007
|
||||||
GROUP BY Carrier
|
GROUP BY IATA_CODE_Reporting_Airline
|
||||||
ORDER BY count(*) DESC;
|
ORDER BY count(*) DESC;
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -226,32 +227,32 @@ SELECT Carrier, c, c2, c*100/c2 as c3
|
|||||||
FROM
|
FROM
|
||||||
(
|
(
|
||||||
SELECT
|
SELECT
|
||||||
Carrier,
|
IATA_CODE_Reporting_Airline AS Carrier,
|
||||||
count(*) AS c
|
count(*) AS c
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE DepDelay>10
|
WHERE DepDelay>10
|
||||||
AND Year=2007
|
AND Year=2007
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
)
|
) q
|
||||||
JOIN
|
JOIN
|
||||||
(
|
(
|
||||||
SELECT
|
SELECT
|
||||||
Carrier,
|
IATA_CODE_Reporting_Airline AS Carrier,
|
||||||
count(*) AS c2
|
count(*) AS c2
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE Year=2007
|
WHERE Year=2007
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
) USING Carrier
|
) qq USING Carrier
|
||||||
ORDER BY c3 DESC;
|
ORDER BY c3 DESC;
|
||||||
```
|
```
|
||||||
|
|
||||||
同じクエリのより良いバージョン:
|
同じクエリのより良いバージョン:
|
||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
SELECT Carrier, avg(DepDelay>10)*100 AS c3
|
SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE Year=2007
|
WHERE Year=2007
|
||||||
GROUP BY Carrier
|
GROUP BY IATA_CODE_Reporting_Airline
|
||||||
ORDER BY c3 DESC
|
ORDER BY c3 DESC
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -262,29 +263,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3
|
|||||||
FROM
|
FROM
|
||||||
(
|
(
|
||||||
SELECT
|
SELECT
|
||||||
Carrier,
|
IATA_CODE_Reporting_Airline AS Carrier,
|
||||||
count(*) AS c
|
count(*) AS c
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE DepDelay>10
|
WHERE DepDelay>10
|
||||||
AND Year>=2000 AND Year<=2008
|
AND Year>=2000 AND Year<=2008
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
)
|
) q
|
||||||
JOIN
|
JOIN
|
||||||
(
|
(
|
||||||
SELECT
|
SELECT
|
||||||
Carrier,
|
IATA_CODE_Reporting_Airline AS Carrier,
|
||||||
count(*) AS c2
|
count(*) AS c2
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE Year>=2000 AND Year<=2008
|
WHERE Year>=2000 AND Year<=2008
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
) USING Carrier
|
) qq USING Carrier
|
||||||
ORDER BY c3 DESC;
|
ORDER BY c3 DESC;
|
||||||
```
|
```
|
||||||
|
|
||||||
同じクエリのより良いバージョン:
|
同じクエリのより良いバージョン:
|
||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
SELECT Carrier, avg(DepDelay>10)*100 AS c3
|
SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE Year>=2000 AND Year<=2008
|
WHERE Year>=2000 AND Year<=2008
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
@ -303,7 +304,7 @@ FROM
|
|||||||
from ontime
|
from ontime
|
||||||
WHERE DepDelay>10
|
WHERE DepDelay>10
|
||||||
GROUP BY Year
|
GROUP BY Year
|
||||||
)
|
) q
|
||||||
JOIN
|
JOIN
|
||||||
(
|
(
|
||||||
select
|
select
|
||||||
@ -311,7 +312,7 @@ JOIN
|
|||||||
count(*) as c2
|
count(*) as c2
|
||||||
from ontime
|
from ontime
|
||||||
GROUP BY Year
|
GROUP BY Year
|
||||||
) USING (Year)
|
) qq USING (Year)
|
||||||
ORDER BY Year;
|
ORDER BY Year;
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -346,7 +347,7 @@ Q10.
|
|||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
SELECT
|
SELECT
|
||||||
min(Year), max(Year), Carrier, count(*) AS cnt,
|
min(Year), max(Year), IATA_CODE_Reporting_Airline AS Carrier, count(*) AS cnt,
|
||||||
sum(ArrDelayMinutes>30) AS flights_delayed,
|
sum(ArrDelayMinutes>30) AS flights_delayed,
|
||||||
round(sum(ArrDelayMinutes>30)/count(*),2) AS rate
|
round(sum(ArrDelayMinutes>30)/count(*),2) AS rate
|
||||||
FROM ontime
|
FROM ontime
|
||||||
|
@ -27,126 +27,127 @@ done
|
|||||||
Создание таблицы:
|
Создание таблицы:
|
||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
CREATE TABLE `ontime` (
|
CREATE TABLE `ontime`
|
||||||
`Year` UInt16,
|
(
|
||||||
`Quarter` UInt8,
|
`Year` UInt16,
|
||||||
`Month` UInt8,
|
`Quarter` UInt8,
|
||||||
`DayofMonth` UInt8,
|
`Month` UInt8,
|
||||||
`DayOfWeek` UInt8,
|
`DayofMonth` UInt8,
|
||||||
`FlightDate` Date,
|
`DayOfWeek` UInt8,
|
||||||
`UniqueCarrier` FixedString(7),
|
`FlightDate` Date,
|
||||||
`AirlineID` Int32,
|
`Reporting_Airline` String,
|
||||||
`Carrier` FixedString(2),
|
`DOT_ID_Reporting_Airline` Int32,
|
||||||
`TailNum` String,
|
`IATA_CODE_Reporting_Airline` String,
|
||||||
`FlightNum` String,
|
`Tail_Number` Int32,
|
||||||
`OriginAirportID` Int32,
|
`Flight_Number_Reporting_Airline` String,
|
||||||
`OriginAirportSeqID` Int32,
|
`OriginAirportID` Int32,
|
||||||
`OriginCityMarketID` Int32,
|
`OriginAirportSeqID` Int32,
|
||||||
`Origin` FixedString(5),
|
`OriginCityMarketID` Int32,
|
||||||
`OriginCityName` String,
|
`Origin` FixedString(5),
|
||||||
`OriginState` FixedString(2),
|
`OriginCityName` String,
|
||||||
`OriginStateFips` String,
|
`OriginState` FixedString(2),
|
||||||
`OriginStateName` String,
|
`OriginStateFips` String,
|
||||||
`OriginWac` Int32,
|
`OriginStateName` String,
|
||||||
`DestAirportID` Int32,
|
`OriginWac` Int32,
|
||||||
`DestAirportSeqID` Int32,
|
`DestAirportID` Int32,
|
||||||
`DestCityMarketID` Int32,
|
`DestAirportSeqID` Int32,
|
||||||
`Dest` FixedString(5),
|
`DestCityMarketID` Int32,
|
||||||
`DestCityName` String,
|
`Dest` FixedString(5),
|
||||||
`DestState` FixedString(2),
|
`DestCityName` String,
|
||||||
`DestStateFips` String,
|
`DestState` FixedString(2),
|
||||||
`DestStateName` String,
|
`DestStateFips` String,
|
||||||
`DestWac` Int32,
|
`DestStateName` String,
|
||||||
`CRSDepTime` Int32,
|
`DestWac` Int32,
|
||||||
`DepTime` Int32,
|
`CRSDepTime` Int32,
|
||||||
`DepDelay` Int32,
|
`DepTime` Int32,
|
||||||
`DepDelayMinutes` Int32,
|
`DepDelay` Int32,
|
||||||
`DepDel15` Int32,
|
`DepDelayMinutes` Int32,
|
||||||
`DepartureDelayGroups` String,
|
`DepDel15` Int32,
|
||||||
`DepTimeBlk` String,
|
`DepartureDelayGroups` String,
|
||||||
`TaxiOut` Int32,
|
`DepTimeBlk` String,
|
||||||
`WheelsOff` Int32,
|
`TaxiOut` Int32,
|
||||||
`WheelsOn` Int32,
|
`WheelsOff` Int32,
|
||||||
`TaxiIn` Int32,
|
`WheelsOn` Int32,
|
||||||
`CRSArrTime` Int32,
|
`TaxiIn` Int32,
|
||||||
`ArrTime` Int32,
|
`CRSArrTime` Int32,
|
||||||
`ArrDelay` Int32,
|
`ArrTime` Int32,
|
||||||
`ArrDelayMinutes` Int32,
|
`ArrDelay` Int32,
|
||||||
`ArrDel15` Int32,
|
`ArrDelayMinutes` Int32,
|
||||||
`ArrivalDelayGroups` Int32,
|
`ArrDel15` Int32,
|
||||||
`ArrTimeBlk` String,
|
`ArrivalDelayGroups` Int32,
|
||||||
`Cancelled` UInt8,
|
`ArrTimeBlk` String,
|
||||||
`CancellationCode` FixedString(1),
|
`Cancelled` UInt8,
|
||||||
`Diverted` UInt8,
|
`CancellationCode` FixedString(1),
|
||||||
`CRSElapsedTime` Int32,
|
`Diverted` UInt8,
|
||||||
`ActualElapsedTime` Int32,
|
`CRSElapsedTime` Int32,
|
||||||
`AirTime` Int32,
|
`ActualElapsedTime` Int32,
|
||||||
`Flights` Int32,
|
`AirTime` Nullable(Int32),
|
||||||
`Distance` Int32,
|
`Flights` Int32,
|
||||||
`DistanceGroup` UInt8,
|
`Distance` Int32,
|
||||||
`CarrierDelay` Int32,
|
`DistanceGroup` UInt8,
|
||||||
`WeatherDelay` Int32,
|
`CarrierDelay` Int32,
|
||||||
`NASDelay` Int32,
|
`WeatherDelay` Int32,
|
||||||
`SecurityDelay` Int32,
|
`NASDelay` Int32,
|
||||||
`LateAircraftDelay` Int32,
|
`SecurityDelay` Int32,
|
||||||
`FirstDepTime` String,
|
`LateAircraftDelay` Int32,
|
||||||
`TotalAddGTime` String,
|
`FirstDepTime` String,
|
||||||
`LongestAddGTime` String,
|
`TotalAddGTime` String,
|
||||||
`DivAirportLandings` String,
|
`LongestAddGTime` String,
|
||||||
`DivReachedDest` String,
|
`DivAirportLandings` String,
|
||||||
`DivActualElapsedTime` String,
|
`DivReachedDest` String,
|
||||||
`DivArrDelay` String,
|
`DivActualElapsedTime` String,
|
||||||
`DivDistance` String,
|
`DivArrDelay` String,
|
||||||
`Div1Airport` String,
|
`DivDistance` String,
|
||||||
`Div1AirportID` Int32,
|
`Div1Airport` String,
|
||||||
`Div1AirportSeqID` Int32,
|
`Div1AirportID` Int32,
|
||||||
`Div1WheelsOn` String,
|
`Div1AirportSeqID` Int32,
|
||||||
`Div1TotalGTime` String,
|
`Div1WheelsOn` String,
|
||||||
`Div1LongestGTime` String,
|
`Div1TotalGTime` String,
|
||||||
`Div1WheelsOff` String,
|
`Div1LongestGTime` String,
|
||||||
`Div1TailNum` String,
|
`Div1WheelsOff` String,
|
||||||
`Div2Airport` String,
|
`Div1TailNum` String,
|
||||||
`Div2AirportID` Int32,
|
`Div2Airport` String,
|
||||||
`Div2AirportSeqID` Int32,
|
`Div2AirportID` Int32,
|
||||||
`Div2WheelsOn` String,
|
`Div2AirportSeqID` Int32,
|
||||||
`Div2TotalGTime` String,
|
`Div2WheelsOn` String,
|
||||||
`Div2LongestGTime` String,
|
`Div2TotalGTime` String,
|
||||||
`Div2WheelsOff` String,
|
`Div2LongestGTime` String,
|
||||||
`Div2TailNum` String,
|
`Div2WheelsOff` String,
|
||||||
`Div3Airport` String,
|
`Div2TailNum` String,
|
||||||
`Div3AirportID` Int32,
|
`Div3Airport` String,
|
||||||
`Div3AirportSeqID` Int32,
|
`Div3AirportID` Int32,
|
||||||
`Div3WheelsOn` String,
|
`Div3AirportSeqID` Int32,
|
||||||
`Div3TotalGTime` String,
|
`Div3WheelsOn` String,
|
||||||
`Div3LongestGTime` String,
|
`Div3TotalGTime` String,
|
||||||
`Div3WheelsOff` String,
|
`Div3LongestGTime` String,
|
||||||
`Div3TailNum` String,
|
`Div3WheelsOff` String,
|
||||||
`Div4Airport` String,
|
`Div3TailNum` String,
|
||||||
`Div4AirportID` Int32,
|
`Div4Airport` String,
|
||||||
`Div4AirportSeqID` Int32,
|
`Div4AirportID` Int32,
|
||||||
`Div4WheelsOn` String,
|
`Div4AirportSeqID` Int32,
|
||||||
`Div4TotalGTime` String,
|
`Div4WheelsOn` String,
|
||||||
`Div4LongestGTime` String,
|
`Div4TotalGTime` String,
|
||||||
`Div4WheelsOff` String,
|
`Div4LongestGTime` String,
|
||||||
`Div4TailNum` String,
|
`Div4WheelsOff` String,
|
||||||
`Div5Airport` String,
|
`Div4TailNum` String,
|
||||||
`Div5AirportID` Int32,
|
`Div5Airport` String,
|
||||||
`Div5AirportSeqID` Int32,
|
`Div5AirportID` Int32,
|
||||||
`Div5WheelsOn` String,
|
`Div5AirportSeqID` Int32,
|
||||||
`Div5TotalGTime` String,
|
`Div5WheelsOn` String,
|
||||||
`Div5LongestGTime` String,
|
`Div5TotalGTime` String,
|
||||||
`Div5WheelsOff` String,
|
`Div5LongestGTime` String,
|
||||||
`Div5TailNum` String
|
`Div5WheelsOff` String,
|
||||||
|
`Div5TailNum` String
|
||||||
) ENGINE = MergeTree
|
) ENGINE = MergeTree
|
||||||
PARTITION BY Year
|
PARTITION BY Year
|
||||||
ORDER BY (Carrier, FlightDate)
|
ORDER BY (IATA_CODE_Reporting_Airline, FlightDate)
|
||||||
SETTINGS index_granularity = 8192;
|
SETTINGS index_granularity = 8192;
|
||||||
```
|
```
|
||||||
|
|
||||||
Загрузка данных:
|
Загрузка данных:
|
||||||
|
|
||||||
``` bash
|
``` bash
|
||||||
$ for i in *.zip; do echo $i; unzip -cq $i '*.csv' | sed 's/\.00//g' | clickhouse-client --host=example-perftest01j --query="INSERT INTO ontime FORMAT CSVWithNames"; done
|
ls -1 *.zip | xargs -I{} -P $(nproc) bash -c "echo {}; unzip -cq {} '*.csv' | sed 's/\.00//g' | clickhouse-client --input_format_with_names_use_header=0 --query='INSERT INTO ontime FORMAT CSVWithNames'"
|
||||||
```
|
```
|
||||||
|
|
||||||
## Скачивание готовых партиций {#skachivanie-gotovykh-partitsii}
|
## Скачивание готовых партиций {#skachivanie-gotovykh-partitsii}
|
||||||
@ -211,7 +212,7 @@ LIMIT 10;
|
|||||||
Q4. Количество задержек по перевозчикам за 2007 год
|
Q4. Количество задержек по перевозчикам за 2007 год
|
||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
SELECT Carrier, count(*)
|
SELECT IATA_CODE_Reporting_Airline AS Carrier, count(*)
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE DepDelay>10 AND Year=2007
|
WHERE DepDelay>10 AND Year=2007
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
@ -225,29 +226,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3
|
|||||||
FROM
|
FROM
|
||||||
(
|
(
|
||||||
SELECT
|
SELECT
|
||||||
Carrier,
|
IATA_CODE_Reporting_Airline AS Carrier,
|
||||||
count(*) AS c
|
count(*) AS c
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE DepDelay>10
|
WHERE DepDelay>10
|
||||||
AND Year=2007
|
AND Year=2007
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
)
|
) q
|
||||||
JOIN
|
JOIN
|
||||||
(
|
(
|
||||||
SELECT
|
SELECT
|
||||||
Carrier,
|
IATA_CODE_Reporting_Airline AS Carrier,
|
||||||
count(*) AS c2
|
count(*) AS c2
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE Year=2007
|
WHERE Year=2007
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
) USING Carrier
|
) qq USING Carrier
|
||||||
ORDER BY c3 DESC;
|
ORDER BY c3 DESC;
|
||||||
```
|
```
|
||||||
|
|
||||||
Более оптимальная версия того же запроса:
|
Более оптимальная версия того же запроса:
|
||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
SELECT Carrier, avg(DepDelay>10)*100 AS c3
|
SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE Year=2007
|
WHERE Year=2007
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
@ -261,29 +262,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3
|
|||||||
FROM
|
FROM
|
||||||
(
|
(
|
||||||
SELECT
|
SELECT
|
||||||
Carrier,
|
IATA_CODE_Reporting_Airline AS Carrier,
|
||||||
count(*) AS c
|
count(*) AS c
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE DepDelay>10
|
WHERE DepDelay>10
|
||||||
AND Year>=2000 AND Year<=2008
|
AND Year>=2000 AND Year<=2008
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
)
|
) q
|
||||||
JOIN
|
JOIN
|
||||||
(
|
(
|
||||||
SELECT
|
SELECT
|
||||||
Carrier,
|
IATA_CODE_Reporting_Airline AS Carrier,
|
||||||
count(*) AS c2
|
count(*) AS c2
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE Year>=2000 AND Year<=2008
|
WHERE Year>=2000 AND Year<=2008
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
) USING Carrier
|
) qq USING Carrier
|
||||||
ORDER BY c3 DESC;
|
ORDER BY c3 DESC;
|
||||||
```
|
```
|
||||||
|
|
||||||
Более оптимальная версия того же запроса:
|
Более оптимальная версия того же запроса:
|
||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
SELECT Carrier, avg(DepDelay>10)*100 AS c3
|
SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE Year>=2000 AND Year<=2008
|
WHERE Year>=2000 AND Year<=2008
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
@ -302,7 +303,7 @@ FROM
|
|||||||
from ontime
|
from ontime
|
||||||
WHERE DepDelay>10
|
WHERE DepDelay>10
|
||||||
GROUP BY Year
|
GROUP BY Year
|
||||||
)
|
) q
|
||||||
JOIN
|
JOIN
|
||||||
(
|
(
|
||||||
select
|
select
|
||||||
@ -310,7 +311,7 @@ JOIN
|
|||||||
count(*) as c2
|
count(*) as c2
|
||||||
from ontime
|
from ontime
|
||||||
GROUP BY Year
|
GROUP BY Year
|
||||||
) USING (Year)
|
) qq USING (Year)
|
||||||
ORDER BY Year;
|
ORDER BY Year;
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -346,7 +347,7 @@ Q10.
|
|||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
SELECT
|
SELECT
|
||||||
min(Year), max(Year), Carrier, count(*) AS cnt,
|
min(Year), max(Year), IATA_CODE_Reporting_Airline AS Carrier, count(*) AS cnt,
|
||||||
sum(ArrDelayMinutes>30) AS flights_delayed,
|
sum(ArrDelayMinutes>30) AS flights_delayed,
|
||||||
round(sum(ArrDelayMinutes>30)/count(*),2) AS rate
|
round(sum(ArrDelayMinutes>30)/count(*),2) AS rate
|
||||||
FROM ontime
|
FROM ontime
|
||||||
|
@ -29,126 +29,127 @@ done
|
|||||||
创建表结构:
|
创建表结构:
|
||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
CREATE TABLE `ontime` (
|
CREATE TABLE `ontime`
|
||||||
`Year` UInt16,
|
(
|
||||||
`Quarter` UInt8,
|
`Year` UInt16,
|
||||||
`Month` UInt8,
|
`Quarter` UInt8,
|
||||||
`DayofMonth` UInt8,
|
`Month` UInt8,
|
||||||
`DayOfWeek` UInt8,
|
`DayofMonth` UInt8,
|
||||||
`FlightDate` Date,
|
`DayOfWeek` UInt8,
|
||||||
`UniqueCarrier` FixedString(7),
|
`FlightDate` Date,
|
||||||
`AirlineID` Int32,
|
`Reporting_Airline` String,
|
||||||
`Carrier` FixedString(2),
|
`DOT_ID_Reporting_Airline` Int32,
|
||||||
`TailNum` String,
|
`IATA_CODE_Reporting_Airline` String,
|
||||||
`FlightNum` String,
|
`Tail_Number` Int32,
|
||||||
`OriginAirportID` Int32,
|
`Flight_Number_Reporting_Airline` String,
|
||||||
`OriginAirportSeqID` Int32,
|
`OriginAirportID` Int32,
|
||||||
`OriginCityMarketID` Int32,
|
`OriginAirportSeqID` Int32,
|
||||||
`Origin` FixedString(5),
|
`OriginCityMarketID` Int32,
|
||||||
`OriginCityName` String,
|
`Origin` FixedString(5),
|
||||||
`OriginState` FixedString(2),
|
`OriginCityName` String,
|
||||||
`OriginStateFips` String,
|
`OriginState` FixedString(2),
|
||||||
`OriginStateName` String,
|
`OriginStateFips` String,
|
||||||
`OriginWac` Int32,
|
`OriginStateName` String,
|
||||||
`DestAirportID` Int32,
|
`OriginWac` Int32,
|
||||||
`DestAirportSeqID` Int32,
|
`DestAirportID` Int32,
|
||||||
`DestCityMarketID` Int32,
|
`DestAirportSeqID` Int32,
|
||||||
`Dest` FixedString(5),
|
`DestCityMarketID` Int32,
|
||||||
`DestCityName` String,
|
`Dest` FixedString(5),
|
||||||
`DestState` FixedString(2),
|
`DestCityName` String,
|
||||||
`DestStateFips` String,
|
`DestState` FixedString(2),
|
||||||
`DestStateName` String,
|
`DestStateFips` String,
|
||||||
`DestWac` Int32,
|
`DestStateName` String,
|
||||||
`CRSDepTime` Int32,
|
`DestWac` Int32,
|
||||||
`DepTime` Int32,
|
`CRSDepTime` Int32,
|
||||||
`DepDelay` Int32,
|
`DepTime` Int32,
|
||||||
`DepDelayMinutes` Int32,
|
`DepDelay` Int32,
|
||||||
`DepDel15` Int32,
|
`DepDelayMinutes` Int32,
|
||||||
`DepartureDelayGroups` String,
|
`DepDel15` Int32,
|
||||||
`DepTimeBlk` String,
|
`DepartureDelayGroups` String,
|
||||||
`TaxiOut` Int32,
|
`DepTimeBlk` String,
|
||||||
`WheelsOff` Int32,
|
`TaxiOut` Int32,
|
||||||
`WheelsOn` Int32,
|
`WheelsOff` Int32,
|
||||||
`TaxiIn` Int32,
|
`WheelsOn` Int32,
|
||||||
`CRSArrTime` Int32,
|
`TaxiIn` Int32,
|
||||||
`ArrTime` Int32,
|
`CRSArrTime` Int32,
|
||||||
`ArrDelay` Int32,
|
`ArrTime` Int32,
|
||||||
`ArrDelayMinutes` Int32,
|
`ArrDelay` Int32,
|
||||||
`ArrDel15` Int32,
|
`ArrDelayMinutes` Int32,
|
||||||
`ArrivalDelayGroups` Int32,
|
`ArrDel15` Int32,
|
||||||
`ArrTimeBlk` String,
|
`ArrivalDelayGroups` Int32,
|
||||||
`Cancelled` UInt8,
|
`ArrTimeBlk` String,
|
||||||
`CancellationCode` FixedString(1),
|
`Cancelled` UInt8,
|
||||||
`Diverted` UInt8,
|
`CancellationCode` FixedString(1),
|
||||||
`CRSElapsedTime` Int32,
|
`Diverted` UInt8,
|
||||||
`ActualElapsedTime` Int32,
|
`CRSElapsedTime` Int32,
|
||||||
`AirTime` Int32,
|
`ActualElapsedTime` Int32,
|
||||||
`Flights` Int32,
|
`AirTime` Nullable(Int32),
|
||||||
`Distance` Int32,
|
`Flights` Int32,
|
||||||
`DistanceGroup` UInt8,
|
`Distance` Int32,
|
||||||
`CarrierDelay` Int32,
|
`DistanceGroup` UInt8,
|
||||||
`WeatherDelay` Int32,
|
`CarrierDelay` Int32,
|
||||||
`NASDelay` Int32,
|
`WeatherDelay` Int32,
|
||||||
`SecurityDelay` Int32,
|
`NASDelay` Int32,
|
||||||
`LateAircraftDelay` Int32,
|
`SecurityDelay` Int32,
|
||||||
`FirstDepTime` String,
|
`LateAircraftDelay` Int32,
|
||||||
`TotalAddGTime` String,
|
`FirstDepTime` String,
|
||||||
`LongestAddGTime` String,
|
`TotalAddGTime` String,
|
||||||
`DivAirportLandings` String,
|
`LongestAddGTime` String,
|
||||||
`DivReachedDest` String,
|
`DivAirportLandings` String,
|
||||||
`DivActualElapsedTime` String,
|
`DivReachedDest` String,
|
||||||
`DivArrDelay` String,
|
`DivActualElapsedTime` String,
|
||||||
`DivDistance` String,
|
`DivArrDelay` String,
|
||||||
`Div1Airport` String,
|
`DivDistance` String,
|
||||||
`Div1AirportID` Int32,
|
`Div1Airport` String,
|
||||||
`Div1AirportSeqID` Int32,
|
`Div1AirportID` Int32,
|
||||||
`Div1WheelsOn` String,
|
`Div1AirportSeqID` Int32,
|
||||||
`Div1TotalGTime` String,
|
`Div1WheelsOn` String,
|
||||||
`Div1LongestGTime` String,
|
`Div1TotalGTime` String,
|
||||||
`Div1WheelsOff` String,
|
`Div1LongestGTime` String,
|
||||||
`Div1TailNum` String,
|
`Div1WheelsOff` String,
|
||||||
`Div2Airport` String,
|
`Div1TailNum` String,
|
||||||
`Div2AirportID` Int32,
|
`Div2Airport` String,
|
||||||
`Div2AirportSeqID` Int32,
|
`Div2AirportID` Int32,
|
||||||
`Div2WheelsOn` String,
|
`Div2AirportSeqID` Int32,
|
||||||
`Div2TotalGTime` String,
|
`Div2WheelsOn` String,
|
||||||
`Div2LongestGTime` String,
|
`Div2TotalGTime` String,
|
||||||
`Div2WheelsOff` String,
|
`Div2LongestGTime` String,
|
||||||
`Div2TailNum` String,
|
`Div2WheelsOff` String,
|
||||||
`Div3Airport` String,
|
`Div2TailNum` String,
|
||||||
`Div3AirportID` Int32,
|
`Div3Airport` String,
|
||||||
`Div3AirportSeqID` Int32,
|
`Div3AirportID` Int32,
|
||||||
`Div3WheelsOn` String,
|
`Div3AirportSeqID` Int32,
|
||||||
`Div3TotalGTime` String,
|
`Div3WheelsOn` String,
|
||||||
`Div3LongestGTime` String,
|
`Div3TotalGTime` String,
|
||||||
`Div3WheelsOff` String,
|
`Div3LongestGTime` String,
|
||||||
`Div3TailNum` String,
|
`Div3WheelsOff` String,
|
||||||
`Div4Airport` String,
|
`Div3TailNum` String,
|
||||||
`Div4AirportID` Int32,
|
`Div4Airport` String,
|
||||||
`Div4AirportSeqID` Int32,
|
`Div4AirportID` Int32,
|
||||||
`Div4WheelsOn` String,
|
`Div4AirportSeqID` Int32,
|
||||||
`Div4TotalGTime` String,
|
`Div4WheelsOn` String,
|
||||||
`Div4LongestGTime` String,
|
`Div4TotalGTime` String,
|
||||||
`Div4WheelsOff` String,
|
`Div4LongestGTime` String,
|
||||||
`Div4TailNum` String,
|
`Div4WheelsOff` String,
|
||||||
`Div5Airport` String,
|
`Div4TailNum` String,
|
||||||
`Div5AirportID` Int32,
|
`Div5Airport` String,
|
||||||
`Div5AirportSeqID` Int32,
|
`Div5AirportID` Int32,
|
||||||
`Div5WheelsOn` String,
|
`Div5AirportSeqID` Int32,
|
||||||
`Div5TotalGTime` String,
|
`Div5WheelsOn` String,
|
||||||
`Div5LongestGTime` String,
|
`Div5TotalGTime` String,
|
||||||
`Div5WheelsOff` String,
|
`Div5LongestGTime` String,
|
||||||
`Div5TailNum` String
|
`Div5WheelsOff` String,
|
||||||
|
`Div5TailNum` String
|
||||||
) ENGINE = MergeTree
|
) ENGINE = MergeTree
|
||||||
PARTITION BY Year
|
PARTITION BY Year
|
||||||
ORDER BY (Carrier, FlightDate)
|
ORDER BY (IATA_CODE_Reporting_Airline, FlightDate)
|
||||||
SETTINGS index_granularity = 8192;
|
SETTINGS index_granularity = 8192;
|
||||||
```
|
```
|
||||||
|
|
||||||
加载数据:
|
加载数据:
|
||||||
|
|
||||||
``` bash
|
``` bash
|
||||||
$ for i in *.zip; do echo $i; unzip -cq $i '*.csv' | sed 's/\.00//g' | clickhouse-client --host=example-perftest01j --query="INSERT INTO ontime FORMAT CSVWithNames"; done
|
ls -1 *.zip | xargs -I{} -P $(nproc) bash -c "echo {}; unzip -cq {} '*.csv' | sed 's/\.00//g' | clickhouse-client --input_format_with_names_use_header=0 --query='INSERT INTO ontime FORMAT CSVWithNames'"
|
||||||
```
|
```
|
||||||
|
|
||||||
## 下载预处理好的分区数据 {#xia-zai-yu-chu-li-hao-de-fen-qu-shu-ju}
|
## 下载预处理好的分区数据 {#xia-zai-yu-chu-li-hao-de-fen-qu-shu-ju}
|
||||||
@ -212,7 +213,7 @@ LIMIT 10;
|
|||||||
Q4. 查询2007年各航空公司延误超过10分钟以上的次数
|
Q4. 查询2007年各航空公司延误超过10分钟以上的次数
|
||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
SELECT Carrier, count(*)
|
SELECT IATA_CODE_Reporting_Airline AS Carrier, count(*)
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE DepDelay>10 AND Year=2007
|
WHERE DepDelay>10 AND Year=2007
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
@ -226,29 +227,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3
|
|||||||
FROM
|
FROM
|
||||||
(
|
(
|
||||||
SELECT
|
SELECT
|
||||||
Carrier,
|
IATA_CODE_Reporting_Airline AS Carrier,
|
||||||
count(*) AS c
|
count(*) AS c
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE DepDelay>10
|
WHERE DepDelay>10
|
||||||
AND Year=2007
|
AND Year=2007
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
)
|
) q
|
||||||
JOIN
|
JOIN
|
||||||
(
|
(
|
||||||
SELECT
|
SELECT
|
||||||
Carrier,
|
IATA_CODE_Reporting_Airline AS Carrier,
|
||||||
count(*) AS c2
|
count(*) AS c2
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE Year=2007
|
WHERE Year=2007
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
) USING Carrier
|
) qq USING Carrier
|
||||||
ORDER BY c3 DESC;
|
ORDER BY c3 DESC;
|
||||||
```
|
```
|
||||||
|
|
||||||
更好的查询版本:
|
更好的查询版本:
|
||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
SELECT Carrier, avg(DepDelay>10)*100 AS c3
|
SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE Year=2007
|
WHERE Year=2007
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
@ -262,29 +263,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3
|
|||||||
FROM
|
FROM
|
||||||
(
|
(
|
||||||
SELECT
|
SELECT
|
||||||
Carrier,
|
IATA_CODE_Reporting_Airline AS Carrier,
|
||||||
count(*) AS c
|
count(*) AS c
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE DepDelay>10
|
WHERE DepDelay>10
|
||||||
AND Year>=2000 AND Year<=2008
|
AND Year>=2000 AND Year<=2008
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
)
|
) q
|
||||||
JOIN
|
JOIN
|
||||||
(
|
(
|
||||||
SELECT
|
SELECT
|
||||||
Carrier,
|
IATA_CODE_Reporting_Airline AS Carrier,
|
||||||
count(*) AS c2
|
count(*) AS c2
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE Year>=2000 AND Year<=2008
|
WHERE Year>=2000 AND Year<=2008
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
) USING Carrier
|
) qq USING Carrier
|
||||||
ORDER BY c3 DESC;
|
ORDER BY c3 DESC;
|
||||||
```
|
```
|
||||||
|
|
||||||
更好的查询版本:
|
更好的查询版本:
|
||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
SELECT Carrier, avg(DepDelay>10)*100 AS c3
|
SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3
|
||||||
FROM ontime
|
FROM ontime
|
||||||
WHERE Year>=2000 AND Year<=2008
|
WHERE Year>=2000 AND Year<=2008
|
||||||
GROUP BY Carrier
|
GROUP BY Carrier
|
||||||
@ -303,7 +304,7 @@ FROM
|
|||||||
from ontime
|
from ontime
|
||||||
WHERE DepDelay>10
|
WHERE DepDelay>10
|
||||||
GROUP BY Year
|
GROUP BY Year
|
||||||
)
|
) q
|
||||||
JOIN
|
JOIN
|
||||||
(
|
(
|
||||||
select
|
select
|
||||||
@ -311,7 +312,7 @@ JOIN
|
|||||||
count(*) as c2
|
count(*) as c2
|
||||||
from ontime
|
from ontime
|
||||||
GROUP BY Year
|
GROUP BY Year
|
||||||
) USING (Year)
|
) qq USING (Year)
|
||||||
ORDER BY Year;
|
ORDER BY Year;
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -346,7 +347,7 @@ Q10.
|
|||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
SELECT
|
SELECT
|
||||||
min(Year), max(Year), Carrier, count(*) AS cnt,
|
min(Year), max(Year), IATA_CODE_Reporting_Airline AS Carrier, count(*) AS cnt,
|
||||||
sum(ArrDelayMinutes>30) AS flights_delayed,
|
sum(ArrDelayMinutes>30) AS flights_delayed,
|
||||||
round(sum(ArrDelayMinutes>30)/count(*),2) AS rate
|
round(sum(ArrDelayMinutes>30)/count(*),2) AS rate
|
||||||
FROM ontime
|
FROM ontime
|
||||||
|
Loading…
Reference in New Issue
Block a user