mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
commit
6f08f945e8
@ -21,120 +21,121 @@ echo https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performa
|
||||
Creating a table:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE `ontime` (
|
||||
`Year` UInt16,
|
||||
`Quarter` UInt8,
|
||||
`Month` UInt8,
|
||||
`DayofMonth` UInt8,
|
||||
`DayOfWeek` UInt8,
|
||||
`FlightDate` Date,
|
||||
`UniqueCarrier` FixedString(7),
|
||||
`AirlineID` Int32,
|
||||
`Carrier` FixedString(2),
|
||||
`TailNum` String,
|
||||
`FlightNum` String,
|
||||
`OriginAirportID` Int32,
|
||||
`OriginAirportSeqID` Int32,
|
||||
`OriginCityMarketID` Int32,
|
||||
`Origin` FixedString(5),
|
||||
`OriginCityName` String,
|
||||
`OriginState` FixedString(2),
|
||||
`OriginStateFips` String,
|
||||
`OriginStateName` String,
|
||||
`OriginWac` Int32,
|
||||
`DestAirportID` Int32,
|
||||
`DestAirportSeqID` Int32,
|
||||
`DestCityMarketID` Int32,
|
||||
`Dest` FixedString(5),
|
||||
`DestCityName` String,
|
||||
`DestState` FixedString(2),
|
||||
`DestStateFips` String,
|
||||
`DestStateName` String,
|
||||
`DestWac` Int32,
|
||||
`CRSDepTime` Int32,
|
||||
`DepTime` Int32,
|
||||
`DepDelay` Int32,
|
||||
`DepDelayMinutes` Int32,
|
||||
`DepDel15` Int32,
|
||||
`DepartureDelayGroups` String,
|
||||
`DepTimeBlk` String,
|
||||
`TaxiOut` Int32,
|
||||
`WheelsOff` Int32,
|
||||
`WheelsOn` Int32,
|
||||
`TaxiIn` Int32,
|
||||
`CRSArrTime` Int32,
|
||||
`ArrTime` Int32,
|
||||
`ArrDelay` Int32,
|
||||
`ArrDelayMinutes` Int32,
|
||||
`ArrDel15` Int32,
|
||||
`ArrivalDelayGroups` Int32,
|
||||
`ArrTimeBlk` String,
|
||||
`Cancelled` UInt8,
|
||||
`CancellationCode` FixedString(1),
|
||||
`Diverted` UInt8,
|
||||
`CRSElapsedTime` Int32,
|
||||
`ActualElapsedTime` Int32,
|
||||
`AirTime` Int32,
|
||||
`Flights` Int32,
|
||||
`Distance` Int32,
|
||||
`DistanceGroup` UInt8,
|
||||
`CarrierDelay` Int32,
|
||||
`WeatherDelay` Int32,
|
||||
`NASDelay` Int32,
|
||||
`SecurityDelay` Int32,
|
||||
`LateAircraftDelay` Int32,
|
||||
`FirstDepTime` String,
|
||||
`TotalAddGTime` String,
|
||||
`LongestAddGTime` String,
|
||||
`DivAirportLandings` String,
|
||||
`DivReachedDest` String,
|
||||
`DivActualElapsedTime` String,
|
||||
`DivArrDelay` String,
|
||||
`DivDistance` String,
|
||||
`Div1Airport` String,
|
||||
`Div1AirportID` Int32,
|
||||
`Div1AirportSeqID` Int32,
|
||||
`Div1WheelsOn` String,
|
||||
`Div1TotalGTime` String,
|
||||
`Div1LongestGTime` String,
|
||||
`Div1WheelsOff` String,
|
||||
`Div1TailNum` String,
|
||||
`Div2Airport` String,
|
||||
`Div2AirportID` Int32,
|
||||
`Div2AirportSeqID` Int32,
|
||||
`Div2WheelsOn` String,
|
||||
`Div2TotalGTime` String,
|
||||
`Div2LongestGTime` String,
|
||||
`Div2WheelsOff` String,
|
||||
`Div2TailNum` String,
|
||||
`Div3Airport` String,
|
||||
`Div3AirportID` Int32,
|
||||
`Div3AirportSeqID` Int32,
|
||||
`Div3WheelsOn` String,
|
||||
`Div3TotalGTime` String,
|
||||
`Div3LongestGTime` String,
|
||||
`Div3WheelsOff` String,
|
||||
`Div3TailNum` String,
|
||||
`Div4Airport` String,
|
||||
`Div4AirportID` Int32,
|
||||
`Div4AirportSeqID` Int32,
|
||||
`Div4WheelsOn` String,
|
||||
`Div4TotalGTime` String,
|
||||
`Div4LongestGTime` String,
|
||||
`Div4WheelsOff` String,
|
||||
`Div4TailNum` String,
|
||||
`Div5Airport` String,
|
||||
`Div5AirportID` Int32,
|
||||
`Div5AirportSeqID` Int32,
|
||||
`Div5WheelsOn` String,
|
||||
`Div5TotalGTime` String,
|
||||
`Div5LongestGTime` String,
|
||||
`Div5WheelsOff` String,
|
||||
`Div5TailNum` String
|
||||
CREATE TABLE `ontime`
|
||||
(
|
||||
`Year` UInt16,
|
||||
`Quarter` UInt8,
|
||||
`Month` UInt8,
|
||||
`DayofMonth` UInt8,
|
||||
`DayOfWeek` UInt8,
|
||||
`FlightDate` Date,
|
||||
`Reporting_Airline` String,
|
||||
`DOT_ID_Reporting_Airline` Int32,
|
||||
`IATA_CODE_Reporting_Airline` String,
|
||||
`Tail_Number` Int32,
|
||||
`Flight_Number_Reporting_Airline` String,
|
||||
`OriginAirportID` Int32,
|
||||
`OriginAirportSeqID` Int32,
|
||||
`OriginCityMarketID` Int32,
|
||||
`Origin` FixedString(5),
|
||||
`OriginCityName` String,
|
||||
`OriginState` FixedString(2),
|
||||
`OriginStateFips` String,
|
||||
`OriginStateName` String,
|
||||
`OriginWac` Int32,
|
||||
`DestAirportID` Int32,
|
||||
`DestAirportSeqID` Int32,
|
||||
`DestCityMarketID` Int32,
|
||||
`Dest` FixedString(5),
|
||||
`DestCityName` String,
|
||||
`DestState` FixedString(2),
|
||||
`DestStateFips` String,
|
||||
`DestStateName` String,
|
||||
`DestWac` Int32,
|
||||
`CRSDepTime` Int32,
|
||||
`DepTime` Int32,
|
||||
`DepDelay` Int32,
|
||||
`DepDelayMinutes` Int32,
|
||||
`DepDel15` Int32,
|
||||
`DepartureDelayGroups` String,
|
||||
`DepTimeBlk` String,
|
||||
`TaxiOut` Int32,
|
||||
`WheelsOff` Int32,
|
||||
`WheelsOn` Int32,
|
||||
`TaxiIn` Int32,
|
||||
`CRSArrTime` Int32,
|
||||
`ArrTime` Int32,
|
||||
`ArrDelay` Int32,
|
||||
`ArrDelayMinutes` Int32,
|
||||
`ArrDel15` Int32,
|
||||
`ArrivalDelayGroups` Int32,
|
||||
`ArrTimeBlk` String,
|
||||
`Cancelled` UInt8,
|
||||
`CancellationCode` FixedString(1),
|
||||
`Diverted` UInt8,
|
||||
`CRSElapsedTime` Int32,
|
||||
`ActualElapsedTime` Int32,
|
||||
`AirTime` Nullable(Int32),
|
||||
`Flights` Int32,
|
||||
`Distance` Int32,
|
||||
`DistanceGroup` UInt8,
|
||||
`CarrierDelay` Int32,
|
||||
`WeatherDelay` Int32,
|
||||
`NASDelay` Int32,
|
||||
`SecurityDelay` Int32,
|
||||
`LateAircraftDelay` Int32,
|
||||
`FirstDepTime` String,
|
||||
`TotalAddGTime` String,
|
||||
`LongestAddGTime` String,
|
||||
`DivAirportLandings` String,
|
||||
`DivReachedDest` String,
|
||||
`DivActualElapsedTime` String,
|
||||
`DivArrDelay` String,
|
||||
`DivDistance` String,
|
||||
`Div1Airport` String,
|
||||
`Div1AirportID` Int32,
|
||||
`Div1AirportSeqID` Int32,
|
||||
`Div1WheelsOn` String,
|
||||
`Div1TotalGTime` String,
|
||||
`Div1LongestGTime` String,
|
||||
`Div1WheelsOff` String,
|
||||
`Div1TailNum` String,
|
||||
`Div2Airport` String,
|
||||
`Div2AirportID` Int32,
|
||||
`Div2AirportSeqID` Int32,
|
||||
`Div2WheelsOn` String,
|
||||
`Div2TotalGTime` String,
|
||||
`Div2LongestGTime` String,
|
||||
`Div2WheelsOff` String,
|
||||
`Div2TailNum` String,
|
||||
`Div3Airport` String,
|
||||
`Div3AirportID` Int32,
|
||||
`Div3AirportSeqID` Int32,
|
||||
`Div3WheelsOn` String,
|
||||
`Div3TotalGTime` String,
|
||||
`Div3LongestGTime` String,
|
||||
`Div3WheelsOff` String,
|
||||
`Div3TailNum` String,
|
||||
`Div4Airport` String,
|
||||
`Div4AirportID` Int32,
|
||||
`Div4AirportSeqID` Int32,
|
||||
`Div4WheelsOn` String,
|
||||
`Div4TotalGTime` String,
|
||||
`Div4LongestGTime` String,
|
||||
`Div4WheelsOff` String,
|
||||
`Div4TailNum` String,
|
||||
`Div5Airport` String,
|
||||
`Div5AirportID` Int32,
|
||||
`Div5AirportSeqID` Int32,
|
||||
`Div5WheelsOn` String,
|
||||
`Div5TotalGTime` String,
|
||||
`Div5LongestGTime` String,
|
||||
`Div5WheelsOff` String,
|
||||
`Div5TailNum` String
|
||||
) ENGINE = MergeTree
|
||||
PARTITION BY Year
|
||||
ORDER BY (Carrier, FlightDate)
|
||||
SETTINGS index_granularity = 8192;
|
||||
PARTITION BY Year
|
||||
ORDER BY (IATA_CODE_Reporting_Airline, FlightDate)
|
||||
SETTINGS index_granularity = 8192;
|
||||
```
|
||||
|
||||
Loading data with multiple threads:
|
||||
@ -206,7 +207,7 @@ LIMIT 10;
|
||||
Q4. The number of delays by carrier for 2007
|
||||
|
||||
``` sql
|
||||
SELECT Carrier, count(*)
|
||||
SELECT IATA_CODE_Reporting_Airline AS Carrier, count(*)
|
||||
FROM ontime
|
||||
WHERE DepDelay>10 AND Year=2007
|
||||
GROUP BY Carrier
|
||||
@ -220,29 +221,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
Carrier,
|
||||
IATA_CODE_Reporting_Airline AS Carrier,
|
||||
count(*) AS c
|
||||
FROM ontime
|
||||
WHERE DepDelay>10
|
||||
AND Year=2007
|
||||
GROUP BY Carrier
|
||||
)
|
||||
) q
|
||||
JOIN
|
||||
(
|
||||
SELECT
|
||||
Carrier,
|
||||
IATA_CODE_Reporting_Airline AS Carrier,
|
||||
count(*) AS c2
|
||||
FROM ontime
|
||||
WHERE Year=2007
|
||||
GROUP BY Carrier
|
||||
) USING Carrier
|
||||
) qq USING Carrier
|
||||
ORDER BY c3 DESC;
|
||||
```
|
||||
|
||||
Better version of the same query:
|
||||
|
||||
``` sql
|
||||
SELECT Carrier, avg(DepDelay>10)*100 AS c3
|
||||
SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3
|
||||
FROM ontime
|
||||
WHERE Year=2007
|
||||
GROUP BY Carrier
|
||||
@ -256,29 +257,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
Carrier,
|
||||
IATA_CODE_Reporting_Airline AS Carrier,
|
||||
count(*) AS c
|
||||
FROM ontime
|
||||
WHERE DepDelay>10
|
||||
AND Year>=2000 AND Year<=2008
|
||||
GROUP BY Carrier
|
||||
)
|
||||
) q
|
||||
JOIN
|
||||
(
|
||||
SELECT
|
||||
Carrier,
|
||||
IATA_CODE_Reporting_Airline AS Carrier,
|
||||
count(*) AS c2
|
||||
FROM ontime
|
||||
WHERE Year>=2000 AND Year<=2008
|
||||
GROUP BY Carrier
|
||||
) USING Carrier
|
||||
) qq USING Carrier
|
||||
ORDER BY c3 DESC;
|
||||
```
|
||||
|
||||
Better version of the same query:
|
||||
|
||||
``` sql
|
||||
SELECT Carrier, avg(DepDelay>10)*100 AS c3
|
||||
SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3
|
||||
FROM ontime
|
||||
WHERE Year>=2000 AND Year<=2008
|
||||
GROUP BY Carrier
|
||||
@ -297,7 +298,7 @@ FROM
|
||||
from ontime
|
||||
WHERE DepDelay>10
|
||||
GROUP BY Year
|
||||
)
|
||||
) q
|
||||
JOIN
|
||||
(
|
||||
select
|
||||
@ -305,7 +306,7 @@ JOIN
|
||||
count(*) as c2
|
||||
from ontime
|
||||
GROUP BY Year
|
||||
) USING (Year)
|
||||
) qq USING (Year)
|
||||
ORDER BY Year;
|
||||
```
|
||||
|
||||
@ -340,7 +341,7 @@ Q10.
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
min(Year), max(Year), Carrier, count(*) AS cnt,
|
||||
min(Year), max(Year), IATA_CODE_Reporting_Airline AS Carrier, count(*) AS cnt,
|
||||
sum(ArrDelayMinutes>30) AS flights_delayed,
|
||||
round(sum(ArrDelayMinutes>30)/count(*),2) AS rate
|
||||
FROM ontime
|
||||
|
@ -29,126 +29,127 @@ done
|
||||
テーブルの作成:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE `ontime` (
|
||||
`Year` UInt16,
|
||||
`Quarter` UInt8,
|
||||
`Month` UInt8,
|
||||
`DayofMonth` UInt8,
|
||||
`DayOfWeek` UInt8,
|
||||
`FlightDate` Date,
|
||||
`UniqueCarrier` FixedString(7),
|
||||
`AirlineID` Int32,
|
||||
`Carrier` FixedString(2),
|
||||
`TailNum` String,
|
||||
`FlightNum` String,
|
||||
`OriginAirportID` Int32,
|
||||
`OriginAirportSeqID` Int32,
|
||||
`OriginCityMarketID` Int32,
|
||||
`Origin` FixedString(5),
|
||||
`OriginCityName` String,
|
||||
`OriginState` FixedString(2),
|
||||
`OriginStateFips` String,
|
||||
`OriginStateName` String,
|
||||
`OriginWac` Int32,
|
||||
`DestAirportID` Int32,
|
||||
`DestAirportSeqID` Int32,
|
||||
`DestCityMarketID` Int32,
|
||||
`Dest` FixedString(5),
|
||||
`DestCityName` String,
|
||||
`DestState` FixedString(2),
|
||||
`DestStateFips` String,
|
||||
`DestStateName` String,
|
||||
`DestWac` Int32,
|
||||
`CRSDepTime` Int32,
|
||||
`DepTime` Int32,
|
||||
`DepDelay` Int32,
|
||||
`DepDelayMinutes` Int32,
|
||||
`DepDel15` Int32,
|
||||
`DepartureDelayGroups` String,
|
||||
`DepTimeBlk` String,
|
||||
`TaxiOut` Int32,
|
||||
`WheelsOff` Int32,
|
||||
`WheelsOn` Int32,
|
||||
`TaxiIn` Int32,
|
||||
`CRSArrTime` Int32,
|
||||
`ArrTime` Int32,
|
||||
`ArrDelay` Int32,
|
||||
`ArrDelayMinutes` Int32,
|
||||
`ArrDel15` Int32,
|
||||
`ArrivalDelayGroups` Int32,
|
||||
`ArrTimeBlk` String,
|
||||
`Cancelled` UInt8,
|
||||
`CancellationCode` FixedString(1),
|
||||
`Diverted` UInt8,
|
||||
`CRSElapsedTime` Int32,
|
||||
`ActualElapsedTime` Int32,
|
||||
`AirTime` Int32,
|
||||
`Flights` Int32,
|
||||
`Distance` Int32,
|
||||
`DistanceGroup` UInt8,
|
||||
`CarrierDelay` Int32,
|
||||
`WeatherDelay` Int32,
|
||||
`NASDelay` Int32,
|
||||
`SecurityDelay` Int32,
|
||||
`LateAircraftDelay` Int32,
|
||||
`FirstDepTime` String,
|
||||
`TotalAddGTime` String,
|
||||
`LongestAddGTime` String,
|
||||
`DivAirportLandings` String,
|
||||
`DivReachedDest` String,
|
||||
`DivActualElapsedTime` String,
|
||||
`DivArrDelay` String,
|
||||
`DivDistance` String,
|
||||
`Div1Airport` String,
|
||||
`Div1AirportID` Int32,
|
||||
`Div1AirportSeqID` Int32,
|
||||
`Div1WheelsOn` String,
|
||||
`Div1TotalGTime` String,
|
||||
`Div1LongestGTime` String,
|
||||
`Div1WheelsOff` String,
|
||||
`Div1TailNum` String,
|
||||
`Div2Airport` String,
|
||||
`Div2AirportID` Int32,
|
||||
`Div2AirportSeqID` Int32,
|
||||
`Div2WheelsOn` String,
|
||||
`Div2TotalGTime` String,
|
||||
`Div2LongestGTime` String,
|
||||
`Div2WheelsOff` String,
|
||||
`Div2TailNum` String,
|
||||
`Div3Airport` String,
|
||||
`Div3AirportID` Int32,
|
||||
`Div3AirportSeqID` Int32,
|
||||
`Div3WheelsOn` String,
|
||||
`Div3TotalGTime` String,
|
||||
`Div3LongestGTime` String,
|
||||
`Div3WheelsOff` String,
|
||||
`Div3TailNum` String,
|
||||
`Div4Airport` String,
|
||||
`Div4AirportID` Int32,
|
||||
`Div4AirportSeqID` Int32,
|
||||
`Div4WheelsOn` String,
|
||||
`Div4TotalGTime` String,
|
||||
`Div4LongestGTime` String,
|
||||
`Div4WheelsOff` String,
|
||||
`Div4TailNum` String,
|
||||
`Div5Airport` String,
|
||||
`Div5AirportID` Int32,
|
||||
`Div5AirportSeqID` Int32,
|
||||
`Div5WheelsOn` String,
|
||||
`Div5TotalGTime` String,
|
||||
`Div5LongestGTime` String,
|
||||
`Div5WheelsOff` String,
|
||||
`Div5TailNum` String
|
||||
CREATE TABLE `ontime`
|
||||
(
|
||||
`Year` UInt16,
|
||||
`Quarter` UInt8,
|
||||
`Month` UInt8,
|
||||
`DayofMonth` UInt8,
|
||||
`DayOfWeek` UInt8,
|
||||
`FlightDate` Date,
|
||||
`Reporting_Airline` String,
|
||||
`DOT_ID_Reporting_Airline` Int32,
|
||||
`IATA_CODE_Reporting_Airline` String,
|
||||
`Tail_Number` Int32,
|
||||
`Flight_Number_Reporting_Airline` String,
|
||||
`OriginAirportID` Int32,
|
||||
`OriginAirportSeqID` Int32,
|
||||
`OriginCityMarketID` Int32,
|
||||
`Origin` FixedString(5),
|
||||
`OriginCityName` String,
|
||||
`OriginState` FixedString(2),
|
||||
`OriginStateFips` String,
|
||||
`OriginStateName` String,
|
||||
`OriginWac` Int32,
|
||||
`DestAirportID` Int32,
|
||||
`DestAirportSeqID` Int32,
|
||||
`DestCityMarketID` Int32,
|
||||
`Dest` FixedString(5),
|
||||
`DestCityName` String,
|
||||
`DestState` FixedString(2),
|
||||
`DestStateFips` String,
|
||||
`DestStateName` String,
|
||||
`DestWac` Int32,
|
||||
`CRSDepTime` Int32,
|
||||
`DepTime` Int32,
|
||||
`DepDelay` Int32,
|
||||
`DepDelayMinutes` Int32,
|
||||
`DepDel15` Int32,
|
||||
`DepartureDelayGroups` String,
|
||||
`DepTimeBlk` String,
|
||||
`TaxiOut` Int32,
|
||||
`WheelsOff` Int32,
|
||||
`WheelsOn` Int32,
|
||||
`TaxiIn` Int32,
|
||||
`CRSArrTime` Int32,
|
||||
`ArrTime` Int32,
|
||||
`ArrDelay` Int32,
|
||||
`ArrDelayMinutes` Int32,
|
||||
`ArrDel15` Int32,
|
||||
`ArrivalDelayGroups` Int32,
|
||||
`ArrTimeBlk` String,
|
||||
`Cancelled` UInt8,
|
||||
`CancellationCode` FixedString(1),
|
||||
`Diverted` UInt8,
|
||||
`CRSElapsedTime` Int32,
|
||||
`ActualElapsedTime` Int32,
|
||||
`AirTime` Nullable(Int32),
|
||||
`Flights` Int32,
|
||||
`Distance` Int32,
|
||||
`DistanceGroup` UInt8,
|
||||
`CarrierDelay` Int32,
|
||||
`WeatherDelay` Int32,
|
||||
`NASDelay` Int32,
|
||||
`SecurityDelay` Int32,
|
||||
`LateAircraftDelay` Int32,
|
||||
`FirstDepTime` String,
|
||||
`TotalAddGTime` String,
|
||||
`LongestAddGTime` String,
|
||||
`DivAirportLandings` String,
|
||||
`DivReachedDest` String,
|
||||
`DivActualElapsedTime` String,
|
||||
`DivArrDelay` String,
|
||||
`DivDistance` String,
|
||||
`Div1Airport` String,
|
||||
`Div1AirportID` Int32,
|
||||
`Div1AirportSeqID` Int32,
|
||||
`Div1WheelsOn` String,
|
||||
`Div1TotalGTime` String,
|
||||
`Div1LongestGTime` String,
|
||||
`Div1WheelsOff` String,
|
||||
`Div1TailNum` String,
|
||||
`Div2Airport` String,
|
||||
`Div2AirportID` Int32,
|
||||
`Div2AirportSeqID` Int32,
|
||||
`Div2WheelsOn` String,
|
||||
`Div2TotalGTime` String,
|
||||
`Div2LongestGTime` String,
|
||||
`Div2WheelsOff` String,
|
||||
`Div2TailNum` String,
|
||||
`Div3Airport` String,
|
||||
`Div3AirportID` Int32,
|
||||
`Div3AirportSeqID` Int32,
|
||||
`Div3WheelsOn` String,
|
||||
`Div3TotalGTime` String,
|
||||
`Div3LongestGTime` String,
|
||||
`Div3WheelsOff` String,
|
||||
`Div3TailNum` String,
|
||||
`Div4Airport` String,
|
||||
`Div4AirportID` Int32,
|
||||
`Div4AirportSeqID` Int32,
|
||||
`Div4WheelsOn` String,
|
||||
`Div4TotalGTime` String,
|
||||
`Div4LongestGTime` String,
|
||||
`Div4WheelsOff` String,
|
||||
`Div4TailNum` String,
|
||||
`Div5Airport` String,
|
||||
`Div5AirportID` Int32,
|
||||
`Div5AirportSeqID` Int32,
|
||||
`Div5WheelsOn` String,
|
||||
`Div5TotalGTime` String,
|
||||
`Div5LongestGTime` String,
|
||||
`Div5WheelsOff` String,
|
||||
`Div5TailNum` String
|
||||
) ENGINE = MergeTree
|
||||
PARTITION BY Year
|
||||
ORDER BY (Carrier, FlightDate)
|
||||
SETTINGS index_granularity = 8192;
|
||||
PARTITION BY Year
|
||||
ORDER BY (IATA_CODE_Reporting_Airline, FlightDate)
|
||||
SETTINGS index_granularity = 8192;
|
||||
```
|
||||
|
||||
データのロード:
|
||||
|
||||
``` bash
|
||||
$ for i in *.zip; do echo $i; unzip -cq $i '*.csv' | sed 's/\.00//g' | clickhouse-client --host=example-perftest01j --query="INSERT INTO ontime FORMAT CSVWithNames"; done
|
||||
ls -1 *.zip | xargs -I{} -P $(nproc) bash -c "echo {}; unzip -cq {} '*.csv' | sed 's/\.00//g' | clickhouse-client --input_format_with_names_use_header=0 --query='INSERT INTO ontime FORMAT CSVWithNames'"
|
||||
```
|
||||
|
||||
## パーティション済みデータのダウンロード {#download-of-prepared-partitions}
|
||||
@ -212,10 +213,10 @@ LIMIT 10;
|
||||
Q4. 2007年のキャリア別の遅延の数
|
||||
|
||||
``` sql
|
||||
SELECT Carrier, count(*)
|
||||
SELECT IATA_CODE_Reporting_Airline AS Carrier, count(*)
|
||||
FROM ontime
|
||||
WHERE DepDelay>10 AND Year=2007
|
||||
GROUP BY Carrier
|
||||
GROUP BY IATA_CODE_Reporting_Airline
|
||||
ORDER BY count(*) DESC;
|
||||
```
|
||||
|
||||
@ -226,32 +227,32 @@ SELECT Carrier, c, c2, c*100/c2 as c3
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
Carrier,
|
||||
IATA_CODE_Reporting_Airline AS Carrier,
|
||||
count(*) AS c
|
||||
FROM ontime
|
||||
WHERE DepDelay>10
|
||||
AND Year=2007
|
||||
GROUP BY Carrier
|
||||
)
|
||||
) q
|
||||
JOIN
|
||||
(
|
||||
SELECT
|
||||
Carrier,
|
||||
IATA_CODE_Reporting_Airline AS Carrier,
|
||||
count(*) AS c2
|
||||
FROM ontime
|
||||
WHERE Year=2007
|
||||
GROUP BY Carrier
|
||||
) USING Carrier
|
||||
) qq USING Carrier
|
||||
ORDER BY c3 DESC;
|
||||
```
|
||||
|
||||
同じクエリのより良いバージョン:
|
||||
|
||||
``` sql
|
||||
SELECT Carrier, avg(DepDelay>10)*100 AS c3
|
||||
SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3
|
||||
FROM ontime
|
||||
WHERE Year=2007
|
||||
GROUP BY Carrier
|
||||
GROUP BY IATA_CODE_Reporting_Airline
|
||||
ORDER BY c3 DESC
|
||||
```
|
||||
|
||||
@ -262,29 +263,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
Carrier,
|
||||
IATA_CODE_Reporting_Airline AS Carrier,
|
||||
count(*) AS c
|
||||
FROM ontime
|
||||
WHERE DepDelay>10
|
||||
AND Year>=2000 AND Year<=2008
|
||||
GROUP BY Carrier
|
||||
)
|
||||
) q
|
||||
JOIN
|
||||
(
|
||||
SELECT
|
||||
Carrier,
|
||||
IATA_CODE_Reporting_Airline AS Carrier,
|
||||
count(*) AS c2
|
||||
FROM ontime
|
||||
WHERE Year>=2000 AND Year<=2008
|
||||
GROUP BY Carrier
|
||||
) USING Carrier
|
||||
) qq USING Carrier
|
||||
ORDER BY c3 DESC;
|
||||
```
|
||||
|
||||
同じクエリのより良いバージョン:
|
||||
|
||||
``` sql
|
||||
SELECT Carrier, avg(DepDelay>10)*100 AS c3
|
||||
SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3
|
||||
FROM ontime
|
||||
WHERE Year>=2000 AND Year<=2008
|
||||
GROUP BY Carrier
|
||||
@ -303,7 +304,7 @@ FROM
|
||||
from ontime
|
||||
WHERE DepDelay>10
|
||||
GROUP BY Year
|
||||
)
|
||||
) q
|
||||
JOIN
|
||||
(
|
||||
select
|
||||
@ -311,7 +312,7 @@ JOIN
|
||||
count(*) as c2
|
||||
from ontime
|
||||
GROUP BY Year
|
||||
) USING (Year)
|
||||
) qq USING (Year)
|
||||
ORDER BY Year;
|
||||
```
|
||||
|
||||
@ -346,7 +347,7 @@ Q10.
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
min(Year), max(Year), Carrier, count(*) AS cnt,
|
||||
min(Year), max(Year), IATA_CODE_Reporting_Airline AS Carrier, count(*) AS cnt,
|
||||
sum(ArrDelayMinutes>30) AS flights_delayed,
|
||||
round(sum(ArrDelayMinutes>30)/count(*),2) AS rate
|
||||
FROM ontime
|
||||
|
@ -27,126 +27,127 @@ done
|
||||
Создание таблицы:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE `ontime` (
|
||||
`Year` UInt16,
|
||||
`Quarter` UInt8,
|
||||
`Month` UInt8,
|
||||
`DayofMonth` UInt8,
|
||||
`DayOfWeek` UInt8,
|
||||
`FlightDate` Date,
|
||||
`UniqueCarrier` FixedString(7),
|
||||
`AirlineID` Int32,
|
||||
`Carrier` FixedString(2),
|
||||
`TailNum` String,
|
||||
`FlightNum` String,
|
||||
`OriginAirportID` Int32,
|
||||
`OriginAirportSeqID` Int32,
|
||||
`OriginCityMarketID` Int32,
|
||||
`Origin` FixedString(5),
|
||||
`OriginCityName` String,
|
||||
`OriginState` FixedString(2),
|
||||
`OriginStateFips` String,
|
||||
`OriginStateName` String,
|
||||
`OriginWac` Int32,
|
||||
`DestAirportID` Int32,
|
||||
`DestAirportSeqID` Int32,
|
||||
`DestCityMarketID` Int32,
|
||||
`Dest` FixedString(5),
|
||||
`DestCityName` String,
|
||||
`DestState` FixedString(2),
|
||||
`DestStateFips` String,
|
||||
`DestStateName` String,
|
||||
`DestWac` Int32,
|
||||
`CRSDepTime` Int32,
|
||||
`DepTime` Int32,
|
||||
`DepDelay` Int32,
|
||||
`DepDelayMinutes` Int32,
|
||||
`DepDel15` Int32,
|
||||
`DepartureDelayGroups` String,
|
||||
`DepTimeBlk` String,
|
||||
`TaxiOut` Int32,
|
||||
`WheelsOff` Int32,
|
||||
`WheelsOn` Int32,
|
||||
`TaxiIn` Int32,
|
||||
`CRSArrTime` Int32,
|
||||
`ArrTime` Int32,
|
||||
`ArrDelay` Int32,
|
||||
`ArrDelayMinutes` Int32,
|
||||
`ArrDel15` Int32,
|
||||
`ArrivalDelayGroups` Int32,
|
||||
`ArrTimeBlk` String,
|
||||
`Cancelled` UInt8,
|
||||
`CancellationCode` FixedString(1),
|
||||
`Diverted` UInt8,
|
||||
`CRSElapsedTime` Int32,
|
||||
`ActualElapsedTime` Int32,
|
||||
`AirTime` Int32,
|
||||
`Flights` Int32,
|
||||
`Distance` Int32,
|
||||
`DistanceGroup` UInt8,
|
||||
`CarrierDelay` Int32,
|
||||
`WeatherDelay` Int32,
|
||||
`NASDelay` Int32,
|
||||
`SecurityDelay` Int32,
|
||||
`LateAircraftDelay` Int32,
|
||||
`FirstDepTime` String,
|
||||
`TotalAddGTime` String,
|
||||
`LongestAddGTime` String,
|
||||
`DivAirportLandings` String,
|
||||
`DivReachedDest` String,
|
||||
`DivActualElapsedTime` String,
|
||||
`DivArrDelay` String,
|
||||
`DivDistance` String,
|
||||
`Div1Airport` String,
|
||||
`Div1AirportID` Int32,
|
||||
`Div1AirportSeqID` Int32,
|
||||
`Div1WheelsOn` String,
|
||||
`Div1TotalGTime` String,
|
||||
`Div1LongestGTime` String,
|
||||
`Div1WheelsOff` String,
|
||||
`Div1TailNum` String,
|
||||
`Div2Airport` String,
|
||||
`Div2AirportID` Int32,
|
||||
`Div2AirportSeqID` Int32,
|
||||
`Div2WheelsOn` String,
|
||||
`Div2TotalGTime` String,
|
||||
`Div2LongestGTime` String,
|
||||
`Div2WheelsOff` String,
|
||||
`Div2TailNum` String,
|
||||
`Div3Airport` String,
|
||||
`Div3AirportID` Int32,
|
||||
`Div3AirportSeqID` Int32,
|
||||
`Div3WheelsOn` String,
|
||||
`Div3TotalGTime` String,
|
||||
`Div3LongestGTime` String,
|
||||
`Div3WheelsOff` String,
|
||||
`Div3TailNum` String,
|
||||
`Div4Airport` String,
|
||||
`Div4AirportID` Int32,
|
||||
`Div4AirportSeqID` Int32,
|
||||
`Div4WheelsOn` String,
|
||||
`Div4TotalGTime` String,
|
||||
`Div4LongestGTime` String,
|
||||
`Div4WheelsOff` String,
|
||||
`Div4TailNum` String,
|
||||
`Div5Airport` String,
|
||||
`Div5AirportID` Int32,
|
||||
`Div5AirportSeqID` Int32,
|
||||
`Div5WheelsOn` String,
|
||||
`Div5TotalGTime` String,
|
||||
`Div5LongestGTime` String,
|
||||
`Div5WheelsOff` String,
|
||||
`Div5TailNum` String
|
||||
CREATE TABLE `ontime`
|
||||
(
|
||||
`Year` UInt16,
|
||||
`Quarter` UInt8,
|
||||
`Month` UInt8,
|
||||
`DayofMonth` UInt8,
|
||||
`DayOfWeek` UInt8,
|
||||
`FlightDate` Date,
|
||||
`Reporting_Airline` String,
|
||||
`DOT_ID_Reporting_Airline` Int32,
|
||||
`IATA_CODE_Reporting_Airline` String,
|
||||
`Tail_Number` Int32,
|
||||
`Flight_Number_Reporting_Airline` String,
|
||||
`OriginAirportID` Int32,
|
||||
`OriginAirportSeqID` Int32,
|
||||
`OriginCityMarketID` Int32,
|
||||
`Origin` FixedString(5),
|
||||
`OriginCityName` String,
|
||||
`OriginState` FixedString(2),
|
||||
`OriginStateFips` String,
|
||||
`OriginStateName` String,
|
||||
`OriginWac` Int32,
|
||||
`DestAirportID` Int32,
|
||||
`DestAirportSeqID` Int32,
|
||||
`DestCityMarketID` Int32,
|
||||
`Dest` FixedString(5),
|
||||
`DestCityName` String,
|
||||
`DestState` FixedString(2),
|
||||
`DestStateFips` String,
|
||||
`DestStateName` String,
|
||||
`DestWac` Int32,
|
||||
`CRSDepTime` Int32,
|
||||
`DepTime` Int32,
|
||||
`DepDelay` Int32,
|
||||
`DepDelayMinutes` Int32,
|
||||
`DepDel15` Int32,
|
||||
`DepartureDelayGroups` String,
|
||||
`DepTimeBlk` String,
|
||||
`TaxiOut` Int32,
|
||||
`WheelsOff` Int32,
|
||||
`WheelsOn` Int32,
|
||||
`TaxiIn` Int32,
|
||||
`CRSArrTime` Int32,
|
||||
`ArrTime` Int32,
|
||||
`ArrDelay` Int32,
|
||||
`ArrDelayMinutes` Int32,
|
||||
`ArrDel15` Int32,
|
||||
`ArrivalDelayGroups` Int32,
|
||||
`ArrTimeBlk` String,
|
||||
`Cancelled` UInt8,
|
||||
`CancellationCode` FixedString(1),
|
||||
`Diverted` UInt8,
|
||||
`CRSElapsedTime` Int32,
|
||||
`ActualElapsedTime` Int32,
|
||||
`AirTime` Nullable(Int32),
|
||||
`Flights` Int32,
|
||||
`Distance` Int32,
|
||||
`DistanceGroup` UInt8,
|
||||
`CarrierDelay` Int32,
|
||||
`WeatherDelay` Int32,
|
||||
`NASDelay` Int32,
|
||||
`SecurityDelay` Int32,
|
||||
`LateAircraftDelay` Int32,
|
||||
`FirstDepTime` String,
|
||||
`TotalAddGTime` String,
|
||||
`LongestAddGTime` String,
|
||||
`DivAirportLandings` String,
|
||||
`DivReachedDest` String,
|
||||
`DivActualElapsedTime` String,
|
||||
`DivArrDelay` String,
|
||||
`DivDistance` String,
|
||||
`Div1Airport` String,
|
||||
`Div1AirportID` Int32,
|
||||
`Div1AirportSeqID` Int32,
|
||||
`Div1WheelsOn` String,
|
||||
`Div1TotalGTime` String,
|
||||
`Div1LongestGTime` String,
|
||||
`Div1WheelsOff` String,
|
||||
`Div1TailNum` String,
|
||||
`Div2Airport` String,
|
||||
`Div2AirportID` Int32,
|
||||
`Div2AirportSeqID` Int32,
|
||||
`Div2WheelsOn` String,
|
||||
`Div2TotalGTime` String,
|
||||
`Div2LongestGTime` String,
|
||||
`Div2WheelsOff` String,
|
||||
`Div2TailNum` String,
|
||||
`Div3Airport` String,
|
||||
`Div3AirportID` Int32,
|
||||
`Div3AirportSeqID` Int32,
|
||||
`Div3WheelsOn` String,
|
||||
`Div3TotalGTime` String,
|
||||
`Div3LongestGTime` String,
|
||||
`Div3WheelsOff` String,
|
||||
`Div3TailNum` String,
|
||||
`Div4Airport` String,
|
||||
`Div4AirportID` Int32,
|
||||
`Div4AirportSeqID` Int32,
|
||||
`Div4WheelsOn` String,
|
||||
`Div4TotalGTime` String,
|
||||
`Div4LongestGTime` String,
|
||||
`Div4WheelsOff` String,
|
||||
`Div4TailNum` String,
|
||||
`Div5Airport` String,
|
||||
`Div5AirportID` Int32,
|
||||
`Div5AirportSeqID` Int32,
|
||||
`Div5WheelsOn` String,
|
||||
`Div5TotalGTime` String,
|
||||
`Div5LongestGTime` String,
|
||||
`Div5WheelsOff` String,
|
||||
`Div5TailNum` String
|
||||
) ENGINE = MergeTree
|
||||
PARTITION BY Year
|
||||
ORDER BY (Carrier, FlightDate)
|
||||
SETTINGS index_granularity = 8192;
|
||||
PARTITION BY Year
|
||||
ORDER BY (IATA_CODE_Reporting_Airline, FlightDate)
|
||||
SETTINGS index_granularity = 8192;
|
||||
```
|
||||
|
||||
Загрузка данных:
|
||||
|
||||
``` bash
|
||||
$ for i in *.zip; do echo $i; unzip -cq $i '*.csv' | sed 's/\.00//g' | clickhouse-client --host=example-perftest01j --query="INSERT INTO ontime FORMAT CSVWithNames"; done
|
||||
ls -1 *.zip | xargs -I{} -P $(nproc) bash -c "echo {}; unzip -cq {} '*.csv' | sed 's/\.00//g' | clickhouse-client --input_format_with_names_use_header=0 --query='INSERT INTO ontime FORMAT CSVWithNames'"
|
||||
```
|
||||
|
||||
## Скачивание готовых партиций {#skachivanie-gotovykh-partitsii}
|
||||
@ -211,7 +212,7 @@ LIMIT 10;
|
||||
Q4. Количество задержек по перевозчикам за 2007 год
|
||||
|
||||
``` sql
|
||||
SELECT Carrier, count(*)
|
||||
SELECT IATA_CODE_Reporting_Airline AS Carrier, count(*)
|
||||
FROM ontime
|
||||
WHERE DepDelay>10 AND Year=2007
|
||||
GROUP BY Carrier
|
||||
@ -225,29 +226,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
Carrier,
|
||||
IATA_CODE_Reporting_Airline AS Carrier,
|
||||
count(*) AS c
|
||||
FROM ontime
|
||||
WHERE DepDelay>10
|
||||
AND Year=2007
|
||||
GROUP BY Carrier
|
||||
)
|
||||
) q
|
||||
JOIN
|
||||
(
|
||||
SELECT
|
||||
Carrier,
|
||||
IATA_CODE_Reporting_Airline AS Carrier,
|
||||
count(*) AS c2
|
||||
FROM ontime
|
||||
WHERE Year=2007
|
||||
GROUP BY Carrier
|
||||
) USING Carrier
|
||||
) qq USING Carrier
|
||||
ORDER BY c3 DESC;
|
||||
```
|
||||
|
||||
Более оптимальная версия того же запроса:
|
||||
|
||||
``` sql
|
||||
SELECT Carrier, avg(DepDelay>10)*100 AS c3
|
||||
SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3
|
||||
FROM ontime
|
||||
WHERE Year=2007
|
||||
GROUP BY Carrier
|
||||
@ -261,29 +262,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
Carrier,
|
||||
IATA_CODE_Reporting_Airline AS Carrier,
|
||||
count(*) AS c
|
||||
FROM ontime
|
||||
WHERE DepDelay>10
|
||||
AND Year>=2000 AND Year<=2008
|
||||
GROUP BY Carrier
|
||||
)
|
||||
) q
|
||||
JOIN
|
||||
(
|
||||
SELECT
|
||||
Carrier,
|
||||
IATA_CODE_Reporting_Airline AS Carrier,
|
||||
count(*) AS c2
|
||||
FROM ontime
|
||||
WHERE Year>=2000 AND Year<=2008
|
||||
GROUP BY Carrier
|
||||
) USING Carrier
|
||||
) qq USING Carrier
|
||||
ORDER BY c3 DESC;
|
||||
```
|
||||
|
||||
Более оптимальная версия того же запроса:
|
||||
|
||||
``` sql
|
||||
SELECT Carrier, avg(DepDelay>10)*100 AS c3
|
||||
SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3
|
||||
FROM ontime
|
||||
WHERE Year>=2000 AND Year<=2008
|
||||
GROUP BY Carrier
|
||||
@ -302,7 +303,7 @@ FROM
|
||||
from ontime
|
||||
WHERE DepDelay>10
|
||||
GROUP BY Year
|
||||
)
|
||||
) q
|
||||
JOIN
|
||||
(
|
||||
select
|
||||
@ -310,7 +311,7 @@ JOIN
|
||||
count(*) as c2
|
||||
from ontime
|
||||
GROUP BY Year
|
||||
) USING (Year)
|
||||
) qq USING (Year)
|
||||
ORDER BY Year;
|
||||
```
|
||||
|
||||
@ -346,7 +347,7 @@ Q10.
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
min(Year), max(Year), Carrier, count(*) AS cnt,
|
||||
min(Year), max(Year), IATA_CODE_Reporting_Airline AS Carrier, count(*) AS cnt,
|
||||
sum(ArrDelayMinutes>30) AS flights_delayed,
|
||||
round(sum(ArrDelayMinutes>30)/count(*),2) AS rate
|
||||
FROM ontime
|
||||
|
@ -29,126 +29,127 @@ done
|
||||
创建表结构:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE `ontime` (
|
||||
`Year` UInt16,
|
||||
`Quarter` UInt8,
|
||||
`Month` UInt8,
|
||||
`DayofMonth` UInt8,
|
||||
`DayOfWeek` UInt8,
|
||||
`FlightDate` Date,
|
||||
`UniqueCarrier` FixedString(7),
|
||||
`AirlineID` Int32,
|
||||
`Carrier` FixedString(2),
|
||||
`TailNum` String,
|
||||
`FlightNum` String,
|
||||
`OriginAirportID` Int32,
|
||||
`OriginAirportSeqID` Int32,
|
||||
`OriginCityMarketID` Int32,
|
||||
`Origin` FixedString(5),
|
||||
`OriginCityName` String,
|
||||
`OriginState` FixedString(2),
|
||||
`OriginStateFips` String,
|
||||
`OriginStateName` String,
|
||||
`OriginWac` Int32,
|
||||
`DestAirportID` Int32,
|
||||
`DestAirportSeqID` Int32,
|
||||
`DestCityMarketID` Int32,
|
||||
`Dest` FixedString(5),
|
||||
`DestCityName` String,
|
||||
`DestState` FixedString(2),
|
||||
`DestStateFips` String,
|
||||
`DestStateName` String,
|
||||
`DestWac` Int32,
|
||||
`CRSDepTime` Int32,
|
||||
`DepTime` Int32,
|
||||
`DepDelay` Int32,
|
||||
`DepDelayMinutes` Int32,
|
||||
`DepDel15` Int32,
|
||||
`DepartureDelayGroups` String,
|
||||
`DepTimeBlk` String,
|
||||
`TaxiOut` Int32,
|
||||
`WheelsOff` Int32,
|
||||
`WheelsOn` Int32,
|
||||
`TaxiIn` Int32,
|
||||
`CRSArrTime` Int32,
|
||||
`ArrTime` Int32,
|
||||
`ArrDelay` Int32,
|
||||
`ArrDelayMinutes` Int32,
|
||||
`ArrDel15` Int32,
|
||||
`ArrivalDelayGroups` Int32,
|
||||
`ArrTimeBlk` String,
|
||||
`Cancelled` UInt8,
|
||||
`CancellationCode` FixedString(1),
|
||||
`Diverted` UInt8,
|
||||
`CRSElapsedTime` Int32,
|
||||
`ActualElapsedTime` Int32,
|
||||
`AirTime` Int32,
|
||||
`Flights` Int32,
|
||||
`Distance` Int32,
|
||||
`DistanceGroup` UInt8,
|
||||
`CarrierDelay` Int32,
|
||||
`WeatherDelay` Int32,
|
||||
`NASDelay` Int32,
|
||||
`SecurityDelay` Int32,
|
||||
`LateAircraftDelay` Int32,
|
||||
`FirstDepTime` String,
|
||||
`TotalAddGTime` String,
|
||||
`LongestAddGTime` String,
|
||||
`DivAirportLandings` String,
|
||||
`DivReachedDest` String,
|
||||
`DivActualElapsedTime` String,
|
||||
`DivArrDelay` String,
|
||||
`DivDistance` String,
|
||||
`Div1Airport` String,
|
||||
`Div1AirportID` Int32,
|
||||
`Div1AirportSeqID` Int32,
|
||||
`Div1WheelsOn` String,
|
||||
`Div1TotalGTime` String,
|
||||
`Div1LongestGTime` String,
|
||||
`Div1WheelsOff` String,
|
||||
`Div1TailNum` String,
|
||||
`Div2Airport` String,
|
||||
`Div2AirportID` Int32,
|
||||
`Div2AirportSeqID` Int32,
|
||||
`Div2WheelsOn` String,
|
||||
`Div2TotalGTime` String,
|
||||
`Div2LongestGTime` String,
|
||||
`Div2WheelsOff` String,
|
||||
`Div2TailNum` String,
|
||||
`Div3Airport` String,
|
||||
`Div3AirportID` Int32,
|
||||
`Div3AirportSeqID` Int32,
|
||||
`Div3WheelsOn` String,
|
||||
`Div3TotalGTime` String,
|
||||
`Div3LongestGTime` String,
|
||||
`Div3WheelsOff` String,
|
||||
`Div3TailNum` String,
|
||||
`Div4Airport` String,
|
||||
`Div4AirportID` Int32,
|
||||
`Div4AirportSeqID` Int32,
|
||||
`Div4WheelsOn` String,
|
||||
`Div4TotalGTime` String,
|
||||
`Div4LongestGTime` String,
|
||||
`Div4WheelsOff` String,
|
||||
`Div4TailNum` String,
|
||||
`Div5Airport` String,
|
||||
`Div5AirportID` Int32,
|
||||
`Div5AirportSeqID` Int32,
|
||||
`Div5WheelsOn` String,
|
||||
`Div5TotalGTime` String,
|
||||
`Div5LongestGTime` String,
|
||||
`Div5WheelsOff` String,
|
||||
`Div5TailNum` String
|
||||
CREATE TABLE `ontime`
|
||||
(
|
||||
`Year` UInt16,
|
||||
`Quarter` UInt8,
|
||||
`Month` UInt8,
|
||||
`DayofMonth` UInt8,
|
||||
`DayOfWeek` UInt8,
|
||||
`FlightDate` Date,
|
||||
`Reporting_Airline` String,
|
||||
`DOT_ID_Reporting_Airline` Int32,
|
||||
`IATA_CODE_Reporting_Airline` String,
|
||||
`Tail_Number` Int32,
|
||||
`Flight_Number_Reporting_Airline` String,
|
||||
`OriginAirportID` Int32,
|
||||
`OriginAirportSeqID` Int32,
|
||||
`OriginCityMarketID` Int32,
|
||||
`Origin` FixedString(5),
|
||||
`OriginCityName` String,
|
||||
`OriginState` FixedString(2),
|
||||
`OriginStateFips` String,
|
||||
`OriginStateName` String,
|
||||
`OriginWac` Int32,
|
||||
`DestAirportID` Int32,
|
||||
`DestAirportSeqID` Int32,
|
||||
`DestCityMarketID` Int32,
|
||||
`Dest` FixedString(5),
|
||||
`DestCityName` String,
|
||||
`DestState` FixedString(2),
|
||||
`DestStateFips` String,
|
||||
`DestStateName` String,
|
||||
`DestWac` Int32,
|
||||
`CRSDepTime` Int32,
|
||||
`DepTime` Int32,
|
||||
`DepDelay` Int32,
|
||||
`DepDelayMinutes` Int32,
|
||||
`DepDel15` Int32,
|
||||
`DepartureDelayGroups` String,
|
||||
`DepTimeBlk` String,
|
||||
`TaxiOut` Int32,
|
||||
`WheelsOff` Int32,
|
||||
`WheelsOn` Int32,
|
||||
`TaxiIn` Int32,
|
||||
`CRSArrTime` Int32,
|
||||
`ArrTime` Int32,
|
||||
`ArrDelay` Int32,
|
||||
`ArrDelayMinutes` Int32,
|
||||
`ArrDel15` Int32,
|
||||
`ArrivalDelayGroups` Int32,
|
||||
`ArrTimeBlk` String,
|
||||
`Cancelled` UInt8,
|
||||
`CancellationCode` FixedString(1),
|
||||
`Diverted` UInt8,
|
||||
`CRSElapsedTime` Int32,
|
||||
`ActualElapsedTime` Int32,
|
||||
`AirTime` Nullable(Int32),
|
||||
`Flights` Int32,
|
||||
`Distance` Int32,
|
||||
`DistanceGroup` UInt8,
|
||||
`CarrierDelay` Int32,
|
||||
`WeatherDelay` Int32,
|
||||
`NASDelay` Int32,
|
||||
`SecurityDelay` Int32,
|
||||
`LateAircraftDelay` Int32,
|
||||
`FirstDepTime` String,
|
||||
`TotalAddGTime` String,
|
||||
`LongestAddGTime` String,
|
||||
`DivAirportLandings` String,
|
||||
`DivReachedDest` String,
|
||||
`DivActualElapsedTime` String,
|
||||
`DivArrDelay` String,
|
||||
`DivDistance` String,
|
||||
`Div1Airport` String,
|
||||
`Div1AirportID` Int32,
|
||||
`Div1AirportSeqID` Int32,
|
||||
`Div1WheelsOn` String,
|
||||
`Div1TotalGTime` String,
|
||||
`Div1LongestGTime` String,
|
||||
`Div1WheelsOff` String,
|
||||
`Div1TailNum` String,
|
||||
`Div2Airport` String,
|
||||
`Div2AirportID` Int32,
|
||||
`Div2AirportSeqID` Int32,
|
||||
`Div2WheelsOn` String,
|
||||
`Div2TotalGTime` String,
|
||||
`Div2LongestGTime` String,
|
||||
`Div2WheelsOff` String,
|
||||
`Div2TailNum` String,
|
||||
`Div3Airport` String,
|
||||
`Div3AirportID` Int32,
|
||||
`Div3AirportSeqID` Int32,
|
||||
`Div3WheelsOn` String,
|
||||
`Div3TotalGTime` String,
|
||||
`Div3LongestGTime` String,
|
||||
`Div3WheelsOff` String,
|
||||
`Div3TailNum` String,
|
||||
`Div4Airport` String,
|
||||
`Div4AirportID` Int32,
|
||||
`Div4AirportSeqID` Int32,
|
||||
`Div4WheelsOn` String,
|
||||
`Div4TotalGTime` String,
|
||||
`Div4LongestGTime` String,
|
||||
`Div4WheelsOff` String,
|
||||
`Div4TailNum` String,
|
||||
`Div5Airport` String,
|
||||
`Div5AirportID` Int32,
|
||||
`Div5AirportSeqID` Int32,
|
||||
`Div5WheelsOn` String,
|
||||
`Div5TotalGTime` String,
|
||||
`Div5LongestGTime` String,
|
||||
`Div5WheelsOff` String,
|
||||
`Div5TailNum` String
|
||||
) ENGINE = MergeTree
|
||||
PARTITION BY Year
|
||||
ORDER BY (Carrier, FlightDate)
|
||||
SETTINGS index_granularity = 8192;
|
||||
PARTITION BY Year
|
||||
ORDER BY (IATA_CODE_Reporting_Airline, FlightDate)
|
||||
SETTINGS index_granularity = 8192;
|
||||
```
|
||||
|
||||
加载数据:
|
||||
|
||||
``` bash
|
||||
$ for i in *.zip; do echo $i; unzip -cq $i '*.csv' | sed 's/\.00//g' | clickhouse-client --host=example-perftest01j --query="INSERT INTO ontime FORMAT CSVWithNames"; done
|
||||
ls -1 *.zip | xargs -I{} -P $(nproc) bash -c "echo {}; unzip -cq {} '*.csv' | sed 's/\.00//g' | clickhouse-client --input_format_with_names_use_header=0 --query='INSERT INTO ontime FORMAT CSVWithNames'"
|
||||
```
|
||||
|
||||
## 下载预处理好的分区数据 {#xia-zai-yu-chu-li-hao-de-fen-qu-shu-ju}
|
||||
@ -212,7 +213,7 @@ LIMIT 10;
|
||||
Q4. 查询2007年各航空公司延误超过10分钟以上的次数
|
||||
|
||||
``` sql
|
||||
SELECT Carrier, count(*)
|
||||
SELECT IATA_CODE_Reporting_Airline AS Carrier, count(*)
|
||||
FROM ontime
|
||||
WHERE DepDelay>10 AND Year=2007
|
||||
GROUP BY Carrier
|
||||
@ -226,29 +227,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
Carrier,
|
||||
IATA_CODE_Reporting_Airline AS Carrier,
|
||||
count(*) AS c
|
||||
FROM ontime
|
||||
WHERE DepDelay>10
|
||||
AND Year=2007
|
||||
GROUP BY Carrier
|
||||
)
|
||||
) q
|
||||
JOIN
|
||||
(
|
||||
SELECT
|
||||
Carrier,
|
||||
IATA_CODE_Reporting_Airline AS Carrier,
|
||||
count(*) AS c2
|
||||
FROM ontime
|
||||
WHERE Year=2007
|
||||
GROUP BY Carrier
|
||||
) USING Carrier
|
||||
) qq USING Carrier
|
||||
ORDER BY c3 DESC;
|
||||
```
|
||||
|
||||
更好的查询版本:
|
||||
|
||||
``` sql
|
||||
SELECT Carrier, avg(DepDelay>10)*100 AS c3
|
||||
SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3
|
||||
FROM ontime
|
||||
WHERE Year=2007
|
||||
GROUP BY Carrier
|
||||
@ -262,29 +263,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
Carrier,
|
||||
IATA_CODE_Reporting_Airline AS Carrier,
|
||||
count(*) AS c
|
||||
FROM ontime
|
||||
WHERE DepDelay>10
|
||||
AND Year>=2000 AND Year<=2008
|
||||
GROUP BY Carrier
|
||||
)
|
||||
) q
|
||||
JOIN
|
||||
(
|
||||
SELECT
|
||||
Carrier,
|
||||
IATA_CODE_Reporting_Airline AS Carrier,
|
||||
count(*) AS c2
|
||||
FROM ontime
|
||||
WHERE Year>=2000 AND Year<=2008
|
||||
GROUP BY Carrier
|
||||
) USING Carrier
|
||||
) qq USING Carrier
|
||||
ORDER BY c3 DESC;
|
||||
```
|
||||
|
||||
更好的查询版本:
|
||||
|
||||
``` sql
|
||||
SELECT Carrier, avg(DepDelay>10)*100 AS c3
|
||||
SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3
|
||||
FROM ontime
|
||||
WHERE Year>=2000 AND Year<=2008
|
||||
GROUP BY Carrier
|
||||
@ -303,7 +304,7 @@ FROM
|
||||
from ontime
|
||||
WHERE DepDelay>10
|
||||
GROUP BY Year
|
||||
)
|
||||
) q
|
||||
JOIN
|
||||
(
|
||||
select
|
||||
@ -311,7 +312,7 @@ JOIN
|
||||
count(*) as c2
|
||||
from ontime
|
||||
GROUP BY Year
|
||||
) USING (Year)
|
||||
) qq USING (Year)
|
||||
ORDER BY Year;
|
||||
```
|
||||
|
||||
@ -346,7 +347,7 @@ Q10.
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
min(Year), max(Year), Carrier, count(*) AS cnt,
|
||||
min(Year), max(Year), IATA_CODE_Reporting_Airline AS Carrier, count(*) AS cnt,
|
||||
sum(ArrDelayMinutes>30) AS flights_delayed,
|
||||
round(sum(ArrDelayMinutes>30)/count(*),2) AS rate
|
||||
FROM ontime
|
||||
|
Loading…
Reference in New Issue
Block a user