mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 09:32:06 +00:00
translate getting_started.example_dataset.ontime to farsi
This commit is contained in:
parent
117c32aaee
commit
082542e7dc
393
docs/fa/getting_started/example_datasets/ontime.md
Normal file
393
docs/fa/getting_started/example_datasets/ontime.md
Normal file
@ -0,0 +1,393 @@
|
||||
<a name="example_datasets-ontime"></a>
|
||||
|
||||
<div dir="rtl">
|
||||
|
||||
# OnTime
|
||||
|
||||
دانلود داده ها:
|
||||
|
||||
</div>
|
||||
|
||||
```bash
|
||||
for s in `seq 1987 2017`
|
||||
do
|
||||
for m in `seq 1 12`
|
||||
do
|
||||
wget http://transtats.bts.gov/PREZIP/On_Time_On_Time_Performance_${s}_${m}.zip
|
||||
done
|
||||
done
|
||||
```
|
||||
|
||||
<div dir="rtl">
|
||||
|
||||
(از <https://github.com/Percona-Lab/ontime-airline-performance/blob/master/download.sh> )
|
||||
|
||||
ساخت جدول:
|
||||
|
||||
</div>
|
||||
|
||||
```sql
|
||||
CREATE TABLE `ontime` (
|
||||
`Year` UInt16,
|
||||
`Quarter` UInt8,
|
||||
`Month` UInt8,
|
||||
`DayofMonth` UInt8,
|
||||
`DayOfWeek` UInt8,
|
||||
`FlightDate` Date,
|
||||
`UniqueCarrier` FixedString(7),
|
||||
`AirlineID` Int32,
|
||||
`Carrier` FixedString(2),
|
||||
`TailNum` String,
|
||||
`FlightNum` String,
|
||||
`OriginAirportID` Int32,
|
||||
`OriginAirportSeqID` Int32,
|
||||
`OriginCityMarketID` Int32,
|
||||
`Origin` FixedString(5),
|
||||
`OriginCityName` String,
|
||||
`OriginState` FixedString(2),
|
||||
`OriginStateFips` String,
|
||||
`OriginStateName` String,
|
||||
`OriginWac` Int32,
|
||||
`DestAirportID` Int32,
|
||||
`DestAirportSeqID` Int32,
|
||||
`DestCityMarketID` Int32,
|
||||
`Dest` FixedString(5),
|
||||
`DestCityName` String,
|
||||
`DestState` FixedString(2),
|
||||
`DestStateFips` String,
|
||||
`DestStateName` String,
|
||||
`DestWac` Int32,
|
||||
`CRSDepTime` Int32,
|
||||
`DepTime` Int32,
|
||||
`DepDelay` Int32,
|
||||
`DepDelayMinutes` Int32,
|
||||
`DepDel15` Int32,
|
||||
`DepartureDelayGroups` String,
|
||||
`DepTimeBlk` String,
|
||||
`TaxiOut` Int32,
|
||||
`WheelsOff` Int32,
|
||||
`WheelsOn` Int32,
|
||||
`TaxiIn` Int32,
|
||||
`CRSArrTime` Int32,
|
||||
`ArrTime` Int32,
|
||||
`ArrDelay` Int32,
|
||||
`ArrDelayMinutes` Int32,
|
||||
`ArrDel15` Int32,
|
||||
`ArrivalDelayGroups` Int32,
|
||||
`ArrTimeBlk` String,
|
||||
`Cancelled` UInt8,
|
||||
`CancellationCode` FixedString(1),
|
||||
`Diverted` UInt8,
|
||||
`CRSElapsedTime` Int32,
|
||||
`ActualElapsedTime` Int32,
|
||||
`AirTime` Int32,
|
||||
`Flights` Int32,
|
||||
`Distance` Int32,
|
||||
`DistanceGroup` UInt8,
|
||||
`CarrierDelay` Int32,
|
||||
`WeatherDelay` Int32,
|
||||
`NASDelay` Int32,
|
||||
`SecurityDelay` Int32,
|
||||
`LateAircraftDelay` Int32,
|
||||
`FirstDepTime` String,
|
||||
`TotalAddGTime` String,
|
||||
`LongestAddGTime` String,
|
||||
`DivAirportLandings` String,
|
||||
`DivReachedDest` String,
|
||||
`DivActualElapsedTime` String,
|
||||
`DivArrDelay` String,
|
||||
`DivDistance` String,
|
||||
`Div1Airport` String,
|
||||
`Div1AirportID` Int32,
|
||||
`Div1AirportSeqID` Int32,
|
||||
`Div1WheelsOn` String,
|
||||
`Div1TotalGTime` String,
|
||||
`Div1LongestGTime` String,
|
||||
`Div1WheelsOff` String,
|
||||
`Div1TailNum` String,
|
||||
`Div2Airport` String,
|
||||
`Div2AirportID` Int32,
|
||||
`Div2AirportSeqID` Int32,
|
||||
`Div2WheelsOn` String,
|
||||
`Div2TotalGTime` String,
|
||||
`Div2LongestGTime` String,
|
||||
`Div2WheelsOff` String,
|
||||
`Div2TailNum` String,
|
||||
`Div3Airport` String,
|
||||
`Div3AirportID` Int32,
|
||||
`Div3AirportSeqID` Int32,
|
||||
`Div3WheelsOn` String,
|
||||
`Div3TotalGTime` String,
|
||||
`Div3LongestGTime` String,
|
||||
`Div3WheelsOff` String,
|
||||
`Div3TailNum` String,
|
||||
`Div4Airport` String,
|
||||
`Div4AirportID` Int32,
|
||||
`Div4AirportSeqID` Int32,
|
||||
`Div4WheelsOn` String,
|
||||
`Div4TotalGTime` String,
|
||||
`Div4LongestGTime` String,
|
||||
`Div4WheelsOff` String,
|
||||
`Div4TailNum` String,
|
||||
`Div5Airport` String,
|
||||
`Div5AirportID` Int32,
|
||||
`Div5AirportSeqID` Int32,
|
||||
`Div5WheelsOn` String,
|
||||
`Div5TotalGTime` String,
|
||||
`Div5LongestGTime` String,
|
||||
`Div5WheelsOff` String,
|
||||
`Div5TailNum` String
|
||||
) ENGINE = MergeTree(FlightDate, (Year, FlightDate), 8192)
|
||||
```
|
||||
|
||||
<div dir="rtl">
|
||||
|
||||
Load داده ها:
|
||||
|
||||
</div>
|
||||
|
||||
```bash
|
||||
for i in *.zip; do echo $i; unzip -cq $i '*.csv' | sed 's/\.00//g' | clickhouse-client --host=example-perftest01j --query="INSERT INTO ontime FORMAT CSVWithNames"; done
|
||||
```
|
||||
|
||||
<div dir="rtl">
|
||||
query ها:
|
||||
|
||||
Q0.
|
||||
|
||||
</div>
|
||||
|
||||
```sql
|
||||
select avg(c1) from (select Year, Month, count(*) as c1 from ontime group by Year, Month);
|
||||
```
|
||||
|
||||
<div dir="rtl">
|
||||
|
||||
Q1. تعداد پروازهای به تفکیک روز از تاریخ 2000 تا 2008
|
||||
|
||||
</div>
|
||||
|
||||
```sql
|
||||
SELECT DayOfWeek, count(*) AS c FROM ontime WHERE Year >= 2000 AND Year <= 2008 GROUP BY DayOfWeek ORDER BY c DESC;
|
||||
```
|
||||
|
||||
<div dir="rtl">
|
||||
|
||||
Q2. تعداد پروازهای بیش از 10 دقیقه تاخیر خورده، گروه بندی براساس روزهای هفته از سال 2000 تا 2008
|
||||
|
||||
</div>
|
||||
|
||||
```sql
|
||||
SELECT DayOfWeek, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year >= 2000 AND Year <= 2008 GROUP BY DayOfWeek ORDER BY c DESC
|
||||
```
|
||||
|
||||
<div dir="rtl">
|
||||
|
||||
Q3. تعداد تاخیرها براساس airport از سال 2000 تا 2008
|
||||
|
||||
</div>
|
||||
|
||||
```sql
|
||||
SELECT Origin, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year >= 2000 AND Year <= 2008 GROUP BY Origin ORDER BY c DESC LIMIT 10
|
||||
```
|
||||
|
||||
<div dir="rtl">
|
||||
|
||||
Q4. تعداد تاخیرها براساس carrier در سال 78
|
||||
|
||||
</div>
|
||||
|
||||
```sql
|
||||
SELECT Carrier, count(*) FROM ontime WHERE DepDelay>10 AND Year = 2007 GROUP BY Carrier ORDER BY count(*) DESC
|
||||
```
|
||||
|
||||
<div dir="rtl">
|
||||
|
||||
Q5. درصد تاخیر ها براساس carrier در سال 2007
|
||||
|
||||
</div>
|
||||
|
||||
```sql
|
||||
SELECT Carrier, c, c2, c*1000/c2 as c3
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
Carrier,
|
||||
count(*) AS c
|
||||
FROM ontime
|
||||
WHERE DepDelay>10
|
||||
AND Year=2007
|
||||
GROUP BY Carrier
|
||||
)
|
||||
ANY INNER JOIN
|
||||
(
|
||||
SELECT
|
||||
Carrier,
|
||||
count(*) AS c2
|
||||
FROM ontime
|
||||
WHERE Year=2007
|
||||
GROUP BY Carrier
|
||||
) USING Carrier
|
||||
ORDER BY c3 DESC;
|
||||
```
|
||||
|
||||
<div dir="rtl">
|
||||
|
||||
نسخه ی بهتر query
|
||||
|
||||
</div>
|
||||
|
||||
```sql
|
||||
SELECT Carrier, avg(DepDelay > 10) * 1000 AS c3 FROM ontime WHERE Year = 2007 GROUP BY Carrier ORDER BY Carrier
|
||||
```
|
||||
|
||||
<div dir="rtl">
|
||||
|
||||
Q6. مانند query قبلی اما برای طیف وسیعی از سال های 2000 تا 2008
|
||||
|
||||
</div>
|
||||
|
||||
```sql
|
||||
SELECT Carrier, c, c2, c*1000/c2 as c3
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
Carrier,
|
||||
count(*) AS c
|
||||
FROM ontime
|
||||
WHERE DepDelay>10
|
||||
AND Year >= 2000 AND Year <= 2008
|
||||
GROUP BY Carrier
|
||||
)
|
||||
ANY INNER JOIN
|
||||
(
|
||||
SELECT
|
||||
Carrier,
|
||||
count(*) AS c2
|
||||
FROM ontime
|
||||
WHERE Year >= 2000 AND Year <= 2008
|
||||
GROUP BY Carrier
|
||||
) USING Carrier
|
||||
ORDER BY c3 DESC;
|
||||
```
|
||||
|
||||
<div dir="rtl">
|
||||
|
||||
نسخه ی بهتر query
|
||||
|
||||
</div>
|
||||
|
||||
```sql
|
||||
SELECT Carrier, avg(DepDelay > 10) * 1000 AS c3 FROM ontime WHERE Year >= 2000 AND Year <= 2008 GROUP BY Carrier ORDER BY Carrier
|
||||
```
|
||||
|
||||
<div dir="rtl">
|
||||
|
||||
Q7. درصد تاخیر بیش از 10 دقیقه پروازها به تفکیک سال
|
||||
|
||||
</div>
|
||||
|
||||
```sql
|
||||
SELECT Year, c1/c2
|
||||
FROM
|
||||
(
|
||||
select
|
||||
Year,
|
||||
count(*)*1000 as c1
|
||||
from ontime
|
||||
WHERE DepDelay>10
|
||||
GROUP BY Year
|
||||
)
|
||||
ANY INNER JOIN
|
||||
(
|
||||
select
|
||||
Year,
|
||||
count(*) as c2
|
||||
from ontime
|
||||
GROUP BY Year
|
||||
) USING (Year)
|
||||
ORDER BY Year
|
||||
```
|
||||
|
||||
<div dir="rtl">
|
||||
|
||||
نسخه ی بهتر query
|
||||
|
||||
</div>
|
||||
|
||||
```sql
|
||||
SELECT Year, avg(DepDelay > 10) FROM ontime GROUP BY Year ORDER BY Year
|
||||
```
|
||||
|
||||
<div dir="rtl">
|
||||
|
||||
Q8. مقصدهای پرطرفدار براساس تعداد اتصال های مستقیم شهرها برای سال 2000 تا 2010
|
||||
|
||||
</div>
|
||||
|
||||
```sql
|
||||
SELECT DestCityName, uniqExact(OriginCityName) AS u FROM ontime WHERE Year >= 2000 and Year <= 2010 GROUP BY DestCityName ORDER BY u DESC LIMIT 10;
|
||||
```
|
||||
|
||||
<div dir="rtl">
|
||||
|
||||
Q9.
|
||||
|
||||
</div>
|
||||
|
||||
```sql
|
||||
select Year, count(*) as c1 from ontime group by Year;
|
||||
```
|
||||
|
||||
<div dir="rtl">
|
||||
|
||||
Q10.
|
||||
|
||||
</div>
|
||||
|
||||
```sql
|
||||
select
|
||||
min(Year), max(Year), Carrier, count(*) as cnt,
|
||||
sum(ArrDelayMinutes>30) as flights_delayed,
|
||||
round(sum(ArrDelayMinutes>30)/count(*),2) as rate
|
||||
FROM ontime
|
||||
WHERE
|
||||
DayOfWeek not in (6,7) and OriginState not in ('AK', 'HI', 'PR', 'VI')
|
||||
and DestState not in ('AK', 'HI', 'PR', 'VI')
|
||||
and FlightDate < '2010-01-01'
|
||||
GROUP by Carrier
|
||||
HAVING cnt > 100000 and max(Year) > 1990
|
||||
ORDER by rate DESC
|
||||
LIMIT 1000;
|
||||
```
|
||||
|
||||
<div dir="rtl">
|
||||
|
||||
query های بیشتر:
|
||||
|
||||
</div>
|
||||
|
||||
```sql
|
||||
SELECT avg(cnt) FROM (SELECT Year,Month,count(*) AS cnt FROM ontime WHERE DepDel15=1 GROUP BY Year,Month)
|
||||
|
||||
select avg(c1) from (select Year,Month,count(*) as c1 from ontime group by Year,Month)
|
||||
|
||||
SELECT DestCityName, uniqExact(OriginCityName) AS u FROM ontime GROUP BY DestCityName ORDER BY u DESC LIMIT 10;
|
||||
|
||||
SELECT OriginCityName, DestCityName, count() AS c FROM ontime GROUP BY OriginCityName, DestCityName ORDER BY c DESC LIMIT 10;
|
||||
|
||||
SELECT OriginCityName, count() AS c FROM ontime GROUP BY OriginCityName ORDER BY c DESC LIMIT 10;
|
||||
```
|
||||
|
||||
<div dir="rtl">
|
||||
|
||||
این تست های performance توسط Vadim Tkachenko انجام شده است. برای اطلاعات بیشتر به لینک های زیر مراجعه کنید:
|
||||
|
||||
- <https://www.percona.com/blog/2009/10/02/analyzing-air-traffic-performance-with-infobright-and-monetdb/>
|
||||
- <https://www.percona.com/blog/2009/10/26/air-traffic-queries-in-luciddb/>
|
||||
- <https://www.percona.com/blog/2009/11/02/air-traffic-queries-in-infinidb-early-alpha/>
|
||||
- <https://www.percona.com/blog/2014/04/21/using-apache-hadoop-and-impala-together-with-mysql-for-data-analysis/>
|
||||
- <https://www.percona.com/blog/2016/01/07/apache-spark-with-air-ontime-performance-data/>
|
||||
- <http://nickmakos.blogspot.ru/2012/08/analyzing-air-traffic-performance-with.html>
|
||||
|
||||
</div>
|
Loading…
Reference in New Issue
Block a user