Aviation Data#
import os
os.chdir("../../")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools
from bokeh.palettes import Spectral10
from bokeh.plotting import show, figure, output_notebook
from bokeh.models import CustomJS, Select, ColumnDataSource, HoverTool, BoxZoomTool, ResetTool
from bokeh.layouts import row, column, Spacer
output_notebook()
ad = pd.read_excel(os.getcwd() +"/data/tourism/aviation_seats_flights_pic.xlsx")
ad.columns = [col.lower() for col in ad.columns]
ad["date"] = pd.to_datetime(ad["date"])
ad.head(5)
country | iso | region | date | aircraft_type | seats_arrivals_domestic | seats_arrivals_interregional | seats_arrivals_intraregional | seats_arrivals_intl | seats_arrivals_total | available_seat_kilometers | number_of_flights_domestic | number_of_flights_interregional | number_of_flights_intraregional | number_of_flights_intl | number_of_flights_total | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Fiji | FJ | East Asia & Pacific | 2019-01-01 | passenger | 839 | 273 | 3480 | 3753 | 4592 | 1.430416e+07 | 8 | 1 | 10 | 11 | 19 |
1 | Fiji | FJ | East Asia & Pacific | 2019-01-02 | passenger | 974 | 313 | 3471 | 3784 | 4758 | 1.495610e+07 | 8 | 1 | 10 | 11 | 19 |
2 | Fiji | FJ | East Asia & Pacific | 2019-01-03 | passenger | 1190 | 443 | 3675 | 4118 | 5308 | 1.592143e+07 | 10 | 2 | 12 | 14 | 24 |
3 | Fiji | FJ | East Asia & Pacific | 2019-01-04 | passenger | 831 | 586 | 3159 | 3745 | 4576 | 1.457334e+07 | 7 | 2 | 12 | 14 | 21 |
4 | Fiji | FJ | East Asia & Pacific | 2019-01-05 | passenger | 744 | 273 | 4752 | 5025 | 5769 | 1.773449e+07 | 7 | 1 | 12 | 13 | 20 |
ad.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10611 entries, 0 to 10610
Data columns (total 16 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 country 10611 non-null object
1 iso 10611 non-null object
2 region 10611 non-null object
3 date 10611 non-null datetime64[ns]
4 aircraft_type 10566 non-null object
5 seats_arrivals_domestic 10611 non-null int64
6 seats_arrivals_interregional 10611 non-null int64
7 seats_arrivals_intraregional 10611 non-null int64
8 seats_arrivals_intl 10611 non-null int64
9 seats_arrivals_total 10611 non-null int64
10 available_seat_kilometers 10611 non-null float64
11 number_of_flights_domestic 10611 non-null int64
12 number_of_flights_interregional 10611 non-null int64
13 number_of_flights_intraregional 10611 non-null int64
14 number_of_flights_intl 10611 non-null int64
15 number_of_flights_total 10611 non-null int64
dtypes: datetime64[ns](1), float64(1), int64(10), object(4)
memory usage: 1.3+ MB
select_cols = ["country", "date", "seats_arrivals_intl", "seats_arrivals_total",
"number_of_flights_intl", "number_of_flights_total"]
# Subset to the passenger
ad = (ad[ad["aircraft_type"] == "passenger"][select_cols]
.reset_index().drop("index", axis=1))
display(ad.head(5))
country | date | seats_arrivals_intl | seats_arrivals_total | number_of_flights_intl | number_of_flights_total | |
---|---|---|---|---|---|---|
0 | Fiji | 2019-01-01 | 3753 | 4592 | 11 | 19 |
1 | Fiji | 2019-01-02 | 3784 | 4758 | 11 | 19 |
2 | Fiji | 2019-01-03 | 4118 | 5308 | 14 | 24 |
3 | Fiji | 2019-01-04 | 3745 | 4576 | 14 | 21 |
4 | Fiji | 2019-01-05 | 5025 | 5769 | 13 | 20 |
daterange = pd.date_range(start=ad.date.min(),
end=ad.date.max(),
freq="D").tolist()
countries = ad.country.unique().tolist()
comb = itertools.product(daterange, countries)
date_df = pd.DataFrame()
for i in comb:
row = pd.DataFrame(i).T
date_df = pd.concat([date_df, row], axis=0)
date_df.columns = ["date", "country"]
ad = date_df.merge(ad, how="left").fillna(0)
ad.sample(5)
date | country | seats_arrivals_intl | seats_arrivals_total | number_of_flights_intl | number_of_flights_total | |
---|---|---|---|---|---|---|
5942 | 2020-05-10 | Marshall Islands (the) | 0.0 | 0.0 | 0.0 | 0.0 |
6235 | 2020-06-03 | Samoa | 0.0 | 0.0 | 0.0 | 0.0 |
10773 | 2021-06-16 | Tonga | 302.0 | 302.0 | 1.0 | 1.0 |
7530 | 2020-09-19 | Papua New Guinea | 376.0 | 3467.0 | 2.0 | 29.0 |
2751 | 2019-08-18 | Micronesia (Federated States of) | 0.0 | 239.0 | 0.0 | 2.0 |
ad_7dsum = pd.DataFrame()
for country in countries:
country_df = (ad[ad.country == country].sort_values(by="date")
.reset_index().drop("index", axis=1))
country_df.iloc[:, -4:] = country_df.iloc[:, -4:].rolling(window=7).sum()
ad_7dsum = pd.concat([ad_7dsum, country_df], axis=0)
nof_7dsum = (ad_7dsum[["date", "country", "seats_arrivals_intl"]]
.pivot(index="date", columns="country", values="seats_arrivals_intl")
.reset_index())
nof_7dsum.columns.name = None
nof_7dsum.sample(5)
date | Fiji | Kiribati | Marshall Islands (the) | Micronesia (Federated States of) | Nauru | Palau | Papua New Guinea | Samoa | Solomon Islands | Tonga | Tuvalu | Vanuatu | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
822 | 2021-04-02 | 3477.0 | 170.0 | 0.0 | 166.0 | 0.0 | 482.0 | 2745.0 | 302.0 | 306.0 | 438.0 | 0.0 | 818.0 |
1038 | 2021-11-04 | 6066.0 | 0.0 | 332.0 | 498.0 | 144.0 | 482.0 | 2926.0 | 0.0 | 772.0 | 275.0 | 0.0 | 920.0 |
1214 | 2022-04-29 | 17561.0 | 144.0 | 166.0 | 332.0 | 0.0 | 498.0 | 3661.0 | 599.0 | 288.0 | 476.0 | 0.0 | 0.0 |
222 | 2019-08-11 | 25127.0 | 714.0 | 996.0 | 1361.0 | 0.0 | 3416.0 | 8372.0 | 5169.0 | 2234.0 | 3293.0 | 0.0 | 3612.0 |
1171 | 2022-03-17 | 10458.0 | 484.0 | 346.0 | 332.0 | 144.0 | 332.0 | 3087.0 | 472.0 | 288.0 | 302.0 | 0.0 | 580.0 |
Show code cell source
cds = ColumnDataSource(nof_7dsum)
render_cds = ColumnDataSource(
{"x": nof_7dsum["date"],
"y": nof_7dsum["Fiji"],
}
)
countries = nof_7dsum.columns[~nof_7dsum.columns.isin(["date"])].tolist()
# Hovertool config
hover = HoverTool(tooltips=[('Date', '@x{%Y-%m-%d}'),
('Number', '@y')],
formatters={'@x': 'datetime'})
p = figure(width=800, height=500, x_axis_type="datetime",
tools=[hover, BoxZoomTool(), ResetTool()])
p.line(x="x", y="y", source=render_cds)
# Selector
country_select = Select(title="Select Country", width=300,
options=countries, value="Country")
jscode = """
// New data
render_cds.data['y'] = cds.data[country_select.value];
yaxis[0].axis_label = country_select.value;
render_cds.change.emit();
"""
args = dict(
render_cds=render_cds,
cds=cds,
country_select=country_select,
yaxis=p.yaxis,
)
country_select.js_on_change("value", CustomJS(code=jscode, args=args))
layout = column(country_select, Spacer(height=30), p)
show(layout)