Datastory

Datastory#

# Import packages
import pandas as pd
import plotly.express as px
import seaborn as sns
import plotly.graph_objs as go 

Inleiding#

Dit is de sectie waar de inleiding moet komen.

# Cancer data set: read the CSV into a DataFrame and preview its first five rows.
cancer_df = pd.read_csv(filepath_or_buffer="CancerDeaths.csv")
cancer_df.head()

	Country	Code	Year	Liver cancer	Kidney cancer	Larynx cancer	Breast cancer	Thyroid cancer	Stomach cancer	Bladder cancer	...	Non-melanoma skin cancer	Lip and oral cavity cancer	Brain and nervous system cancer	Tracheal, bronchus, and lung cancer	Gallbladder and biliary tract cancer	Malignant skin melanoma	Leukemia	Hodgkin lymphoma	Multiple myeloma	Other cancers
0	Afghanistan	AFG	1990	243.663716	39.470495	109.334207	766.535431	79.820167	923.495208	148.139204	...	26.446156	53.599636	163.869062	797.265710	125.936240	14.293978	727.763429	191.367386	50.719442	294.839679
1	Afghanistan	AFG	1991	261.241824	41.376024	117.311719	823.233932	85.111020	989.709648	156.977412	...	28.275271	57.148890	174.183219	853.126362	133.781377	15.241048	766.040181	203.509622	54.317640	311.469065
2	Afghanistan	AFG	1992	284.443630	44.106315	128.071634	901.022100	92.240603	1078.459037	168.990462	...	30.718152	61.876100	188.382296	927.812846	144.287648	16.508833	820.956547	220.208033	59.144200	334.565964
3	Afghanistan	AFG	1993	313.136816	47.424854	141.429604	996.432762	101.206726	1192.064525	184.347737	...	33.835442	67.504857	205.250430	1017.964700	157.471005	18.038507	891.134162	240.718821	64.938582	362.486749
4	Afghanistan	AFG	1994	343.229715	50.710951	155.754606	1097.895223	110.679923	1316.505674	200.246949	...	37.103370	73.175879	222.383572	1110.997186	171.311262	19.606636	965.286072	262.324068	70.930355	390.334685

5 rows × 30 columns

# Emissions data set: read the CSV and preview the five rows at positions 250-254.
emissions_df = pd.read_csv(filepath_or_buffer="Emissions.csv")
emissions_df.iloc[250:255, :]

	Country	ISO 3166-1 alpha-3	Year	Total	Coal	Oil	Gas	Cement	Flaring	Other	Per Capita
250	Afghanistan	AFG	2000	1.047128	0.003664	0.787760	0.223504	0.010216	0.021984	NaN	0.053581
251	Afghanistan	AFG	2001	1.069098	0.069616	0.762112	0.208848	0.006538	0.021984	NaN	0.054300
252	Afghanistan	AFG	2002	1.340995	0.055109	0.727438	0.547416	0.011033	0.000000	NaN	0.063856
253	Afghanistan	AFG	2003	1.559602	0.091813	0.991575	0.466408	0.009807	0.000000	NaN	0.068871
254	Afghanistan	AFG	2004	1.237247	0.091600	0.908672	0.227168	0.009807	0.000000	NaN	0.052529

# Population data set: read the CSV into a DataFrame and preview its first five rows.
population_df = pd.read_csv(filepath_or_buffer="Population.csv")
population_df.head()

	Unnamed: 0	Location	Time	PopMale	PopFemale	PopTotal	PopDensity
0	0	Afghanistan	2001	11117,754	10489,238	21606,992	33.096
1	1	Afghanistan	2002	11642,106	10958,668	22600,774	34.618
2	2	Afghanistan	2003	12214,634	11466,237	23680,871	36.273
3	3	Afghanistan	2004	12763,726	11962,963	24726,689	37.874
4	4	Afghanistan	2005	13239,684	12414,59	25654,274	39.295

Longkanker is een steeds vaker voorkomend probleem. Door de opwarming van het klimaat zien we een steeds sterkere stijging van het aantal longziektes.

# Line chart of lung cancer deaths per year (2001 onwards) for one 'Country' value.
country = 'World'  # Replace with the desired value from the 'Country' column
# The trailing space is part of the column label used for the lookup below.
lung_cancer_column = "Tracheal, bronchus, and lung cancer "

# Build the two row filters separately, then combine them
matches_country = cancer_df['Country'] == country
from_2001_onwards = cancer_df['Year'] >= 2001
country_data = cancer_df.loc[matches_country & from_2001_onwards]

# Keep only the year and the lung cancer column for plotting
year_lung_cancer = country_data[['Year', lung_cancer_column]]

# Draw the line chart and label both axes
fig = px.line(
    year_lung_cancer,
    x='Year',
    y=lung_cancer_column,
    title='Lung Cancer Deaths per year',
)
fig.update_xaxes(title_text='Year')
fig.update_yaxes(title_text='Lung Cancer Deaths')

# Render the figure
fig.show()

# Line chart of total emissions per year (2001 onwards) for one 'Country' value.
# Note: the filter compares against the 'Country' column, not the ISO code column.
country_code = 'Global'  # Replace with the desired value from the 'Country' column

# Build the two row filters separately, then combine them
matches_country = emissions_df['Country'] == country_code
from_2001_onwards = emissions_df['Year'] >= 2001
country_data = emissions_df.loc[matches_country & from_2001_onwards]

# Keep only the year and the total emissions column for plotting
year_total_emissions = country_data[['Year', 'Total']]

# Draw the line chart and label both axes
fig = px.line(
    year_total_emissions,
    x='Year',
    y='Total',
    title='Total Emissions Over Years Worldwide',
)
fig.update_xaxes(title_text='Year')
fig.update_yaxes(title_text='Total Emissions')

# Render the figure
fig.show()

# Convert the population columns from text to numbers.
#
# In the raw file the comma is a DECIMAL separator, not a thousands separator:
# e.g. "13239,684" (male) + "12414,59" (female) == "25654,274" (total).
# Deleting the comma therefore silently mis-scales every value whose trailing
# zero was dropped ("12414,59" would become 1241459 instead of 12414590).
# Swap the comma for a decimal point instead, then scale to head counts.
# NOTE(review): the x1000 factor assumes the file stores thousands of people,
# which matches the magnitudes in the preview - confirm against the data source.
for pop_column in ('PopMale', 'PopFemale', 'PopTotal'):
    # Skip columns that are already numeric so that re-running this cell
    # does not multiply the values by 1000 a second time.
    if pd.api.types.is_numeric_dtype(population_df[pop_column]):
        continue
    in_thousands = (
        population_df[pop_column]
        .astype(str)
        .str.replace(',', '.', regex=False)
        .astype(float)
    )
    # round() removes float noise introduced by the multiplication
    population_df[pop_column] = (in_thousands * 1000).round()

# Select the rows whose 'Location' equals the chosen value
country = 'World'  # Replace with the desired value from the 'Location' column
country_data = population_df[population_df['Location'] == country]

# Keep only the year ('Time') and total population columns for plotting
year_population = country_data[['Time', 'PopTotal']]

# Create a line chart using Plotly and label both axes
fig = px.line(year_population, x='Time', y='PopTotal', title=f'Total Population Growth in {country} (2001 - Present)')
fig.update_layout(xaxis_title='Year', yaxis_title='Total Population')

# Show the plot
fig.show()

Datastory

Contents

Datastory#

Inleiding#