Datastory

Contents

Datastory

# Import packages
import pandas as pd
import plotly.express as px
import seaborn as sns
import plotly.graph_objs as go 

Inleiding

Dit is de sectie waar de inleiding moet komen.

# Cancer data set: read the cancer-deaths CSV from the working directory
# and preview its first five rows.
cancer_df = pd.read_csv(filepath_or_buffer="CancerDeaths.csv")
cancer_df.head()
Country Code Year Liver cancer Kidney cancer Larynx cancer Breast cancer Thyroid cancer Stomach cancer Bladder cancer ... Non-melanoma skin cancer Lip and oral cavity cancer Brain and nervous system cancer Tracheal, bronchus, and lung cancer Gallbladder and biliary tract cancer Malignant skin melanoma Leukemia Hodgkin lymphoma Multiple myeloma Other cancers
0 Afghanistan AFG 1990 243.663716 39.470495 109.334207 766.535431 79.820167 923.495208 148.139204 ... 26.446156 53.599636 163.869062 797.265710 125.936240 14.293978 727.763429 191.367386 50.719442 294.839679
1 Afghanistan AFG 1991 261.241824 41.376024 117.311719 823.233932 85.111020 989.709648 156.977412 ... 28.275271 57.148890 174.183219 853.126362 133.781377 15.241048 766.040181 203.509622 54.317640 311.469065
2 Afghanistan AFG 1992 284.443630 44.106315 128.071634 901.022100 92.240603 1078.459037 168.990462 ... 30.718152 61.876100 188.382296 927.812846 144.287648 16.508833 820.956547 220.208033 59.144200 334.565964
3 Afghanistan AFG 1993 313.136816 47.424854 141.429604 996.432762 101.206726 1192.064525 184.347737 ... 33.835442 67.504857 205.250430 1017.964700 157.471005 18.038507 891.134162 240.718821 64.938582 362.486749
4 Afghanistan AFG 1994 343.229715 50.710951 155.754606 1097.895223 110.679923 1316.505674 200.246949 ... 37.103370 73.175879 222.383572 1110.997186 171.311262 19.606636 965.286072 262.324068 70.930355 390.334685

5 rows × 30 columns

# Emissions data set: read the CSV from the working directory and preview
# the rows at positions 250 through 254.
emissions_df = pd.read_csv(filepath_or_buffer="Emissions.csv")
emissions_df.iloc[250:255, :]
Country ISO 3166-1 alpha-3 Year Total Coal Oil Gas Cement Flaring Other Per Capita
250 Afghanistan AFG 2000 1.047128 0.003664 0.787760 0.223504 0.010216 0.021984 NaN 0.053581
251 Afghanistan AFG 2001 1.069098 0.069616 0.762112 0.208848 0.006538 0.021984 NaN 0.054300
252 Afghanistan AFG 2002 1.340995 0.055109 0.727438 0.547416 0.011033 0.000000 NaN 0.063856
253 Afghanistan AFG 2003 1.559602 0.091813 0.991575 0.466408 0.009807 0.000000 NaN 0.068871
254 Afghanistan AFG 2004 1.237247 0.091600 0.908672 0.227168 0.009807 0.000000 NaN 0.052529
# Population data set: read the CSV from the working directory and preview
# its first five rows.
population_df = pd.read_csv(filepath_or_buffer="Population.csv")
population_df.head()
Unnamed: 0 Location Time PopMale PopFemale PopTotal PopDensity
0 0 Afghanistan 2001 11117,754 10489,238 21606,992 33.096
1 1 Afghanistan 2002 11642,106 10958,668 22600,774 34.618
2 2 Afghanistan 2003 12214,634 11466,237 23680,871 36.273
3 3 Afghanistan 2004 12763,726 11962,963 24726,689 37.874
4 4 Afghanistan 2005 13239,684 12414,59 25654,274 39.295

Longkanker is een steeds vaker voorkomend probleem. Door de klimaatopwarming zien we steeds vaker een stijging in longziektes.

# Line chart of lung-cancer deaths per year for one entry of the 'Country'
# column, restricted to the years 2001 and later.
country = 'World'  # matched against the 'Country' column (a name, not a code)
country_data = cancer_df.loc[
    (cancer_df['Year'] >= 2001) & (cancer_df['Country'] == country)
]

# Keep only the year and the lung-cancer column. The column label is written
# with a trailing space, exactly as it is looked up in the data frame.
year_lung_cancer = country_data.loc[
    :, ['Year', "Tracheal, bronchus, and lung cancer "]
]

# Build the line chart, then label both axes.
fig = px.line(
    data_frame=year_lung_cancer,
    x='Year',
    y="Tracheal, bronchus, and lung cancer ",
    title='Lung Cancer Deaths per year',
)
fig.update_xaxes(title_text='Year')
fig.update_yaxes(title_text='Lung Cancer Deaths')

# Render the chart.
fig.show()
# Line chart of total emissions per year for the 'Global' entry of the
# 'Country' column, restricted to the years 2001 and later.
country_code = 'Global'  # matched against the 'Country' column
country_data = emissions_df.loc[
    (emissions_df['Year'] >= 2001) & (emissions_df['Country'] == country_code)
]

# Keep only the year and the total-emissions column.
year_total_emissions = country_data.loc[:, ['Year', 'Total']]

# Build the line chart, then label both axes.
fig = px.line(
    data_frame=year_total_emissions,
    x='Year',
    y='Total',
    title='Total Emissions Over Years Worldwide',
)
fig.update_xaxes(title_text='Year')
fig.update_yaxes(title_text='Total Emissions')

# Render the chart.
fig.show()
# Convert the population columns from text to numbers.
# The preview shows values such as "12763,726" next to "12414,59": the comma
# is a decimal separator whose trailing zero was dropped. Simply deleting the
# comma therefore turns "12414,59" into 1241459 instead of 12414590, ten
# times too small compared with its neighbours. Parse the comma as a decimal
# point and scale by 1000 instead; for values with exactly three decimals
# this yields the same number as deleting the comma did.
# NOTE(review): this assumes the raw figures are in thousands of people -
# confirm against the data source.
for pop_col in ('PopMale', 'PopFemale', 'PopTotal'):
    # Skip columns that are already numeric so that re-running this cell
    # does not scale them a second time.
    if pd.api.types.is_numeric_dtype(population_df[pop_col]):
        continue
    population_df[pop_col] = (
        population_df[pop_col]
        .astype(str)
        .str.replace(',', '.', regex=False)
        .astype(float)
        .mul(1000)
        .round()  # remove floating-point noise left by the scaling
    )

# Line chart of total population per year for one entry of the 'Location'
# column.
country = 'World'  # matched against the 'Location' column
country_data = population_df.loc[population_df['Location'] == country]

# Keep only the year ('Time') and the total-population column.
year_population = country_data.loc[:, ['Time', 'PopTotal']]

# Build the line chart, then label both axes.
fig = px.line(
    data_frame=year_population,
    x='Time',
    y='PopTotal',
    title=f'Total Population Growth in {country} (2001 - Present)',
)
fig.update_xaxes(title_text='Year')
fig.update_yaxes(title_text='Total Population')

# Render the chart.
fig.show()