Datastory#
# Import packages
import pandas as pd
import plotly.express as px
import seaborn as sns
import plotly.graph_objs as go
Inleiding#
Dit is de sectie waar de inleiding moet komen.
# Read the cancer-deaths CSV into a DataFrame and preview its first five rows
cancer_df = pd.read_csv(filepath_or_buffer="CancerDeaths.csv")
cancer_df.head()
| Country | Code | Year | Liver cancer | Kidney cancer | Larynx cancer | Breast cancer | Thyroid cancer | Stomach cancer | Bladder cancer | ... | Non-melanoma skin cancer | Lip and oral cavity cancer | Brain and nervous system cancer | Tracheal, bronchus, and lung cancer | Gallbladder and biliary tract cancer | Malignant skin melanoma | Leukemia | Hodgkin lymphoma | Multiple myeloma | Other cancers | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | AFG | 1990 | 243.663716 | 39.470495 | 109.334207 | 766.535431 | 79.820167 | 923.495208 | 148.139204 | ... | 26.446156 | 53.599636 | 163.869062 | 797.265710 | 125.936240 | 14.293978 | 727.763429 | 191.367386 | 50.719442 | 294.839679 |
| 1 | Afghanistan | AFG | 1991 | 261.241824 | 41.376024 | 117.311719 | 823.233932 | 85.111020 | 989.709648 | 156.977412 | ... | 28.275271 | 57.148890 | 174.183219 | 853.126362 | 133.781377 | 15.241048 | 766.040181 | 203.509622 | 54.317640 | 311.469065 |
| 2 | Afghanistan | AFG | 1992 | 284.443630 | 44.106315 | 128.071634 | 901.022100 | 92.240603 | 1078.459037 | 168.990462 | ... | 30.718152 | 61.876100 | 188.382296 | 927.812846 | 144.287648 | 16.508833 | 820.956547 | 220.208033 | 59.144200 | 334.565964 |
| 3 | Afghanistan | AFG | 1993 | 313.136816 | 47.424854 | 141.429604 | 996.432762 | 101.206726 | 1192.064525 | 184.347737 | ... | 33.835442 | 67.504857 | 205.250430 | 1017.964700 | 157.471005 | 18.038507 | 891.134162 | 240.718821 | 64.938582 | 362.486749 |
| 4 | Afghanistan | AFG | 1994 | 343.229715 | 50.710951 | 155.754606 | 1097.895223 | 110.679923 | 1316.505674 | 200.246949 | ... | 37.103370 | 73.175879 | 222.383572 | 1110.997186 | 171.311262 | 19.606636 | 965.286072 | 262.324068 | 70.930355 | 390.334685 |
5 rows × 30 columns
# Read the emissions CSV into a DataFrame and preview the rows at
# positions 250 through 254
emissions_df = pd.read_csv(filepath_or_buffer="Emissions.csv")
emissions_df.iloc[250:255, :]
| Country | ISO 3166-1 alpha-3 | Year | Total | Coal | Oil | Gas | Cement | Flaring | Other | Per Capita | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 250 | Afghanistan | AFG | 2000 | 1.047128 | 0.003664 | 0.787760 | 0.223504 | 0.010216 | 0.021984 | NaN | 0.053581 |
| 251 | Afghanistan | AFG | 2001 | 1.069098 | 0.069616 | 0.762112 | 0.208848 | 0.006538 | 0.021984 | NaN | 0.054300 |
| 252 | Afghanistan | AFG | 2002 | 1.340995 | 0.055109 | 0.727438 | 0.547416 | 0.011033 | 0.000000 | NaN | 0.063856 |
| 253 | Afghanistan | AFG | 2003 | 1.559602 | 0.091813 | 0.991575 | 0.466408 | 0.009807 | 0.000000 | NaN | 0.068871 |
| 254 | Afghanistan | AFG | 2004 | 1.237247 | 0.091600 | 0.908672 | 0.227168 | 0.009807 | 0.000000 | NaN | 0.052529 |
# Read the population CSV into a DataFrame and preview its first five rows.
# NOTE(review): the preview shows an "Unnamed: 0" column that duplicates the
# row index, and the Pop* columns contain commas (e.g. "11117,754"), so they
# are presumably loaded as text rather than numbers - confirm before use.
population_df = pd.read_csv(filepath_or_buffer="Population.csv")
population_df.head()
| Unnamed: 0 | Location | Time | PopMale | PopFemale | PopTotal | PopDensity | |
|---|---|---|---|---|---|---|---|
| 0 | 0 | Afghanistan | 2001 | 11117,754 | 10489,238 | 21606,992 | 33.096 |
| 1 | 1 | Afghanistan | 2002 | 11642,106 | 10958,668 | 22600,774 | 34.618 |
| 2 | 2 | Afghanistan | 2003 | 12214,634 | 11466,237 | 23680,871 | 36.273 |
| 3 | 3 | Afghanistan | 2004 | 12763,726 | 11962,963 | 24726,689 | 37.874 |
| 4 | 4 | Afghanistan | 2005 | 13239,684 | 12414,59 | 25654,274 | 39.295 |
Longkanker is een steeds vaker voorkomend probleem. Door de klimaatopwarming zien we steeds meer stijgingen in longziektes.
# Line chart of yearly lung-cancer deaths for one entry of the 'Country' column.
country = 'World'  # Value matched against the 'Country' column

# Keep only the rows for the selected entry from 2001 onwards
is_selected_country = cancer_df['Country'] == country
is_2001_or_later = cancer_df['Year'] >= 2001
country_data = cancer_df.loc[is_selected_country & is_2001_or_later]

# Keep the year and the lung-cancer column only.
# NOTE(review): the trailing space in the column name is deliberate here and
# presumably mirrors the CSV header - do not strip it without checking.
lung_cancer_column = "Tracheal, bronchus, and lung cancer "
year_lung_cancer = country_data[['Year', lung_cancer_column]]

# Build the Plotly line chart and label its axes
fig = px.line(
    year_lung_cancer,
    x='Year',
    y=lung_cancer_column,
    title='Lung Cancer Deaths per year',
)
fig.update_layout(xaxis_title='Year', yaxis_title='Lung Cancer Deaths')

# Render the chart
fig.show()
# Line chart of total emissions per year for one entry of the 'Country' column.
# Despite its name, country_code is compared with the 'Country' column.
country_code = 'Global'  # Value matched against the 'Country' column

# Keep only the rows for the selected entry from 2001 onwards
is_selected_entry = emissions_df['Country'] == country_code
is_2001_or_later = emissions_df['Year'] >= 2001
country_data = emissions_df.loc[is_selected_entry & is_2001_or_later]

# Keep the year and the total-emissions column only
year_total_emissions = country_data[['Year', 'Total']]

# Build the Plotly line chart and label its axes
fig = px.line(
    year_total_emissions,
    x='Year',
    y='Total',
    title='Total Emissions Over Years Worldwide',
)
fig.update_layout(xaxis_title='Year', yaxis_title='Total Emissions')

# Render the chart
fig.show()
# Line chart of total population over time for one entry of the 'Location'
# column.
#
# The Pop* columns are stored as text with a comma as the DECIMAL separator
# (e.g. 13239,684 + 12414,59 = 25654,274), and the values appear to be in
# thousands of persons. Simply deleting the comma mis-scales every value whose
# trailing zeros were dropped ("12414,59" would become 1241459 instead of
# 12414590), so the comma is turned into a decimal point and the result is
# multiplied by 1000 to obtain whole persons.
for pop_column in ('PopMale', 'PopFemale', 'PopTotal'):
    # Already-numeric columns are left alone so that re-running this cell does
    # not scale the values a second time.
    # NOTE(review): a column that pandas parsed as numeric on load would stay
    # in its original unit - confirm all three columns load as text.
    if pd.api.types.is_numeric_dtype(population_df[pop_column]):
        continue
    in_thousands = (
        population_df[pop_column]
        .astype(str)
        .str.replace(',', '.', regex=False)  # literal replace, not a regex
        .astype(float)
    )
    # Round away floating-point noise introduced by the scaling
    population_df[pop_column] = (in_thousands * 1000).round()

# Keep only the rows for the selected entry of the 'Location' column
country = 'World'  # Value matched against the 'Location' column
country_data = population_df[population_df['Location'] == country]

# Keep the year ('Time') and total-population columns only
year_population = country_data[['Time', 'PopTotal']]

# Build the Plotly line chart and label its axes
fig = px.line(year_population, x='Time', y='PopTotal', title=f'Total Population Growth in {country} (2001 - Present)')
fig.update_layout(xaxis_title='Year', yaxis_title='Total Population')

# Render the chart
fig.show()