In [2]:
import pandas as pd
import plotly.express as px
import numpy as np
from plotly.subplots import make_subplots
In [3]:
# Data source: Global Witness
# (https://www.globalwitness.org/en/campaigns/environmental-activists/numbers-lethal-attacks-against-defenders-2012/)
# The CSV file is read from the notebook's working directory.
gw_data = pd.read_csv(filepath_or_buffer="global_witness_led_22-09-22.csv")
Columns of the CSV file:
In [4]:
# Show the column labels of the loaded table.
gw_data.columns
Out[4]:
Index(['id', 'date', 'name', 'gender', 'age', 'person_characteristics', 'industry_driver', 'perpetrator_type', 'continent', 'country', 'country_numeric', 'local_region', 'number_of_victims'], dtype='object')
Number of rows, columns:
In [5]:
# Show the table size as a (rows, columns) tuple.
gw_data.shape
Out[5]:
(1733, 13)
The data looks like this (minus a couple of columns):
In [6]:
# Styled preview of the first ten records. The "id" and "country_numeric"
# columns and the row index are hidden; the remaining cells get a light tinted
# background, rows are highlighted on hover, and header cells get their own tint.
(
    gw_data.head(10)
    .style
    .hide(axis="columns", subset=["id", "country_numeric"])
    .hide(axis="index")
    .set_properties(
        **{
            'background-color': 'RGBA(232, 232, 250, .5)',
            'border': '1.5px solid white',
            'color': '#495057',
        }
    )
    .set_table_styles(
        [
            {'selector': 'tr:hover', 'props': [('background-color', '#fffacd')]},
            {'selector': 'th', 'props': [('background-color', 'RGBA(180, 142, 173, .5)')]},
        ]
    )
)
Out[6]:
date | name | gender | age | person_characteristics | industry_driver | perpetrator_type | continent | country | local_region | number_of_victims |
---|---|---|---|---|---|---|---|---|---|---|
2016-02-18 | Aníbal Coronado Madera | Male | 49.000000 | Other | Sector could not be confirmed* | Private military actors | Americas | Colombia | Córdoba | 1 |
2016-10-16 | Yimer Chávez Rivera | Male | 31.000000 | Small-scale farmer | Sector could not be confirmed* | Private military actors | Americas | Colombia | Cauca | 1 |
2015-02-10 | Héctor William Cabrera Suárez | Male | 58.000000 | Other | Sector could not be confirmed* | Unspecified | Americas | Colombia | Caquetá | 1 |
2016-04-27 | Rolan Lonin Casiano | Male | 32.000000 | Small-scale farmer | Sector could not be confirmed* | Armed forces | Asia | Philippines | Bicol | 1 |
2015-05-22 | Benilda Santos | Female | 43.000000 | nan | Sector could not be confirmed* | Unspecified | Asia | Philippines | Quezon City | 1 |
2016-08-29 | Diego Alfredo Chirán Nastacuas | Male | 24.000000 | Indigenous peoples | Sector could not be confirmed* | Unspecified | Americas | Colombia | Nariño | 1 |
2014-04-21 | Luis Javier Campo Méndez | Male | 22.000000 | Indigenous peoples | Sector could not be confirmed* | Unspecified | Americas | Colombia | nan | 1 |
2016-12-30 | Yaneth Alejandra Calvache Viveros | Female | nan | Small-scale farmer | Sector could not be confirmed* | Hitmen | Americas | Colombia | Cauca | 1 |
2017-10-24 | Aulio Isarama Forastero | Male | nan | Indigenous peoples | Sector could not be confirmed* | Private military actors | Americas | Colombia | Chocó | 1 |
2016-05-20 | Manuel Chimá Pérez | Male | 21.000000 | Indigenous peoples | Sector could not be confirmed* | Private military actors | Americas | Colombia | Antioquia | 1 |
In [9]:
# gw_data.number_of_victims.unique()
# Manual check (left disabled): lists the distinct values of number_of_victims to confirm that each row records a single attack victim.
Note: each row records one attack victim
About the victims¶
In [10]:
# Tally the records per recorded gender (value_counts() leaves out missing values by default).
gw_data["gender"].value_counts()
Out[10]:
gender Male 1545 Female 183 Unknown 1 Name: count, dtype: int64
In [11]:
# Build the gender summary from the data itself rather than from hand-copied
# numbers, so the labels match the dataset's spelling (e.g. "Unknown") and the
# counts stay correct if the CSV is refreshed.
# value_counts() sorts by descending count and skips missing genders.
gender_counts = gw_data["gender"].value_counts()
gender_data = {
    "gender": gender_counts.index.tolist(),
    "count": gender_counts.tolist(),
}
gender_df = pd.DataFrame(gender_data)
In [12]:
# Treemap with one tile per gender, sized by the "count" column and
# coloured by gender; hovering a tile shows its count as the headline.
treemap_gender = px.treemap(
    data_frame=gender_df,
    path=['gender'],
    values='count',
    color='gender',
    hover_name='count',
    color_discrete_sequence=["#81a1c1", "#a3be8c", "#b48ead", "#b48ead"],
)
In [13]:
# Columns selected for the victim charts.
scatter_df = gw_data[
    ["age", "gender", "person_characteristics", "number_of_victims", "industry_driver", "continent"]
]
# Data error: one record has an age of 4145, which cannot be a real age.
# It is assumed to be a typo for 41.
# The replacement is scoped to the "age" column so that an identical value in
# any other column (e.g. number_of_victims) is never rewritten by accident.
fixed_df = scatter_df.replace({"age": {4145.0: 41}})
In [14]:
# Age histogram (20 bins requested) with one coloured series per gender.
hist_age_gender = px.histogram(
    data_frame=fixed_df,
    x="age",
    color="gender",
    nbins=20,
    color_discrete_sequence=["#81a1c1", "#a3be8c", "#b48ead"],
)
In [15]:
# Bar chart of victims per personal characteristic, coloured by gender.
# Each record contributes its own number_of_victims value, so hovering a
# segment shows that record's age.
bar_characteristic = px.bar(
    fixed_df,
    x='person_characteristics',
    y='number_of_victims',
    color="gender",
    color_discrete_sequence=["#0354A4", "#0A9623", "#f08c00"],
    hover_name="person_characteristics",
    # Passed as a list: a bare string is only accepted by recent Plotly Express
    # releases, while a list of column names works across versions.
    hover_data=["age"],
)
In [16]:
# Combine the treemap, the age histogram and the characteristics bar chart
# into a single figure: treemap on the top-left, histogram bottom-left,
# bar chart bottom-right.

# Treemap tiles show their label and their share of the total.
treemap_gender.update_traces(textinfo="label+percent root")
# Put all histogram traces into one legend group.
hist_age_gender.update_traces(legendgroup="1")
# Hide the bar traces' legend entries so each gender is listed only once.
bar_characteristic.update_traces(showlegend=False)

# Traces of each source figure, in their original order.
fig1_traces = list(treemap_gender.data)
fig2_traces = list(hist_age_gender.data)
fig3_traces = list(bar_characteristic.data)

# The top-right cell is left empty (None); the treemap needs a "domain" cell.
fig_gender = make_subplots(
    rows=2,
    cols=2,
    specs=[[{"type": "domain"}, None], [{"type": "xy"}, {"type": "xy"}]],
    column_widths=[0.5, 0.5],
    row_heights=[0.35, 0.65],
    subplot_titles=(
        'Attacks victims are men in majority',
        'Victims are mostly adults between 20-60',
        'Indigenous peoples overrepresented among victims',
    ),
)

# Place each group of traces in its subplot cell.
for panel_traces, panel_row, panel_col in (
    (fig1_traces, 1, 1),
    (fig2_traces, 2, 1),
    (fig3_traces, 2, 2),
):
    for trace in panel_traces:
        fig_gender.add_trace(trace, row=panel_row, col=panel_col)

fig_gender.update_layout(
    template="plotly_white",
    title="Victims distributed by gender, age and personal characteristic (data source: www.globalwitness.org)",
    showlegend=True,
    legend={"title": "Gender", "yanchor": "bottom", "y": 0.3, "xanchor": "right", "x": 0.45},
    legend_tracegroupgap=50,
    barmode='relative',
    margin=dict(r=5, t=80, b=5, l=5),
    autosize=False,
    width=1200,
    height=700,
)

# Label the axes of both xy panels. Addressing them by row/col avoids relying
# on axis numbering; previously only the histogram panel had axis titles.
fig_gender.update_xaxes(title_text='Age', row=2, col=1)
fig_gender.update_yaxes(title_text='Number of victims', row=2, col=1)
fig_gender.update_xaxes(title_text='Personal characteristic', row=2, col=2)
fig_gender.update_yaxes(title_text='Number of victims', row=2, col=2)

fig_gender.show()