World Bank Indicators - GDP per capita from 1960 to 2018

Posted on sam. 02 mai 2020 in Economics • 2 min read

World Bank Indicators - GDP per capita from 1960 to 2018


GDP per capita is gross domestic product divided by midyear total population. GDP is the sum of gross value added by all resident producers in the economy, plus any product taxes and minus any subsidies not included in the value of the products. GDP per capita is a good measure of a country's standard of living: it describes how much citizens benefit from their country's economy. Small, rich countries and more developed industrial countries tend to have the highest GDP per capita.

Data are in current U.S. dollars.

Import required libraries

In [1]:
import pandas as pd
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.animation as animation
import matplotlib
import randomcolor
from IPython.display import HTML

# Shared colour generator; draw_barchart_race uses it to pick one colour per item.
rand_color = randomcolor.RandomColor()
In [2]:
# Switch matplotlib to its dark theme; draw_barchart_date draws its text in white.
plt.style.use('dark_background')

def draw_barchart_race(fig, ax, df, timedelta='D', timeunit='D', interval=250, title='Title', xlabel='Label', top_n=10):
    """Build an animated "bar chart race" from long-format time-series data.

    Args:
        fig: Matplotlib figure that hosts the animation.
        ax: Axes that are cleared and redrawn for every frame.
        df: DataFrame with the columns ``item`` (bar label), ``time``
            (datetime-like) and ``value`` (bar length). The caller's frame
            is left untouched.
        timedelta: Pandas resampling rule used to put each item on a regular
            time grid (gaps are filled by interpolation).
        timeunit: NumPy datetime unit used to format the per-frame label.
        interval: Passed to ``FuncAnimation`` as the delay between frames.
        title: Chart title.
        xlabel: Caption shown above the value axis.
        top_n: Number of highest-valued items drawn in each frame.

    Returns:
        The ``matplotlib.animation.FuncAnimation`` object driving the chart.
    """
    items = df['item'].unique().tolist()
    # The original passed the placeholder string 'luminosity', which is not one
    # of randomcolor's options (bright / light / dark / random). 'bright' suits
    # the dark background used by this notebook.
    colors = dict(zip(items, rand_color.generate(luminosity='bright', count=len(items))))

    # Work on a new frame instead of dropna(inplace=True) so the caller's
    # DataFrame is not modified as a side effect.
    df = df.dropna()
    df = df.set_index('time')
    # Aggregate only the numeric 'value' column: averaging every column also
    # hits the string 'item' column, which newer pandas versions reject.
    df = df.groupby('item').resample(timedelta)['value'].mean().to_frame()
    df['value'] = df['value'].interpolate()

    def draw(time):
        """Redraw the axes with the ``top_n`` largest items at ``time``."""
        ax.clear()

        frame_df = df[df.index.isin([time], level=1)]
        frame_df = frame_df.sort_values(by='value', ascending=True).tail(top_n)
        frame_df = frame_df.reset_index()
        label = np.datetime_as_string(time, unit=timeunit)

        draw_barchart_date(ax, frame_df, title=title, label=label, xlabel=xlabel, colors=colors)

    # Sort explicitly: the index is grouped by item, so first-appearance order
    # is only chronological if the first item happens to span every period.
    frames = np.sort(df.index.get_level_values('time').unique().to_numpy())
    animator = animation.FuncAnimation(fig, draw, frames=frames, interval=interval)
    return animator


def draw_barchart_date(ax, df, title='Title', label='Label', xlabel='Label', colors=None):
    """Draw one horizontal bar chart frame onto ``ax``.

    Args:
        ax: Matplotlib axes to draw on.
        df: DataFrame with the columns ``item`` and ``value``; rows are drawn
            bottom-to-top in the order given.
        title: Heading placed above the axes.
        label: Large caption in the lower-right corner (the frame's date when
            called from ``draw_barchart_race``).
        xlabel: Caption placed above the value axis.
        colors: Optional mapping of item -> bar colour. Items without an
            entry use the fallback colour ``'#8383fc'``.

    Returns:
        The same ``ax`` that was passed in.
    """
    # None instead of a shared mutable {} default.
    colors = {} if colors is None else colors
    labelsize = 12
    textcolor = 'white'
    fallback_color = '#8383fc'

    ax.text(0, 1.06, xlabel, transform=ax.transAxes,
            size=labelsize, color=textcolor)
    ax.xaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}'))
    ax.xaxis.set_ticks_position('top')
    ax.tick_params(axis='x', colors=textcolor, labelsize=labelsize)
    ax.set_yticks([])
    ax.set_axisbelow(True)
    ax.text(0, 1.15, title, transform=ax.transAxes,
            size=24, weight=600, ha='left', va='top')
    ax.text(1, 0, 'Data Show', transform=ax.transAxes, color=textcolor, ha='right', size=16)
    # Hide the frame on the axes we were given; plt.box(False) would act on
    # whatever axes pyplot currently considers active.
    ax.set_frame_on(False)

    bar_colors = [colors.get(item, fallback_color) for item in df['item']]
    ax.barh(df['item'], df['value'], color=bar_colors, alpha=0.8)

    # Small horizontal padding (1% of the longest bar) between a bar's end and
    # the texts placed on either side of it.
    dx = df['value'].max() / 100
    for i, (value, item) in enumerate(zip(df['value'], df['item'])):
        # Item name, right-aligned just inside the end of the bar.
        ax.text(
            value - dx, i,
            item,
            size=16,
            weight=600,
            ha='right',
            va='center'
        )
        # Formatted value, left-aligned just past the end of the bar.
        ax.text(
            value + dx,
            i,
            f'{value: ,.0f}',
            size=14,
            ha='left',
            va='center'
        )

    ax.text(1, 0.06, label, transform=ax.transAxes,
            color=textcolor, size=52, ha='right', weight=800)

    return ax

Data pre-processing

In [3]:
# Load the World Bank export. header=0 discards the file's own header row so
# the short column names below are used instead; '..' is read as NaN.
df = pd.read_csv('Data/World_Development_Indicators.csv',
                 header=0,
                 names=['year', 'timecode', 'country', 'countrycode', 'gdp'],
                 usecols=['year', 'country', 'gdp'],
                 dtype={
                     # Builtin float: the np.float alias is deprecated since
                     # NumPy 1.20 and no longer exists in NumPy >= 1.24.
                     'gdp': float
                 },
                 na_values='..')

df.head()
<ipython-input-3-e238b6a8b3f7>:6: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  'gdp': np.float
Out[3]:
year country gdp
0 1960 Afghanistan 59.773194
1 1960 Albania NaN
2 1960 Algeria 246.308763
3 1960 American Samoa NaN
4 1960 Andorra NaN

Cleaning

In [4]:
# Remove the last five rows of the table in place
# (presumably the export's trailing non-data rows — TODO confirm against the CSV).
df.drop(index=df.index[-5:], inplace=True)

df.head()
Out[4]:
year country gdp
0 1960 Afghanistan 59.773194
1 1960 Albania NaN
2 1960 Algeria 246.308763
3 1960 American Samoa NaN
4 1960 Andorra NaN

Barchart

In [5]:
# NOTE(review): `year` and `date` are not used in this cell; kept in case
# other cells rely on them.
year = '2018'
date = np.datetime64(year)

# Rename the columns to the generic item/time/value schema expected by
# draw_barchart_race, working on a copy so `df` stays as loaded.
chart_df = df.copy()
# Parse the year explicitly: without a format, integer years would be read as
# nanosecond offsets from 1970 rather than calendar years.
chart_df['year'] = pd.to_datetime(chart_df['year'], format='%Y')
chart_df.rename(columns={'year': 'time', 'country': 'item', 'gdp': 'value'}, inplace=True)

# Create the figure once; the original created and cleared an extra figure
# first, which was never drawn on.
fig, ax = plt.subplots(figsize=(20, 10))

interval = 500
animator = draw_barchart_race(fig, ax,
                              chart_df,
                              timedelta='Y',
                              timeunit='Y',
                              interval=interval,
                              title='GDP per capita from 1960 to 2018',
                              xlabel='GDP/capita (current U.S. dollars)')

HTML(animator.to_jshtml())
Out[5]:

Sources