Source code for equilibrium.compute_equilibrium

# -*- coding: utf-8 -*-

import numpy as np
from tqdm import tqdm
import copy

import equilibrium.sub.compute_outputs as eqout


[docs]def compute_equilibrium(fraction_capital_destroyed, amenities, param,
                        housing_limit, population, households_per_income_class,
                        total_RDP, coeff_land, income_net_of_commuting_costs,
                        grid, options, agricultural_rent, interest_rate,
                        number_properties_RDP, average_income, mean_income,
                        income_class_by_housing_type, minimum_housing_supply,
                        construction_param, income_baseline):
    """
    Run the static equilibrium algorithm.

    This function runs the algorithm described in the technical documentation.
    It starts from arbitrary utility levels, and leverages optimality
    conditions on supply and demand to recover key output variables
    (detailed in equilibrium.sub.compute_outputs). Then, it updates utility
    levels to minimize the error between simulated and target number of
    households per income group. Note that the whole process abstracts from
    formal subsidised housing (fully exogenous in the model), that is added
    to final outputs at the end of the function.

    Parameters
    ----------
    fraction_capital_destroyed : DataFrame
        Data frame of expected fractions of capital destroyed, for housing
        structures and contents in different housing types, in each
        grid cell (24,014)
    amenities : ndarray(float64)
        Normalized amenity index (relative to the mean) for each grid cell
        (24,014)
    param : dict
        Dictionary of default parameters
    housing_limit : Series
        Maximum housing supply (in m² per km²) in each grid cell (24,014)
    population : int64
        Total number of households in the city (from Small-Area-Level data)
    households_per_income_class : ndarray(float64)
        Exogenous total number of households per income group (excluding people
        out of employment, for 4 groups)
    total_RDP : int
        Number of households living in formal subsidized housing (from SAL
        data)
    coeff_land : ndarray(float64, ndim=2)
        Updated land availability for each grid cell (24,014) and each
        housing type (4: formal private, informal backyards, informal
        settlements, formal subsidized)
    income_net_of_commuting_costs : ndarray(float64, ndim=2)
        Expected annual income net of commuting costs (in rands, for
        one household), for each geographic unit, by income group (4)
    grid : DataFrame
        Table yielding, for each grid cell (24,014), its x and y
        (centroid) coordinates, and its distance (in km) to the city centre
    options : dict
        Dictionary of default options
    agricultural_rent : float64
        Annual housing rent below which it is not profitable for formal private
        developers to urbanize (agricultural) land: endogenously limits urban
        sprawl
    interest_rate : float64
        Real interest rate for the overall economy, corresponding to an average
        over past years
    number_properties_RDP : ndarray(float64)
        Number of formal subsidized dwellings per grid cell (24,014) at
        baseline year (2011)
    average_income : ndarray(float64)
        Average median income for each income group in the model (4)
    mean_income : float64
        Average median income across total population
    income_class_by_housing_type : DataFrame
        Set of dummies coding for housing market access (across 4 housing
        submarkets) for each income group (4, from poorest to richest)
    minimum_housing_supply : ndarray(float64)
        Minimum housing supply (in m²) for each grid cell (24,014), allowing
        for an ad hoc correction of low values in Mitchells Plain
    construction_param : ndarray(float64)
        (Calibrated) scale factor for the construction function of formal
        private developers
    income_baseline : DataFrame
        Table summarizing, for each income group in the data (12, including
        people out of employment), the number of households living in each
        endogenous housing type (3), their total number at baseline year (2011)
        in retrospect (2001), as well as the distribution of their average
        income (at baseline year)

    Returns
    -------
    initial_state_utility : ndarray(float64)
        Utility levels for each income group (4) at baseline year
        (2011)
    initial_state_error : ndarray(float64)
        Ratio (in %) of simulated number of households per income group over
        target population per income group at baseline year (2011)
    initial_state_simulated_jobs : ndarray(float64, ndim=2)
        Total number of households in each income group (4) in each
        endogenous housing type (3: formal private, informal backyards,
        informal settlements) at baseline year (2011)
    initial_state_households_housing_types : ndarray(float64, ndim=2)
        Number of households per grid cell in each housing type (4)
        at baseline year (2011)
    initial_state_household_centers : ndarray(float64, ndim=2)
        Number of households per grid cell in each income group (4)
        at baseline year (2011)
    initial_state_households : ndarray(float64, ndim=3)
        Number of households per grid cell in each income group (4) and
        each housing type (4) at baseline year (2011)
    initial_state_dwelling_size : ndarray(float64, ndim=2)
        Average dwelling size (in m²) per grid cell in each housing
        type (4) at baseline year (2011)
    initial_state_housing_supply : ndarray(float64, ndim=2)
        Housing supply per unit of available land (in m² per km²)
        for each housing type (4) in each grid cell at baseline year (2011)
    initial_state_rent : ndarray(float64, ndim=2)
        Average annual rent (in rands) per grid cell for each housing type
        (4) at baseline year (2011)
    initial_state_rent_matrix : ndarray(float64, ndim=3)
        Average annual willingness to pay (in rands) per grid cell
        for each income group (4) and each endogenous housing type (3)
        at baseline year (2011)
    initial_state_capital_land : ndarray(float64, ndim=2)
        Value (in rands) of the housing capital stock per unit of available
        land (in km²) for each endogenous housing type (3) per grid cell
        at baseline year (2011)
    initial_state_average_income : ndarray(float64)
        Not an output of the model per se : it is just the average median
        income for each income group in the model (4), that may change
        over time
    initial_state_limit_city : list
        Contains a ndarray(bool, ndim=3) of indicator dummies for having
        strictly more than one household per housing type and income group
        in each grid cell

    """
    # Adjust the population to include unemployed people, then take out RDP
    # by considering that they all belong to poorest income group

    # General reweighting using SAL data (no formal backyards)
    if options["unempl_reweight"] == 0:
        ratio = population / sum(households_per_income_class)
        households_per_income_class = households_per_income_class * ratio

    # Alternative strategy: we attribute the unemployed population in
    # proportion with calibrated unemployment rates, without applying them
    # directly (as they are too noisy)
    if options["unempl_reweight"] == 1:
        ratio = [2 / size for size in param["household_size"]]
        households_tot = households_per_income_class * ratio
        households_unempl = households_tot - households_per_income_class
        weights = households_unempl / sum(households_unempl)
        unempl_pop = income_baseline.Households_nb[0]
        unempl_attrib = [unempl_pop * w for w in weights]
        households_per_income_class = (
            households_per_income_class + unempl_attrib)
        ratio = population / sum(households_per_income_class)
        households_per_income_class = households_per_income_class * ratio
        # implicit_empl_rate = ((households_per_income_class - unempl_attrib)
        #                       / households_per_income_class)
        # 0.74/0.99/0.98/0.99

    #  Considering that all RDP belong to the poorest, we remove them from here
    households_per_income_class[0] = np.max(
        households_per_income_class[0] - total_RDP, 0)

    # Shorten the grid
    #  We select pixels with constructible shares for all housing types > 0.01
    #  and a positive maximum income net of commuting costs across classes
    #  NB: we will have no output in other pixels (numeric simplification)
    selected_pixels = (
        (np.sum(coeff_land, 0) > 0.01).squeeze()
        & (np.nanmax(income_net_of_commuting_costs, 0) > 0)
        )
    coeff_land_full = copy.deepcopy(coeff_land)
    coeff_land = coeff_land[:, selected_pixels]
    grid_temp = copy.deepcopy(grid)
    grid = grid.iloc[selected_pixels, :]
    housing_limit = housing_limit[selected_pixels]
    income_net_of_commuting_costs = income_net_of_commuting_costs[
        :, selected_pixels]
    minimum_housing_supply = minimum_housing_supply[selected_pixels]
    housing_in = copy.deepcopy(param["housing_in"][selected_pixels])
    amenities = amenities[selected_pixels]
    fraction_capital_destroyed = fraction_capital_destroyed.iloc[
        selected_pixels, :]
    param_pockets = param["informal_pockets"][selected_pixels]
    param_backyards_pockets = param["backyard_pockets"][selected_pixels]

    # Useful variables for the solver
    # (we only consider 3 types of housing in the solver)
    diff_utility = np.zeros((param["max_iter"], param["nb_of_income_classes"]))
    simulated_people_housing_types = np.zeros(
        (param["max_iter"], 3, len(grid.dist)))
    simulated_people = np.zeros((3, 4, len(grid.dist)))
    simulated_jobs = np.zeros(
        (param["max_iter"], 3, param["nb_of_income_classes"]))
    total_simulated_jobs = np.zeros(
        (param["max_iter"], param["nb_of_income_classes"]))
    rent_matrix = np.zeros((param["max_iter"], 3, len(grid.dist)))
    error_max_abs = np.zeros(param["max_iter"])
    error_max = np.zeros(param["max_iter"])
    error_mean = np.zeros(param["max_iter"])
    nb_error = np.zeros(param["max_iter"])
    error = np.zeros((param["max_iter"], param["nb_of_income_classes"]))
    housing_supply = np.empty((3, len(grid.dist)))
    dwelling_size = np.empty((3, len(grid.dist)))
    R_mat = np.empty((3, 4, len(grid.dist)))
    housing_supply[:] = np.nan
    dwelling_size[:] = np.nan
    R_mat[:] = np.nan

    # Initialisation solver
    utility = np.zeros((param["max_iter"], param["nb_of_income_classes"]))
    #  We take arbitrary utility levels, not too far from what we would expect,
    #  to make computation quicker
    utility[0, :] = np.array([1200, 4800, 16000, 77000])
    # utility[0, :] = np.array([400, 900, 5400, 26000])
    index_iteration = 0
    #  We need to apply some convergence factor to our error terms to make them
    #  converge in our optimization: the formula comes from trial and error,
    #  but the intuition is that we put more weight on the error for households
    #  with high relative income as it will magnify the effect on rents, hence
    #  housing supply and resulting population distribution

    param["convergence_factor"] = (
        0.01 * (np.nanmean(average_income) / mean_income) ** 0.01
        )

    # Compute outputs solver (for each housing type, no RDP)
    #  Formal housing
    (simulated_jobs[index_iteration, 0, :], rent_matrix[index_iteration, 0, :],
     simulated_people_housing_types[index_iteration, 0, :],
     simulated_people[0, :, :], housing_supply[0, :], dwelling_size[0, :],
     R_mat[0, :, :]) = eqout.compute_outputs(
         'formal', utility[index_iteration, :], amenities, param,
         income_net_of_commuting_costs, fraction_capital_destroyed, grid,
         income_class_by_housing_type, options, housing_limit,
         agricultural_rent, interest_rate, coeff_land[0, :],
         minimum_housing_supply, construction_param, housing_in, param_pockets,
         param_backyards_pockets
         )
    #  Backyard housing
    (simulated_jobs[index_iteration, 1, :], rent_matrix[index_iteration, 1, :],
     simulated_people_housing_types[index_iteration, 1, :],
     simulated_people[1, :, :], housing_supply[1, :], dwelling_size[1, :],
     R_mat[1, :, :]) = eqout.compute_outputs(
         'backyard', utility[index_iteration, :], amenities, param,
         income_net_of_commuting_costs, fraction_capital_destroyed, grid,
         income_class_by_housing_type, options, housing_limit,
         agricultural_rent, interest_rate, coeff_land[1, :],
         minimum_housing_supply, construction_param, housing_in, param_pockets,
         param_backyards_pockets
         )
    #  Informal housing
    (simulated_jobs[index_iteration, 2, :], rent_matrix[index_iteration, 2, :],
     simulated_people_housing_types[index_iteration, 2, :],
     simulated_people[2, :, :], housing_supply[2, :], dwelling_size[2, :],
     R_mat[2, :, :]) = eqout.compute_outputs(
         'informal', utility[index_iteration, :], amenities, param,
         income_net_of_commuting_costs, fraction_capital_destroyed, grid,
         income_class_by_housing_type, options, housing_limit,
         agricultural_rent, interest_rate, coeff_land[2, :],
         minimum_housing_supply, construction_param, housing_in, param_pockets,
         param_backyards_pockets
         )

    # Compute error and adjust utility

    #  We first update the first iteration of output vector with the sum of
    #  simulated people per income group across housing types (no RDP)
    total_simulated_jobs[index_iteration, :] = np.sum(
        simulated_jobs[index_iteration, :, :], 0)

    #  diff_utility will be used to adjust the utility levels
    #  Note that optimization is made to stick to households_per_income_class,
    #  which does not include people living in RDP (as we take this as
    #  exogenous)

    #  We compare total population for each income group obtained from
    #  equilibrium condition (total_simulated_jobs) with target population
    #  allocation (households_per_income_class)

    #  We arbitrarily set a strictly positive minimum utility level at 10
    #  (as utility will be adjusted multiplicatively, we do not want to break
    #  the model with zero terms)

    diff_utility[index_iteration, :] = np.log(
        (total_simulated_jobs[index_iteration, :] + 10)
        / (households_per_income_class + 10))
    diff_utility[index_iteration, :] = (
        diff_utility[index_iteration, :] * param["convergence_factor"])
    (diff_utility[index_iteration, diff_utility[index_iteration, :] > 0]
     ) = (diff_utility[index_iteration, diff_utility[index_iteration, :] > 0]
          * 1.1)

    # Difference with reality
    error[index_iteration, :] = (total_simulated_jobs[index_iteration, :]
                                 / households_per_income_class - 1) * 100
    #  This is the parameter of interest for optimization
    error_max_abs[index_iteration] = np.nanmax(
        np.abs(total_simulated_jobs[index_iteration, :]
               / households_per_income_class - 1))
    #  Other parameters
    error_max[index_iteration] = -1
    error_mean[index_iteration] = np.nanmean(
        np.abs(total_simulated_jobs[index_iteration, :]
               / (households_per_income_class + 0.001) - 1))
    nb_error[index_iteration] = np.nansum(
        np.abs(total_simulated_jobs[index_iteration, :]
               / households_per_income_class - 1) > param["precision"])

    # Iteration (no RDP)
    # We use a progression bar
    with tqdm(total=param["max_iter"],
              desc="stops when error_max_abs <" + str(param["precision"])
              ) as pbar:
        while ((index_iteration < param["max_iter"] - 1)
               & (error_max_abs[index_iteration] > param["precision"])):

            # Adjust parameters to how close we are from the objective
            index_iteration = index_iteration + 1
            # When population is overestimated, we augment utility to reduce
            # population (cf. population constraint from standard model)
            utility[index_iteration, :] = np.exp(
                np.log(utility[index_iteration - 1, :])
                + diff_utility[index_iteration - 1, :])
            # This is a precaution as utility cannot be negative
            utility[index_iteration, utility[index_iteration, :] < 0] = 10

            # We reduce the convergence factor at each iteration in inverse
            # proportion with the estimation error not to overshoot target in
            # subsequent iterations

            # NB: we assume the minimum error is 100 not to break model with
            # zeros
            convergence_factor = (
                param["convergence_factor"] / (
                    1
                    + 0.5 * np.abs(
                        (total_simulated_jobs[index_iteration, :] + 100)
                        / (households_per_income_class + 100)
                        - 1)
                    )
                )

            # We also reduce it as time passes, as errors should become smaller
            # and smaller
            convergence_factor = (
                convergence_factor
                * (1 - 0.6 * index_iteration / param["max_iter"])
                )

            # Now, we do the same as in the initalization phase

            # Compute outputs solver

            #  Formal housing
            (simulated_jobs[index_iteration, 0, :],
             rent_matrix[index_iteration, 0, :],
             simulated_people_housing_types[index_iteration, 0, :],
             simulated_people[0, :, :],
             housing_supply[0, :],
             dwelling_size[0, :],
             R_mat[0, :, :]) = eqout.compute_outputs(
                 'formal', utility[index_iteration, :], amenities, param,
                 income_net_of_commuting_costs, fraction_capital_destroyed,
                 grid, income_class_by_housing_type, options, housing_limit,
                 agricultural_rent, interest_rate, coeff_land[0, :],
                 minimum_housing_supply, construction_param, housing_in,
                 param_pockets, param_backyards_pockets
                 )

            #  Backyard housing
            (simulated_jobs[index_iteration, 1, :],
             rent_matrix[index_iteration, 1, :],
             simulated_people_housing_types[index_iteration, 1, :],
             simulated_people[1, :, :],
             housing_supply[1, :],
             dwelling_size[1, :],
             R_mat[1, :, :]) = eqout.compute_outputs(
                 'backyard', utility[index_iteration, :], amenities, param,
                 income_net_of_commuting_costs, fraction_capital_destroyed,
                 grid, income_class_by_housing_type, options, housing_limit,
                 agricultural_rent, interest_rate, coeff_land[1, :],
                 minimum_housing_supply, construction_param, housing_in,
                 param_pockets, param_backyards_pockets
                 )

            #  Informal housing
            (simulated_jobs[index_iteration, 2, :],
             rent_matrix[index_iteration, 2, :],
             simulated_people_housing_types[index_iteration, 2, :],
             simulated_people[2, :, :],
             housing_supply[2, :],
             dwelling_size[2, :],
             R_mat[2, :, :]) = eqout.compute_outputs(
                 'informal', utility[index_iteration, :], amenities, param,
                 income_net_of_commuting_costs, fraction_capital_destroyed,
                 grid, income_class_by_housing_type, options, housing_limit,
                 agricultural_rent, interest_rate, coeff_land[2, :],
                 minimum_housing_supply, construction_param, housing_in,
                 param_pockets, param_backyards_pockets
                 )

            # Compute error and adjust utility
            total_simulated_jobs[index_iteration, :] = np.sum(
                simulated_jobs[index_iteration, :, :], 0)

            # diff_utility will be used to adjust the utility levels
            diff_utility[index_iteration, :] = np.log(
                (total_simulated_jobs[index_iteration, :] + 10)
                / (households_per_income_class + 10))
            diff_utility[index_iteration, :] = (
                diff_utility[index_iteration, :] * convergence_factor)
            diff_utility[
                index_iteration, diff_utility[index_iteration, :] > 0] = (
                    diff_utility[index_iteration,
                                 diff_utility[index_iteration, :] > 0] * 1.1
                    )

            # Variables to display
            error[index_iteration, :] = (
                total_simulated_jobs[index_iteration, :]
                / households_per_income_class - 1) * 100
            error_max_abs[index_iteration] = np.max(np.abs(
                total_simulated_jobs[index_iteration,
                                     households_per_income_class != 0]
                / households_per_income_class - 1))
            m = np.argmax(np.abs(total_simulated_jobs[index_iteration, :]
                                 / households_per_income_class - 1))
            erreur_temp = (total_simulated_jobs[index_iteration, :]
                           / households_per_income_class - 1)
            error_max[index_iteration] = erreur_temp[m]

            error_mean[index_iteration] = np.mean(np.abs(
                total_simulated_jobs[index_iteration, :]
                / (households_per_income_class + 0.001) - 1))
            nb_error[index_iteration] = np.sum(np.abs(
                total_simulated_jobs[index_iteration, :]
                / households_per_income_class - 1) > param["precision"])
            pbar.set_postfix({'error_max_abs': error_max_abs[index_iteration]})
            pbar.update()

    # We plug back RDP houses in the output : let us define useful variables
    # first
    #  We correct output coming from data_RDP with more reliable estimations
    #  from SAL data (to include council housing)
    households_RDP = (number_properties_RDP * total_RDP
                      / sum(number_properties_RDP))
    #  Share of housing (no backyard) in RDP surface (with land in km²)
    construction_RDP = np.matlib.repmat(
        param["RDP_size"] / (param["RDP_size"] + param["backyard_size"]),
        1, len(grid_temp.dist))
    #  RDP dwelling size (in m²)
    dwelling_size_RDP = np.matlib.repmat(
        param["RDP_size"], 1, len(grid_temp.dist))

    # We fill the output matrix for each housing type
    simulated_people_with_RDP = np.zeros((4, 4, len(grid_temp.dist)))
    simulated_people_with_RDP[0, :, selected_pixels] = np.transpose(
        simulated_people[0, :, :, ])
    simulated_people_with_RDP[1, :, selected_pixels] = np.transpose(
        simulated_people[1, :, :, ])
    simulated_people_with_RDP[2, :, selected_pixels] = np.transpose(
        simulated_people[2, :, :, ])
    simulated_people_with_RDP[3, 0, :] = households_RDP

    # Outputs of the solver

    initial_state_error = error[index_iteration, :]

    #  Note that this does not contain RDP
    initial_state_simulated_jobs = simulated_jobs[index_iteration, :, :]

    #  We sum across income groups (axis=1)
    initial_state_households_housing_types = np.sum(simulated_people_with_RDP,
                                                    1)
    #  We sum across housing types (axis=0)
    initial_state_household_centers = np.sum(simulated_people_with_RDP, 0)

    #  We keep both dimensions
    initial_state_households = simulated_people_with_RDP

    # Housing stock and dwelling size (fill with RDP)
    housing_supply_export = np.zeros((3, len(grid_temp.dist)))
    dwelling_size_export = np.zeros((3, len(grid_temp.dist)))
    housing_supply_export[:, selected_pixels] = housing_supply
    dwelling_size_export[:, selected_pixels] = copy.deepcopy(dwelling_size)
    dwelling_size_export[dwelling_size_export <= 0] = np.nan
    # NB: we multiply by construction_RDP because we want the housing supply
    # per unit of AVAILABLE land: RDP building is only a fraction of overall
    # surface (also accounts for backyarding)
    housing_supply_RDP = (
        construction_RDP * dwelling_size_RDP * households_RDP
        / (coeff_land_full[3, :] * 0.25)
        )
    housing_supply_RDP[np.isnan(housing_supply_RDP)] = 0
    dwelling_size_RDP = dwelling_size_RDP * (coeff_land_full[3, :] > 0)
    initial_state_dwelling_size = np.vstack(
        [dwelling_size_export, dwelling_size_RDP])
    # Note that RDP housing supply per unit of available land has nothing to do
    # with backyard housing supply per unit of available land
    initial_state_housing_supply = np.vstack(
        [housing_supply_export, housing_supply_RDP]
        )

    # Rents (HHs in RDP pay a rent of 0)
    rent_temp = copy.deepcopy(rent_matrix[index_iteration, :, :])
    rent_export = np.zeros((3, len(grid_temp.dist)))
    rent_export[:, selected_pixels] = copy.deepcopy(rent_temp)
    rent_export[:, selected_pixels == 0] = np.nan
    initial_state_rent = np.vstack(
        [rent_export, np.full(len(grid_temp.dist), np.nan)])
    rent_matrix_export = np.zeros(
        (3, len(average_income), len(grid_temp.dist)))
    rent_matrix_export[:, :, selected_pixels] = copy.deepcopy(R_mat)
    rent_matrix_export[:, :, selected_pixels == 0] = np.nan
    initial_state_rent_matrix = copy.deepcopy(rent_matrix_export)

    # Other outputs
    initial_state_utility = utility[index_iteration, :]
    # Housing capital value per unit of available land: see math appendix
    initial_state_capital_land = ((initial_state_housing_supply / 1000000
                                   / construction_param)
                                  ** (1 / param["coeff_b"]))
    #  NB: this is not an output of the model
    initial_state_average_income = copy.deepcopy(average_income)
    #  NB: this is not used in practice and is included for reference
    initial_state_limit_city = [initial_state_households > 1]

    return (initial_state_utility,
            initial_state_error,
            initial_state_simulated_jobs,
            initial_state_households_housing_types,
            initial_state_household_centers,
            initial_state_households,
            initial_state_dwelling_size,
            initial_state_housing_supply,
            initial_state_rent,
            initial_state_rent_matrix,
            initial_state_capital_land,
            initial_state_average_income,
            initial_state_limit_city)