Big bang

jbnsn · jbnsn · commit 1e455c79b153 · 2022-12-12T08:32:12.000+01:00
diff --git a/README.md b/README.md
@@ -0,0 +1,23 @@
+# Disaggregating input-output models with incomplete information
+
+Implementation of [Lindner et al. (2012)](https://www.tandfonline.com/doi/suppl/10.1080/09535314.2012.689954) in Python with NumPy and Pandas.
+
+The comments in `main.py` originate from [`cesr_a_689954_sup_27358897.docx`](https://www.tandfonline.com/doi/suppl/10.1080/09535314.2012.689954).
+
+Both scripts contain one source of randomness: `base(p,:) = rand(1,Nv)` in the original Matlab script and `base[p, :] = np.random.rand(1, Nv)` in the Python script.
+
+For verification purposes, a fixed seed, e.g. `np.random.seed(1337)` (Python) and `rand('twister', 1337)` (Matlab), can be set to make the random numbers reproducible.
+
+## Lindner (2012)
+Lindner, Sören, Julien Legault, and Dabo Guan. 2012.
+‘Disaggregating Input–Output Models with Incomplete Information’.
+Economic Systems Research 24 (4): 329–47.
+https://doi.org/10.1080/09535314.2012.689954.
+
+## Verification (`D`)
+
+### Matlab
+![main.m](./verification/main.m.png)
+
+### Python
+![main.py](./verification/main.py.png)
diff --git a/input/io-table-cn-2007-flows-idx.csv b/input/io-table-cn-2007-flows-idx.csv
@@ -0,0 +1,16 @@
+﻿Category,Cat
+Agriculture,Ag
+Coal mining and processing,CmP
+Petroleum processing and natural gas products,Pp
+Food manufacturing and tobacco products,Fm
+Petroleum processing and coking,Ppc
+Chemicals,Ch
+Metal smelting and pressing,Msp
+Machinery and equipment,M+e
+Gas production and distribution,Gp+d
+Construction,Co
+Transport and warehousing,T+w
+Electricity production and distribution,Ep+d
+Intermediate demand,ID
+Final demand,FD
+Total output,TO
diff --git a/input/io-table-cn-2007-flows.csv b/input/io-table-cn-2007-flows.csv
@@ -0,0 +1,12 @@
+﻿687.7,7,0.8,2223.1,0,167.6,0.7,66.4,0,25.9,255,0,3434.2,1420.5,4854.7
+2.7,97,5.7,37.1,112,193.5,122.7,22.7,7.1,5.7,25.5,330.2,961.9,41.4,1003.3
+0.6,1.3,114.8,11,1189.4,442.2,933.4,29.3,55.7,83.5,17.5,36.8,2915.5,62.3,2977.8
+482.2,15.7,25,3813.9,15.8,326.7,98.6,370.1,3.3,171.3,1368.1,27.5,6718.2,4675.6,11393.8
+39.4,13.6,89.2,46.2,121.4,463,298.4,83.7,3.4,126.7,771.3,127.5,2183.8,145.5,2329.3
+379.8,27.1,122.8,885.2,48,3176.6,250.9,1098.6,7.4,1579,758.9,15.5,8349.8,1189.9,9539.7
+14.6,69.3,86.6,136.6,10.3,228.8,2972.3,2684.5,4.7,1208.8,109.4,17.3,7543.2,1085.9,8629.1
+58.6,98,197.2,307.8,50.1,339.4,683.5,6359,8.4,531.9,1331.4,295,10260.3,8754.1,19014.4
+1.1,1.7,9.2,17.6,4.9,29.8,17.8,17.7,9.5,3,40.1,9.3,161.7,64.9,226.6
+1.1,1.3,1.4,2.6,1.2,2.7,2.1,3.5,0.2,59.8,123.1,1,200,6018.7,6218.7
+309.7,129.5,189,917.1,130.9,787.8,570.3,1366.1,27.1,942.5,3873.2,278.2,9521.4,10119.7,19641.1
+45.8,60.2,174.7,171,48.3,436.4,367.9,214.1,25,82.7,276.1,1129.4,3031.6,241.8,3273.4
diff --git a/input/io-table-cn-2007-w-idx.csv b/input/io-table-cn-2007-w-idx.csv
@@ -0,0 +1,4 @@
+﻿Category,Cat
+Hydro-electricity and others,Hy
+Subcritical coal,SubC
+Other fossil fuels,OFF
diff --git a/input/io-table-cn-2007-w.csv b/input/io-table-cn-2007-w.csv
@@ -0,0 +1,3 @@
+﻿0.241
+0.648
+0.111
diff --git a/main.py b/main.py
@@ -0,0 +1,230 @@
+"""Implementation of Lindner (2012) in Python with NumPy and Pandas.
+
+Lindner, Sören, Julien Legault, and Dabo Guan. 2012.
+‘Disaggregating Input–Output Models with Incomplete Information’.
+Economic Systems Research 24 (4): 329–47.
+https://doi.org/10.1080/09535314.2012.689954.
+
+The comments in this script contain the Matlab code given in the supplementary
+material 'cesr_a_689954_sup_27358897.docx' of Lindner (2012).
+
+Source (accessed 06.12.2022):
+https://www.tandfonline.com/doi/suppl/10.1080/09535314.2012.689954
+
+The script contains the generation of random numbers. A random vector is
+generated in line 90 of the Matlab script:
+
+    `base(p,:) = rand(1,Nv)`
+
+For verification purposes, `np.random.seed(1337)` (Python) and
+`rand('twister', 1337)` (Matlab) were applied.
+
+"""
+
+import numpy as np
+import pandas as pd
+
+from tqdm import tqdm
+
+if False:  # !!! Manual switch: set to True to seed the RNG
+
+    # Verification mode: fix NumPy's global seed so that the random vectors
+    # drawn by `np.random.rand` further below are reproducible across runs.
+    # Matlab equivalent: `rand('twister', 1337)`
+    # Source: https://stackoverflow.com/a/20202330/5696601
+    np.random.seed(1337)
+
+# %% Loading IO data
+
+flows = pd.read_csv(
+    # Input–output table of China (2007), in billion RMB
+    './input/io-table-cn-2007-flows.csv',
+    header=None
+    )
+
+flows_idx = pd.read_csv(
+    './input/io-table-cn-2007-flows-idx.csv'
+    )
+
+flows.columns = pd.MultiIndex.from_frame(flows_idx)
+flows.index = pd.MultiIndex.from_frame(flows_idx.iloc[:12, :])
+
+# Vector of final demand
+f = flows.loc[:, ('Final demand', 'FD')]
+
+# Vector of intermediate demand
+id = flows.loc[:, ('Intermediate demand', 'ID')]
+
+# Vector of total outputs
+x = f + id
+
+# Exchange matrix
+Z = flows.loc[
+    # Rows
+    :,
+    # Cols
+    (~flows.columns.get_level_values('Cat')
+     .isin(['ID', 'FD', 'TO']))
+    ]
+
+del flows_idx
+
+temp = Z.shape  # Size of IO table
+
+N = temp[0] - 1  # Number of common sectors
+
+A = np.divide(Z, x)  # ggregated technical coefficient matrix
+
+x_common = x[:-1]  # Vector of total outputs for common sectors
+
+f_common = f[:-1]  # Vector of final demand for common sectors
+
+# Note: The last sector of the table is disaggregated,
+# i.e. the electricity sector
+
+x_elec = x[-1]  # Total output of the disaggregated sector
+
+f_elec = f[-1]  # Final demand of the disaggregated sector
+
+# %% Newly formed sectors from the electricity sector
+
+# New sector weights (CSV without header line, one weight per row)
+w = pd.read_csv(
+    './input/io-table-cn-2007-w.csv',
+    header=None
+    )
+
+w = w.values.flatten()  # Collapse the frame into a 1-D array
+
+w_idx = pd.read_csv(
+    './input/io-table-cn-2007-w-idx.csv'  # Labels of the new sectors
+    )
+
+n = len(w)  # Number of new sectors
+
+# Total number of sectors for the disaggregated IO table
+N_tot = N + n
+
+# Vector of new total sector outputs
+x_new = w*x_elec/1000  # NOTE(review): confirm the 1/1000 factor
+
+# Vector of disaggregated economy sector total outputs
+xs = np.concatenate((x_common, x_new))
+
+f_new = w*f_elec  # Final demand of new sectors
+
+# %% Building the constraint matrix C
+
+Nv = n * N_tot + n  # Number of variables
+
+Nc = N + n + 1  # Number of constraints
+
+# Vector of constraint constants
+q = pd.concat(
+    [A.iloc[N, :],
+     pd.Series(w, index=pd.MultiIndex.from_frame(w_idx))]
+    )
+
+# Matrix of constraints
+C = np.zeros((Nc, Nv))
+
+# %%% Common sectors constraints
+
+C11 = np.zeros((N, N*n))
+
+for ii in range(N):
+    col_indices = range(n*(ii), n*ii+n)
+    C11[ii, col_indices] = np.ones((1, n))
+
+C[:N, :N*n] = C11
+
+# %%% New sectors constraints
+
+C22 = np.zeros((1, n**2))
+
+for ii in range(0, n):
+    col_indices = range(n*(ii), n*ii+n)
+    C22[0, col_indices] = w[ii]*np.ones((1, n))
+
+C[N, N*n:N*n+n**2] = C22
+
+# %%% Final demand constraints
+
+C31 = np.zeros((n, N*n))
+
+for ii in range(N):
+    col_indices = range(n*(ii-1)+3, n*ii+3)
+    C31[:n, col_indices] = (x_common[ii]/x_elec)*np.eye(n)
+
+C32 = np.zeros((n, n**2))
+
+for ii in range(0, n):
+    col_indices = range(n*(ii-1)+3, n*ii+3)
+    C32[:n, col_indices] = w[ii]*np.eye(n)
+
+C[N+1:, :N*n] = C31
+C[N+1:, N*n:N*n+n**2] = C32
+C[N+1:, N*n+n**2:] = np.eye(n)
+
+# %% Building the initial estimate y0
+
+# Technical coefficient matrix of the initial estimate
+As_y0 = np.zeros((N_tot, N_tot))
+
+# Common/Common part
+As_y0[:N, :N] = A.iloc[:N, :N]
+
+# Common/New part
+As_y0[:N, N:N_tot] = np.repeat(A.iloc[:N, N].to_numpy(), n).reshape(N, n)
+
+# New/Common part
+As_y0[N:N_tot, :N] = (
+    np.multiply(w, A.iloc[N, :N].to_numpy().repeat(n).reshape(N, n)).T
+    )
+
+# New/New part
+As_y0[N:N_tot, N:N_tot] = np.multiply(
+    A.iloc[N, N],
+    np.repeat(w, n).reshape(n, n)
+    )
+
+# %% Generating the orthogonal distinguishing matrix
+
+# %%% Making the constraint matrix orthogonal
+
+C_orth = C.copy()  # Work on a copy so that C itself stays unchanged
+
+for c in tqdm(range(Nc), desc='Orthogonalize constraint matrix'):
+    for i in range(c):  # All rows processed before row c
+
+        # Remove from row c its component along row i (rows stay unnormalized)
+        C_orth[c, :] = (
+            C_orth[c, :]
+            - np.dot(C_orth[c, :], C_orth[i, :])
+            / np.linalg.norm(C_orth[i, :])**2 * C_orth[i, :]
+            )
+
+# %%% Gram-Schmidt algorithm
+
+base = np.zeros((Nv, Nv))  # Orthogonal base containing C_orth and D
+base[:Nc, :] = C_orth.copy()  # First Nc rows: orthogonalized constraints
+
+for p in tqdm(range(Nc, Nv), desc='Gram-Schmidt algorithm'):
+
+    # Generate random vector (drawn from NumPy's global random state)
+    base[p, :] = np.random.rand(1, Nv)
+
+    for i in range(p):
+
+        # Subtract the projection on each previous row i < p, one at a time
+        base[p, :] -= (
+            np.dot(base[p, :], base[i, :])
+            / np.linalg.norm(base[i, :])**2
+            * base[i, :]
+            )
+
+    # Normalizing row p to unit Euclidean length
+    base[p, :] /= np.linalg.norm(base[p, :])
+
+# Distinguishing matrix: the Nv - Nc generated rows of base, as columns
+D = base[Nc:, :].T
diff --git a/verification/main.m.png b/verification/main.m.png
diff --git a/verification/main.py.png b/verification/main.py.png