Commits (34)
e50eff8
data generation graphODE without dampings
AgathaSchmidt Jul 4, 2024
21e6df5
GNN data generator with dampings - graphODE
AgathaSchmidt Jul 9, 2024
95a9d15
add damping information to output
AgathaSchmidt Jul 9, 2024
300b956
adjust to code guidelines
AgathaSchmidt Aug 7, 2024
75ce80d
delete file
AgathaSchmidt Aug 7, 2024
10b6e92
adjust to code guidelines
AgathaSchmidt Aug 7, 2024
9115678
create a make_graph function
AgathaSchmidt Aug 12, 2024
c9b26cf
create make graph function
AgathaSchmidt Aug 12, 2024
d97a1f7
fix population number at 400
AgathaSchmidt Aug 14, 2024
e20fe2d
fix number of population to 400 and rename damping information
AgathaSchmidt Aug 14, 2024
d9110e5
add "return data" for case without saving
AgathaSchmidt Aug 15, 2024
8823aa1
tests for simulation run and data generation for models with and witho…
AgathaSchmidt Aug 15, 2024
74da6d8
Apply suggestions from code review
AgathaSchmidt Aug 26, 2024
47f99eb
apply suggestions from code review
AgathaSchmidt Aug 29, 2024
58fc8e8
a python file with functions frequently used for our GNNs
AgathaSchmidt Aug 29, 2024
ffa076e
add get population and get minimum matrix to utils
AgathaSchmidt Sep 2, 2024
0cbda2b
remove get population function from this file and import it from utils
AgathaSchmidt Sep 2, 2024
e8fb580
remove functions and import them from utils
AgathaSchmidt Sep 2, 2024
de35ca6
add comments as proposed by review
AgathaSchmidt Sep 2, 2024
3d5a644
pre commit
AgathaSchmidt Sep 16, 2024
2dfbe4f
mock transform mobility function
AgathaSchmidt Sep 18, 2024
86a2e89
adjust utils: add surrogate utils and add scaling to GNN utils
AgathaSchmidt Sep 19, 2024
77af0ab
add test for saving mechanism
AgathaSchmidt Sep 19, 2024
ce4a494
import functions from new utils file
AgathaSchmidt Sep 19, 2024
63980d9
adjust import
AgathaSchmidt Sep 19, 2024
cabf5a1
adjust imports
AgathaSchmidt Sep 19, 2024
06fb8dd
put function which reads files outside of the run_simulation function
AgathaSchmidt Sep 24, 2024
f2345c7
add directory as parameter
AgathaSchmidt Sep 24, 2024
d26528e
set edges only one time
AgathaSchmidt Sep 25, 2024
86c76d5
Merge branch 'main' into 1057-GNN-datageneration
HenrZu Oct 2, 2024
e39fd71
new structure for no damp
HenrZu Oct 2, 2024
ff6c571
timing graph sim (Delete before Merge!)
HenrZu Oct 2, 2024
ac705a4
with_dampings
HenrZu Oct 2, 2024
10d5a98
[ci skip] damping correctly set and reset after each run
HenrZu Oct 2, 2024
@@ -0,0 +1,328 @@
import copy
import os
import pickle
import random
import json
from datetime import date
import numpy as np

from progress.bar import Bar
from sklearn.preprocessing import FunctionTransformer

from memilio.simulation import (AgeGroup, LogLevel, set_log_level)
from memilio.simulation.osecir import (Index_InfectionState, InfectionState,
                                       Model, ModelGraph, ParameterStudy,
                                       interpolate_simulation_result, set_edges)
from memilio.epidata import geoModificationGermany as geoger
from memilio.epidata import transformMobilityData as tmd
from memilio.epidata import getDataIntoPandasDataFrame as gd


def run_secir_groups_simulation(days, populations):
"""! Uses an ODE SECIR model allowing for asymptomatic infection with 6
different age groups. The model is not stratified by region.
Virus-specific parameters are fixed and initial number of persons
in the particular infection states are chosen randomly from defined ranges.
@param Days Describes how many days we simulate within a single run.
@param damping_day The day when damping is applied.
@param populations List containing the population in each age group.
@return List containing the populations in each compartment used to initialize
the run.
"""

set_log_level(LogLevel.Off)

start_day = 1
start_month = 1
start_year = 2019
dt = 0.1

# get county ids
countykey_list = geoger.get_county_ids(merge_eisenach=True, zfill=True)

# Define age Groups
groups = ['0-4', '5-14', '15-34', '35-59', '60-79', '80+']
num_groups = len(groups)
num_regions = len(populations)
models = []

# Initialize Parameters
for region in range(num_regions):
model = Model(num_groups)

# Set parameters
for i in range(num_groups):
# Compartment transition duration
model.parameters.TimeExposed[AgeGroup(i)] = 3.2
model.parameters.TimeInfectedNoSymptoms[AgeGroup(i)] = 2.
model.parameters.TimeInfectedSymptoms[AgeGroup(i)] = 6.
model.parameters.TimeInfectedSevere[AgeGroup(i)] = 12.
model.parameters.TimeInfectedCritical[AgeGroup(i)] = 8.

# Initial number of people in each compartment with random numbers
model.populations[AgeGroup(i), Index_InfectionState(
InfectionState.Exposed)] = random.uniform(
0.00025, 0.005) * populations[region][i]
model.populations[AgeGroup(i), Index_InfectionState(
InfectionState.InfectedNoSymptoms)] = random.uniform(
0.0001, 0.0035) * populations[region][i]
model.populations[AgeGroup(i), Index_InfectionState(
InfectionState.InfectedNoSymptomsConfirmed)] = 0
model.populations[AgeGroup(i), Index_InfectionState(
InfectionState.InfectedSymptoms)] = random.uniform(
0.00007, 0.001) * populations[region][i]
model.populations[AgeGroup(i), Index_InfectionState(
InfectionState.InfectedSymptomsConfirmed)] = 0
model.populations[AgeGroup(i), Index_InfectionState(
InfectionState.InfectedSevere)] = random.uniform(
0.00003, 0.0006) * populations[region][i]
model.populations[AgeGroup(i), Index_InfectionState(
InfectionState.InfectedCritical)] = random.uniform(
0.00001, 0.0002) * populations[region][i]
model.populations[AgeGroup(i), Index_InfectionState(
InfectionState.Recovered)] = random.uniform(
0.002, 0.08) * populations[region][i]
model.populations[AgeGroup(i),
Index_InfectionState(InfectionState.Dead)] = random.uniform(
0, 0.0003) * populations[region][i]
model.populations.set_difference_from_group_total_AgeGroup(
(AgeGroup(i), Index_InfectionState(InfectionState.Susceptible)),
populations[region][i])

            # Compartment transition probabilities
model.parameters.RelativeTransmissionNoSymptoms[AgeGroup(i)] = 0.5
model.parameters.TransmissionProbabilityOnContact[AgeGroup(
i)] = 0.1
model.parameters.RecoveredPerInfectedNoSymptoms[AgeGroup(i)] = 0.09
model.parameters.RiskOfInfectionFromSymptomatic[AgeGroup(i)] = 0.25
model.parameters.SeverePerInfectedSymptoms[AgeGroup(i)] = 0.2
model.parameters.CriticalPerSevere[AgeGroup(i)] = 0.25
model.parameters.DeathsPerCritical[AgeGroup(i)] = 0.3
# twice the value of RiskOfInfectionFromSymptomatic
model.parameters.MaxRiskOfInfectionFromSymptomatic[AgeGroup(
i)] = 0.5

# StartDay is the n-th day of the year
model.parameters.StartDay = (
date(start_year, start_month, start_day) - date(start_year, 1, 1)).days

        # Load the baseline contact matrix and assign it to the model. The
        # minimum contact matrix is set to zero.
        baseline = getBaselineMatrix()

        model.parameters.ContactPatterns.cont_freq_mat[0].baseline = baseline
        model.parameters.ContactPatterns.cont_freq_mat[0].minimum = np.zeros(
            (num_groups, num_groups))

# Apply mathematical constraints to parameters
model.apply_constraints()
models.append(model)

graph = ModelGraph()
for i in range(num_regions):
graph.add_node(int(countykey_list[i]), models[i])

# get mobility data directory
arg_dict = gd.cli("commuter_official")

directory = arg_dict['out_folder'].split('/pydata')[0]
directory = os.path.join(directory, 'mobility/')

    # Merge Eisenach and Wartburgkreis in the input data
tmd.updateMobility2022(directory, mobility_file='twitter_scaled_1252')
tmd.updateMobility2022(directory, mobility_file='commuter_migration_scaled')

num_locations = 4

set_edges(os.path.abspath(os.path.join(directory, os.pardir)),
graph, num_locations)

    study = ParameterStudy(graph, 0, days, dt=dt, num_runs=1)
    graph_run = study.run()[0]
    results = interpolate_simulation_result(graph_run)

    # Omit the time points (the first row of each result), as they are not of
    # interest here, and merge the confirmed compartments.
    for result_indx in range(len(results)):
        results[result_indx] = remove_confirmed_compartments(
            np.transpose(results[result_indx].as_ndarray()[1:, :]), num_groups)

    dataset_entry = copy.deepcopy(results)

return dataset_entry
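
# Usage sketch (illustrative only; assumes the MEmilio contact and mobility
# data files are available):
# population = get_population()            # 400 counties x 6 age groups
# results = run_secir_groups_simulation(34, population)
# len(results) == 400; each county entry holds one array of the
# 6 * 8 = 48 merged compartment values per simulated time point.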

def remove_confirmed_compartments(dataset_entries, num_groups):
"""! The compartments which contain confirmed cases are not needed and are
therefore omitted by summarizing the confirmed compartment with the
original compartment.
@param dataset_entries Array that contains the compartmental data with
confirmed compartments.
@param num_groups Number of age groups.
@return Array that contains the compartmental data without confirmed compartments.
"""

    new_dataset_entries = []
    for i in dataset_entries:
        dataset_entries_reshaped = i.reshape(
            [num_groups, int(np.asarray(dataset_entries).shape[1] / num_groups)])
        sum_inf_no_symp = np.sum(dataset_entries_reshaped[:, [2, 3]], axis=1)
        sum_inf_symp = np.sum(dataset_entries_reshaped[:, [4, 5]], axis=1)
        dataset_entries_reshaped[:, 2] = sum_inf_no_symp
        dataset_entries_reshaped[:, 4] = sum_inf_symp
        new_dataset_entries.append(
            np.delete(dataset_entries_reshaped, [3, 5], axis=1).flatten())
    return new_dataset_entries
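
# Illustration (hypothetical numbers, not part of the pipeline): for
# num_groups = 1, the ten compartments of one time step are ordered
# [S, E, INS, INSConf, ISy, ISyConf, ISev, ICrit, R, D].
# Columns 3 and 5 (the confirmed compartments) are added onto columns 2 and 4
# and then deleted, so [100, 5, 3, 1, 2, 1, 0, 0, 4, 0] becomes
# [100, 5, 4, 3, 0, 0, 4, 0].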


def get_population(path="data/pydata/Germany/county_population.json"):
"""! loads population data
@param path Path to population file.
@return List with all 400 populations and 6 age groups.
"""

with open(path) as f:
data = json.load(f)
population = []
for data_entry in data:
population_county = []
population_county.append(
data_entry['<3 years'] + data_entry['3-5 years'] / 2)
population_county.append(data_entry['6-14 years'])
population_county.append(
data_entry['15-17 years'] + data_entry['18-24 years'] +
data_entry['25-29 years'] + data_entry['30-39 years'] / 2)
population_county.append(
data_entry['30-39 years'] / 2 + data_entry['40-49 years'] +
data_entry['50-64 years'] * 2 / 3)
population_county.append(
data_entry['65-74 years'] + data_entry['>74 years'] * 0.2 +
data_entry['50-64 years'] * 1 / 3)
population_county.append(
data_entry['>74 years'] * 0.8)

population.append(population_county)
return population
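
# Note: the census age brackets are mapped onto the model's six age groups
# under a uniform-age assumption within each bracket, e.g. half of the
# '30-39 years' bracket is assigned to the 15-34 group and half to the
# 35-59 group.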

def getBaselineMatrix():
"""! loads the baselinematrix
"""

baseline_contact_matrix0 = os.path.join(
"./data/contacts/baseline_home.txt")
baseline_contact_matrix1 = os.path.join(
"./data/contacts/baseline_school_pf_eig.txt")
baseline_contact_matrix2 = os.path.join(
"./data/contacts/baseline_work.txt")
baseline_contact_matrix3 = os.path.join(
"./data/contacts/baseline_other.txt")

baseline = np.loadtxt(baseline_contact_matrix0) \
+ np.loadtxt(baseline_contact_matrix1) + \
np.loadtxt(baseline_contact_matrix2) + \
np.loadtxt(baseline_contact_matrix3)

return baseline

def generate_data(
num_runs, path, input_width, days, save_data=True):
"""! Generate dataset by calling run_secir_simulation (num_runs)-often
@param num_runs Number of times, the function run_secir_simulation is called.
@param path Path, where the datasets are stored.
@param input_width number of time steps used for model input.
@param label_width number of time steps (days) used as model output/label.
@param save_data Option to deactivate the save of the dataset. Per default true.
"""

population = get_population()
days_sum = days + input_width - 1

data = {"inputs": [],
"labels": [],
}

    # Show progress in terminal for longer runs.
    # Due to the random initialization, there is currently no need to shuffle
    # the data.
bar = Bar('Number of Runs done', max=num_runs)

for _ in range(num_runs):

data_run = run_secir_groups_simulation(
days_sum, population)

        inputs = np.asarray(data_run).transpose(1, 2, 0)[:input_width]
        data["inputs"].append(inputs)

        data["labels"].append(
            np.asarray(data_run).transpose(1, 2, 0)[input_width:])

bar.next()

bar.finish()

if save_data:
num_groups = int(np.asarray(data['inputs']).shape[2] / 8)
Review comment (Contributor):
Check your format-on-save settings again. The files are indented one position too far and generally a lot of stray formatting is displayed. :)

transformer = FunctionTransformer(np.log1p, validate=True)
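        # np.log1p maps x to log(1 + x) elementwise; it compresses the large,
        # strictly non-negative compartment counts, e.g.
        # np.log1p([0., 1., 99.]) is approximately [0., 0.693, 4.605].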

# Scale inputs
inputs = np.asarray(
data['inputs']).transpose(2, 0, 1, 3).reshape(num_groups * 8, -1)
scaled_inputs = transformer.transform(inputs)
original_shape_input = np.asarray(data['inputs']).shape

# Step 1: Reverse the reshape
reshaped_back = scaled_inputs.reshape(original_shape_input[2],
original_shape_input[0],
original_shape_input[1],
original_shape_input[3])

# Step 2: Reverse the transpose
original_inputs = reshaped_back.transpose(1, 2, 0, 3)
scaled_inputs = original_inputs.transpose(0, 3, 1, 2)
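        # After reversing the reshape and transpose, scaled_inputs has shape
        # (num_runs, num_regions, input_width, num_groups * 8).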


# Scale labels
labels = np.asarray(
data['labels']).transpose(2, 0, 1, 3).reshape(num_groups * 8, -1)
scaled_labels = transformer.transform(labels)
original_shape_labels = np.asarray(data['labels']).shape

# Step 1: Reverse the reshape
reshaped_back = scaled_labels.reshape(original_shape_labels[2],
original_shape_labels[0],
original_shape_labels[1],
original_shape_labels[3])

# Step 2: Reverse the transpose
original_labels = reshaped_back.transpose(1, 2, 0, 3)
scaled_labels = original_labels.transpose(0, 3, 1, 2)
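        # Analogously, scaled_labels has shape
        # (num_runs, num_regions, days, num_groups * 8).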

all_data = {"inputs": scaled_inputs,
"labels": scaled_labels,
}

        # Check if the data directory exists; create it if necessary.
if not os.path.isdir(path):
os.mkdir(path)

        # Save the data dictionary to a pickle file.
with open(os.path.join(path, 'data_secir_age_groups.pickle'), 'wb') as f:
pickle.dump(all_data, f)
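
        # Sketch (assumes the same path and file name as above) of how the
        # saved dataset can be read back:
        # with open(os.path.join(path, 'data_secir_age_groups.pickle'),
        #           'rb') as f:
        #     all_data = pickle.load(f)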


if __name__ == "__main__":

path = os.path.dirname(os.path.realpath(__file__))
path_data = os.path.join(
os.path.dirname(
os.path.realpath(os.path.dirname(os.path.realpath(path)))),
'data_GNN_nodamp')

    input_width = 5
    days = 30
    num_runs = 1000
    generate_data(num_runs, path_data, input_width, days)
