lines 6-353 of file: example/python/numeric/covid_19_xam.py # {xrst_begin numeric_covid_19_xam.py} # {xrst_spell # bradbell # csv # cv # gaussian # lcr # pennsylvania # rel # sim # sqrt # stime # } # {xrst_comment_ch #} # # # Example Fitting an SEIRWD Model for Covid-19 # ############################################ # # Covariates # ********** # In this example there are two covariates that # affect the infectious rate :math:`\beta`: # social mobility :math:`c_0 (t)`, # Covid-19 testing :math:`c_1 (t)`, and # scaled time :math:`c_2 (t)`. # The covariates are known functions of time. # The mobility covariate has been shifted and scaled # so it is in the interval [-1, 0]. # The testing covariate has been shifted and scaled # so it is in the interval [0, 1]. # Note that the maximum mobility and the minimum testing corresponds to the # normal (baseline) condition. # The scaled time covariate is shifted and scaled version of time so that # it is in the interval [0, 1] with the first data point corresponding to # time zero. It is assumed here that the baseline condition corresponds # to time zero. # # Model ODE # ********* # We use the :ref:`seirwd` model and notation. # # beta(t) # ======= # Our model for the infectious rate is # # .. math:: # # \beta(t) = \bar{\beta} \exp[ m_0 c_0 (t) + m_1 c_1 (t) + m_2 c_2 (t) ] # # where :math:`\bar{\beta}` is the baseline value for the infectious rate, # :math:`m_0` is the social covariate multiplier, and # :math:`m_1` is the Covid-19 testing covariate multiplier. # The baseline value :math:`\bar{\beta}` is the infectious rate corresponding # to all the covariates being zero. # The covariate multipliers, and the baseline infectious rate, are unknown. # # Other Rates # =========== # The other rates # :math:`\alpha(t)`, # :math:`\sigma(t)`, # :math:`\gamma(t)`, # :math:`\xi(t)`, # :math:`\chi(t)`, # :math:`\delta(t)`, # constant functions with known values: # {xrst_code py} alpha_known = 0.95 sigma_known = 0.2 gamma_known = 0.1 chi_known = 0.1 xi_known = 0.00 delta_known = 0.2 # {xrst_code} # All of theses rates must be non-negative. # # Initial Values # ============== # The initial size of the Recovered group :math:`R(0)` # and of the Death group :math:`D(0)` is zero. # We use fraction of the total population for sizes, so the sum of the # other initial values is one. # We treat the initial # Infected group :math:`I(0)`, and # Will die group :math:`W(0)`, # as unknown parameters in the model. # We would like to also solve for the initial exposed population but # that model has identifiability problems, so # we use the following approximation for the initial exposed group # # .. math:: # # E(0) = I(0) \gamma / \sigma # # The initial Susceptible group :math:`S(0)` is # expressed as a function of the other initial conditions: # # .. math:: # # S(0) = 1 - E(0) - I(0) - W(0) # # Ode Solver # ========== # There are two choices for *ode_method* , # the method used to solve the ODE: # :ref:`runge4` and # :ref:`rosen3`. # In addition, we can choose *ode_n_step* , # the number of step to take for each time interval in *t_all* , # before it is sub-sampled using the # :ref:`sample_interval`. # {xrst_code py} ode_method = 'runge4' ode_n_step = 4 # {xrst_code} # # Unknown Parameters # ****************** # The unknown parameter vector in this model is # # .. math:: # # x = [ m_0, m_1, m_2, I(0), W(0), \bar{\beta} ] # # {xrst_code py} x_name = [ 'm_mobility', 'm_testing', 'm_stime', 'I(0)', 'W(0)', 'beta_bar' ] # {xrst_code} # # Maximum Likelihood # ================== # We use a Gaussian likelihood for each of the differences in the # cumulative deaths. The unknown parameters are estimated by maximizing the # product of these likelihoods; i.e., the differences are modeled as being # independent. The covariance of the estimates is approximated # by the inverse of the observed information matrix. # AD is used to compute first and second derivatives of the likelihood # w.r.t. the unknown parameters :math:`x`. # These derivatives are used during optimization as well as for # computing the observed information matrix. # # Model Bounds # ============ # The infection rate :math:`\beta(t)` must be non-negative; i.e., # # .. math:: # # 0 \leq \bar{\beta} \exp[ m_0 c_0 (t) + m_1 c_1 (t) + m_2 c_2 (t) ] # # is true for all :math:`t`. # In addition, the size of the groups can not be negative. # It is sufficient to enforce this constraint on the initial conditions; i.e., # # .. math:: # # \begin{array}{lcr} # 0 & \leq & \bar{\beta } \\ # 0 & \leq & I(0) \\ # 0 & \leq & W(0) # \end{array} # # Actual Bounds # ============= # The following actual upper and lower bounds for the unknown parameters # are used as an as an aid to the optimizer: # {xrst_literal # # BEGIN_ACTUAL_BOUNDS # # END_ACTUAL_BOUNDS # } # where *x_sim* is the # :ref:`simulation` value # for the unknown parameters and *actual_bound_factor* is chosen below. # The problem has not really been solved if bounds, # other than the model bounds above, are active at the solution of the # optimization problem. # {xrst_code py} actual_bound_factor = 5.0 # {xrst_code} # # Data # **** # The data in this model is the cumulative number of deaths, # as a fraction of the total population and as a function of time. # We assume that new deaths are recorded for time intervals # and the cumulative deaths is the sum of these recordings. # For this reason, we model the difference of the cumulative deaths # between time points as independent. # # sample_interval # =============== # It is possible to sub-sample the data in order to reduce noise. # The cumulative death data is just sub-sampled since the reduces noise by # the summing the differences corresponding to a longer time period. # The covariate data is averaged over the sample interval. # The *sample_interval* must be either one or a positive even integer # (even so an original data point corresponds to the center of the interval). # {xrst_code py} sample_interval = 1 # {xrst_code} # # data_file # ========= # If the data file name is the empty string, the cumulative death data, # and corresponding covariates, are simulated by the program. # Otherwise, the data file must be a CSV file with the following columns: # *day* , *death* , *mobility* , *testing* . # In this case the data file is used for the # cumulative death and corresponding covariates. # {xrst_code py} data_file = '/home/bradbell/Downloads/561.csv' # Pennsylvania data_file = '/home/bradbell/trash/covid_19/seirwd.csv' # New York data_file = '' # empty string # {xrst_code} # # Coefficient of Variation # ======================== # This is the coefficient of variation for the differences # in the cumulative death data as a fraction, not a percent. # If this value is zero, a CV of zero is used for data simulation # and a CV of one in the definition of the likelihood. # This enables checking that the unknown parameters can be accurately # identified using perfect data. # For real data (when *data_file* is not empty) # this value should be adjusted so that the average residual has variance one. # {xrst_code py} death_data_cv = 0.25 # {xrst_code} # Note this is the noise level in the original data before it is # sub-sampled using # :ref:`sample_interval`. # # Simulation # ========== # If *data_file* is the empty string, the data is simulated using # the following values for the # :ref:`unknown_parameters`: # {xrst_code py} m_mobility_sim = 1.0 # m_0 m_testing_sim = - 1.0 # m_1 m_stime_sim = - 1.0 # m_2 I0_sim = 2e-5 # I(0) W0_sim = 2e-5 # W(0) beta_bar_sim = 2.0 # baseline value for beta # {xrst_code} # # Weighted Residuals # ================== # If *death_data_cv* is zero, :math:`\lambda = 1`, otherwise # :math:`\lambda` is equal to # # | |tab| *death_data_cv* ``* sqrt`` ( *sample_interval* ) # #. # (Note that the standard deviation of a sum of independent values is the # square root of the sum of the variance of each of the values.) # Let :math:`y_i` be the i-th value for the cumulative death data. # The weighted residuals (some times referred to as just the residuals) are # # .. math:: # # r_i = \frac{ ( y_{i+1} - y_i ) - [ D( t_{i+1} ) - D( t_i ) ] }{ # \lambda ( y_{i+1} - y_i ) } # # where :math:`D(t)` is the model for the cumulative data # given the fit results. # The time corresponding to :math:`r_i` is :math:`( t_{i+1} + t_i ) / 2`. # We put the data difference in the denominator, # instead of the model difference, # because it is constant with respect to the unknown parameters. # # Random Seed # *********** # This is the random seed used to simulate noise in the data. # If this value is zero, the system clock is used to choose the random seed. # {xrst_code py} random_seed = 20821659074 # {xrst_code} # # Random Start # ************ # The optimizer needs a good starting point in order to succeed. # This is the number of random points, between the lower and upper limits, # that are checked. The point with the best objective value is chosen # as the starting point for the optimization. # {xrst_code py} n_random_start = 4000 # {xrst_code} # # Display Fit Results # ******************* # If you set this variable to True, # a printout and a plot of the fit results is generated. # {xrst_code py} display_fit = False # {xrst_code} # # Plot # ==== # There are three plots all with time on the x-axis. # The first contains the size for all the compartments, except S, # as a fraction of the total population. # The second contains the model and data for the death difference values. # The third contains the weighted residuals corresponding to the death # difference data. # # Printout # ======== # #. The following statistics for the weighted data residuals is printed: # the maximum, minimum, average, and average of square. # #. A table with the following columns is printed: # # .. csv-table:: # :widths: 9, 43 # # *x_name* , name of the unknown parameter # *x_fit* , fit result for the unknown parameter # *x_lower* , lower bound used for the fit # *x_upper* , upper bound used for the fit # *std_error* , asymptotic standard error for the parameter # # #. If *data_file* is empty, # a table is printed with the following columns is also printed: # # .. csv-table:: # :widths: 9, 53 # # *x_name* , name of the unknown parameter # *x_sim* , known parameter value used during simulation # *x_fit* , fit result for the unknown parameter # *rel_error* , relative error for fit versus simulation # *residual* , *std_error* weighted residual for fit versus simulation # # Debug Output # ************ # If this flag is true a lot of debugging output is printed. # {xrst_code py} debug_output = False # {xrst_code} # # Source Code # *********** # {xrst_literal # # BEGIN_PYTHON # # END_PYTHON # } # # {xrst_end numeric_covid_19_xam.py}