Covariables selon la méthode utilisée

Modèle de score de propension

| Méthode | Covariables |
|---|---|
| Hirano-Imbens | MEDIA_COST*_B ; MEDIA_COST*_C ; P_POP_4564 ; p_const2011_2016 ; CRP3_SCORE_STD |
| Moodie-Stephens, avec imputation | […] p_const2011_2016 ; CRP3_SCORE_STD ; […] |
| Moodie-Stephens, traitement avec masse à 0 | […] |

([…] : contenu de cellule perdu lors de l'extraction du tableau.)

Annexe B

Code R

# 1) Aggregated analysis ----

# a) Average the media-investment variables over the 9 weeks of interest:
#    keep year 2018, weeks 8 to 16, RTAs with GEO_NBF_2016 > 0, then take the
#    per-RTA mean of every column listed in `vars_media`.
# NOTE(review): the head of this pipeline (assignment target and input data
# frame) is missing from the source. `DATA_MEAN0 <- DATA %>%` is inferred from
# the later uses of DATA_MEAN0 and from the parallel longitudinal pipeline
# (`DATA_LONG2018 <- DATA %>% filter(...)`) -- confirm against the original.
DATA_MEAN0 <- DATA %>%
  filter(GEO_NBF_2016 > 0, YEAR == 2018, WEEKNUM %in% 8:16) %>%
  group_by(GEO_RTA) %>%
  # `mean` replaces the deprecated funs(mean(.)); column names are unchanged.
  summarise_at(vars(one_of(vars_media)), mean) %>%
  as.data.frame()

# b) K-means classification of the RTAs ----
K_means_vars <- c("MEDIA_COST_B", "P_marie", "P_immigr", "P_mater_fr")
MEDIA_A_kmean <- kmeans(
  x = Kmeans_data[, K_means_vars],
  centers = 5,
  nstart = 50
)
MEDIA_A_kmean$size # the retained cluster had 136 RTAs in the original run

# Keep the cluster with the smallest within-cluster sum of squares per RTA.
k <- which.min(MEDIA_A_kmean$withinss / MEDIA_A_kmean$size)

# ** Final dataset: the RTAs of cluster k (136 homogeneous RTAs originally).
# Split them into LOW / MIDDLE / HIGH groups by increasing MEDIA_COST_A;
# LOW and HIGH each get floor(size / 3) RTAs, MIDDLE gets the remainder.
ngroup <- floor(MEDIA_A_kmean$size[k] / 3)
DATA_CLUST1 <- DATA_MEAN0 %>%
  filter(CLUSTER == k) %>%
  arrange(MEDIA_COST_A) %>%
  mutate(
    Group = c(
      rep("LOW", ngroup),
      # Fixed: the source referenced an undefined `phone_kmean` object here.
      rep("MIDDLE", MEDIA_A_kmean$size[k] - 2 * ngroup),
      rep("HIGH", ngroup)
    ),
    # NOTE(review): this expression is truncated in the source after
    # `ifelse(Group == 'LOW', 0,`. The HIGH = 1 / MIDDLE = NA completion is an
    # assumption -- confirm against the original script.
    TREATED = ifelse(Group == "LOW", 0, ifelse(Group == "HIGH", 1, NA))
  )

# c) Hirano-Imbens method ----
# i) Propensity-score model: Gamma GLM (inverse link) of the treatment
#    MEDIA_COST_A on the covariates.
# NOTE(review): the source writes the aggregated columns as `MEDIA_COST*_A`,
# `MEDIA_COST*_B`, ... (not valid R names). They are assumed to be the
# per-RTA means stored under the plain names MEDIA_COST_A, MEDIA_COST_B, ...
vars_drf_1 <- c(
  "MEDIA_COST_B", "MEDIA_COST_C", "P_POP_4564",
  "p_const2011_2016", "CRP3_SCORE_STD"
)

# Fixed: the response was MEDIA_COST_B, which is also a covariate; the
# treatment is MEDIA_COST_A (as in the bootstrap section of this script).
formule_drf_1 <- paste("MEDIA_COST_A ~", paste(vars_drf_1, collapse = " + "))

modele1_gamma <- glm(
  as.formula(formule_drf_1),
  family = Gamma(link = "inverse"),
  data = DATA_CLUST1
)
summary(modele1_gamma) # dispersion parameter: 0.0326 in the original run

# GPS estimation
# 1. Grid of 93 treatment values (3rd to 95th percentile, step 1%).
grille1 <- quantile(DATA_CLUST1$MEDIA_COST_A, probs = seq(0.03, 0.95, by = 0.01))

# 2. Estimated mean: predict() returns the linear predictor, and the link is
#    the inverse, so the mean is its reciprocal.
N <- nrow(DATA_CLUST1)
L <- length(grille1)
mu_i1 <- 1 / predict(modele1_gamma)

# 3. Estimated variance (fixed: the source used an undefined `mu_i` here).
phi <- summary(modele1_gamma)$dispersion
var_est_i <- phi * mu_i1^2

# 4. GPS as a Gamma density with shape ka and scale teta_i, so that
#    mu_i = ka * teta_i and var_i = ka * teta_i^2.
ka <- 1 / phi
teta_i <- mu_i1 / ka

GPS <- dgamma(DATA_CLUST1$MEDIA_COST_A, scale = teta_i, shape = ka)

# ii) Outcome model: SOUM regressed on the treatment, the GPS, their squares
#     and their interaction.
# NOTE(review): `SOUM*` / `MEDIA_COST*_A` in the source are assumed to be the
# columns SOUM / MEDIA_COST_A of DATA_CLUST1. `GPS` is the vector computed in
# the previous step, found in the calling environment.
modele2_gamma <- lm(
  SOUM ~ MEDIA_COST_A + I(MEDIA_COST_A^2) + GPS + I(GPS^2) + MEDIA_COST_A * GPS,
  data = DATA_CLUST1
)
summary(modele2_gamma)

# Coefficient order: intercept, A, A^2, GPS, GPS^2, A:GPS.
alphas <- modele2_gamma$coefficients

# iii) Dose-response function (ADRF)
# N x L matrix: GPS of RTA i evaluated at grid value j (Gamma density with
# shape ka and scale teta_i, the same density used for the observed GPS).
gps1_gamma_matrix <- matrix(nrow = N, ncol = L)
for (j in seq_len(L)) {
  gps1_gamma_matrix[, j] <- dgamma(grille1[j], shape = ka, scale = teta_i)
}

# For each grid value, average the fitted outcome over the N RTAs. The model
# is linear in the coefficients, so the average over RTAs reduces to column
# means of the GPS matrix and of its square (replaces the nested t/rta loops).
coef_hi <- unname(alphas)
dose_hi <- unname(grille1)
result_grille <- coef_hi[1] +
  coef_hi[2] * dose_hi +
  coef_hi[3] * dose_hi^2 +
  coef_hi[4] * colMeans(gps1_gamma_matrix) +
  coef_hi[5] * colMeans(gps1_gamma_matrix^2) +
  coef_hi[6] * dose_hi * colMeans(gps1_gamma_matrix)

hi_clust <- result_grille

# Derivative of the GPS with respect to the treatment ----

# Derivative in t of the Gamma(shape, scale) density:
#   f'(t) = [ (shape - 1) t^(shape - 2) - t^(shape - 1) / scale ]
#           * exp(-t / scale) / (gamma(shape) * scale^shape)
# The normalising constant is computed on the log scale so it does not
# overflow for large shapes.
gamma_density_deriv <- function(t, shape, scale) {
  norm_const <- exp(-lgamma(shape) - shape * log(scale))
  norm_const * exp(-t / scale) *
    ((shape - 1) * t^(shape - 2) - t^(shape - 1) / scale)
}

# NOTE(review): `MEDIA_COST*_A` in the source is assumed to be the column
# MEDIA_COST_A of DATA_CLUST1.
# Derivative evaluated at each RTA's observed treatment.
gps_deriv <- gamma_density_deriv(DATA_CLUST1$MEDIA_COST_A, ka, teta_i)

# N x L matrix: derivative of RTA i's GPS evaluated at grid value j.
gps_deriv_matrix <- matrix(nrow = N, ncol = L)
for (j in seq_len(L)) {
  gps_deriv_matrix[, j] <- gamma_density_deriv(grille1[j], ka, teta_i)
}

# Derivative of the ADRF ----
# d/dt of  a1 + a2 t + a3 t^2 + a4 g + a5 g^2 + a6 t g  with g = GPS(t),
# averaged over the N RTAs (column means replace the nested t/rta loops).
coef_hi <- unname(alphas)
dose_hi <- unname(grille1)
result_hi_derivee <- coef_hi[2] +
  2 * coef_hi[3] * dose_hi +
  coef_hi[4] * colMeans(gps_deriv_matrix) +
  2 * coef_hi[5] * colMeans(gps1_gamma_matrix * gps_deriv_matrix) +
  coef_hi[6] * (dose_hi * colMeans(gps_deriv_matrix) + colMeans(gps1_gamma_matrix))


# d) Checking the balancing property of the GPS ----
# NOTE(review): `MEDIA_COST*_A` in the source is assumed to be the column
# MEDIA_COST_A.

# Split MEDIA_COST_A into three intervals g1 / g2 / g3 at its 33% and 66%
# quantiles.
DATA_BALANCED1 <- DATA_CLUST1 %>%
  arrange(MEDIA_COST_A) %>%
  mutate(
    GROUP = ifelse(
      MEDIA_COST_A < quantile(MEDIA_COST_A, .33), "g1",
      ifelse(MEDIA_COST_A > quantile(MEDIA_COST_A, .66), "g3", "g2")
    )
  ) %>%
  as.data.frame()

# i) Before adjusting for the GPS (unadjusted)
# 5 covariates x 3 groups = 15 t-tests, each comparing one group against the
# other two pooled.
ttest_unadjusted <- lapply(c("g1", "g2", "g3"), function(g) {
  DATA_BALANCED1 %>%
    summarise_at(
      vars(one_of(vars_drf_1)),
      list(~ t.test(.[GROUP == g], .[GROUP != g])$statistic)
    ) %>%
    mutate(GROUP = paste0(g, "_MEDIA_A"))
}) %>%
  bind_rows() %>%
  # One row per covariate (Xi), one column per group.
  gather(-GROUP, key = Xi, value = ttest) %>%
  spread(key = GROUP, value = ttest) %>%
  mutate_at(vars(-Xi), list(~ round(., 2))) %>%
  as_tibble()

# Table of t statistics for equality of covariate means, before GPS adjustment.
ttest_unadjusted

# ii) After adjusting for the GPS (adjusted) ----
# NOTE(review): `MEDIA_COST*_A` in the source is assumed to be the column
# MEDIA_COST_A. The `DATA_BALANCED1 <- DATA_BALANCED1 %>% mutate(` heads of
# steps b) and c) are missing from the source and were restored from the
# dangling argument lists.

# a) Median treatment within each interval g (rows come back in g1, g2, g3
#    order because group_by() sorts the group keys).
MEDIA_A_median <- DATA_BALANCED1 %>%
  group_by(GROUP) %>%
  summarise(median = median(MEDIA_COST_A)) %>%
  .$median

# b) GPS of every RTA evaluated at each interval's median treatment.
# This relies on teta_i being in the same row order as DATA_BALANCED1 (both
# come from DATA_CLUST1 sorted by MEDIA_COST_A).
DATA_BALANCED1 <- DATA_BALANCED1 %>%
  mutate(
    gps_median1 = dgamma(MEDIA_A_median[1], shape = ka, scale = teta_i),
    gps_median2 = dgamma(MEDIA_A_median[2], shape = ka, scale = teta_i),
    gps_median3 = dgamma(MEDIA_A_median[3], shape = ka, scale = teta_i)
  )

# c) Cut each of these GPS vectors into 4 equal-width blocks.
library(forcats)
nblocks <- 4
block_labels <- paste0("block_", 1:nblocks)
DATA_BALANCED1 <- DATA_BALANCED1 %>%
  mutate(
    gps_block_from_m1 = cut(gps_median1, nblocks) %>% lvls_revalue(block_labels),
    gps_block_from_m2 = cut(gps_median2, nblocks) %>% lvls_revalue(block_labels),
    gps_block_from_m3 = cut(gps_median3, nblocks) %>% lvls_revalue(block_labels)
  )

# d) Within each GPS block, t-test of group g against the other groups, then
#    combine the per-block statistics as a mean weighted by block size.
adjusted_ttest <- function(block_col, g) {
  block_sizes <- DATA_BALANCED1 %>%
    group_by_at(block_col) %>%
    tally()
  DATA_BALANCED1 %>%
    group_by_at(block_col) %>%
    summarise_at(
      vars(one_of(vars_drf_1)),
      list(~ t.test(.[GROUP == g], .[GROUP != g])$statistic)
    ) %>%
    ungroup() %>%
    left_join(block_sizes, by = block_col) %>%
    summarise_at(vars(one_of(vars_drf_1)), list(~ sum(. * n) / sum(n))) %>%
    mutate(GROUP = paste0(g, "_MEDIA_A"))
}

ttest_adjusted <- bind_rows(
  adjusted_ttest("gps_block_from_m1", "g1"),
  adjusted_ttest("gps_block_from_m2", "g2"),
  adjusted_ttest("gps_block_from_m3", "g3")
) %>%
  gather(-GROUP, key = Xi, value = ttest) %>%
  spread(key = GROUP, value = ttest) %>%
  mutate_at(vars(-Xi), list(~ round(., 2)))

# Table of t statistics for equality of covariate means, after GPS adjustment.
ttest_adjusted

## --- Bootstrap (Hirano-Imbens) ---
# Each iteration draws 120 RTAs without replacement, refits both models on
# that subsample and recomputes the ADRF and its derivative on the
# subsample's own quantile grid.
# NOTE(review): `MEDIA_COST*_A` / `SOUM*` in the source are assumed to be the
# columns MEDIA_COST_A / SOUM.
vars_drf_1 <- c(
  "MEDIA_COST_B", "MEDIA_COST_C", "P_POP_4564",
  "p_const2011_2016", "CRP3_SCORE_STD"
)
formule_drf_1 <- paste("MEDIA_COST_A ~", paste(vars_drf_1, collapse = " + "))

# Derivative in t of the Gamma(shape, scale) density; normalising constant on
# the log scale to avoid overflow.
gamma_density_deriv <- function(t, shape, scale) {
  norm_const <- exp(-lgamma(shape) - shape * log(scale))
  norm_const * exp(-t / scale) *
    ((shape - 1) * t^(shape - 2) - t^(shape - 1) / scale)
}

# Fixed: `rtas` was used below but only defined later in the script.
rtas <- unique(DATA_CLUST1$GEO_RTA)

iter <- 1000
result_boot_hi <- vector("list", iter)
result_boot_hi_deriv <- vector("list", iter)
grille_hi <- vector("list", iter)

for (i in seq_len(iter)) {
  print(i)
  rta_sample <- sample(rtas, size = 120)
  DATA_SAMPLE <- DATA_CLUST1 %>% filter(GEO_RTA %in% rta_sample)

  modele1_gps <- glm(
    as.formula(formule_drf_1),
    family = Gamma(link = "inverse"),
    data = DATA_SAMPLE
  )

  # GPS estimation on the subsample
  grille1_sample <- quantile(DATA_SAMPLE$MEDIA_COST_A, probs = seq(0.03, 0.95, by = 0.01))
  L <- length(grille1_sample)
  N <- nrow(DATA_SAMPLE)

  # Mean = reciprocal of the linear predictor (inverse link).
  mu_i <- 1 / predict(modele1_gps, newdata = DATA_SAMPLE)
  phi <- summary(modele1_gps)$dispersion
  var_est_i <- phi * mu_i^2
  ka <- 1 / phi
  teta_i <- mu_i / ka

  gps_sample <- dgamma(DATA_SAMPLE$MEDIA_COST_A, scale = teta_i, shape = ka)

  # N x L matrices: GPS and its derivative for RTA i at grid value j.
  gps1_gamma_matrix <- matrix(nrow = N, ncol = L)
  gps_deriv_matrix <- matrix(nrow = N, ncol = L)
  for (j in seq_len(L)) {
    gps1_gamma_matrix[, j] <- dgamma(grille1_sample[j], shape = ka, scale = teta_i)
    gps_deriv_matrix[, j] <- gamma_density_deriv(grille1_sample[j], ka, teta_i)
  }
  gps1_gamma <- colMeans(gps1_gamma_matrix)
  gps_deriv_sample <- gamma_density_deriv(DATA_SAMPLE$MEDIA_COST_A, ka, teta_i)

  # Outcome model on the subsample
  DATA_SAMPLE <- DATA_SAMPLE %>% mutate(GPS = gps_sample)
  modele2_gamma <- lm(
    SOUM ~ MEDIA_COST_A + I(MEDIA_COST_A^2) + GPS + I(GPS^2) + MEDIA_COST_A * GPS,
    data = DATA_SAMPLE
  )
  alphas <- coef(modele2_gamma)

  # ADRF: fitted outcome averaged over RTAs at each grid value (column means
  # replace the nested t/rta loops).
  coef_hi <- unname(alphas)
  dose_hi <- unname(grille1_sample)
  result_sample <- coef_hi[1] +
    coef_hi[2] * dose_hi +
    coef_hi[3] * dose_hi^2 +
    coef_hi[4] * gps1_gamma +
    coef_hi[5] * colMeans(gps1_gamma_matrix^2) +
    coef_hi[6] * dose_hi * gps1_gamma
  result_boot_hi[[i]] <- result_sample
  grille_hi[[i]] <- grille1_sample

  # Derivative of the ADRF
  result_sample_deriv <- coef_hi[2] +
    2 * coef_hi[3] * dose_hi +
    coef_hi[4] * colMeans(gps_deriv_matrix) +
    2 * coef_hi[5] * colMeans(gps1_gamma_matrix * gps_deriv_matrix) +
    coef_hi[6] * (dose_hi * colMeans(gps_deriv_matrix) + gps1_gamma)
  result_boot_hi_deriv[[i]] <- result_sample_deriv
}

# 2) Longitudinal analysis ----

# Keep the 9 weeks (8 to 16) of year 2018, flag urban RTAs, and divide the
# media variables by GEO_NBF_2016.
DATA_LONG2018 <- DATA %>%
  # The source applied this exact filter twice in a row; once is enough.
  filter(GEO_NBF_2016 > 0, YEAR == 2018, WEEKNUM %in% 8:16) %>%
  # URBAIN is 0 when the RTA code contains the character "0", 1 otherwise.
  mutate(URBAIN = ifelse(grepl("0", GEO_RTA, fixed = TRUE), 0, 1)) %>%
  # Replaces the deprecated funs(. / GEO_NBF_2016); column names unchanged.
  mutate_at(vars(one_of(vars_media)), list(~ . / GEO_NBF_2016)) %>%
  select(YEAR, WEEKNUM, GEO_RTA, GEO_POP, GEO_NBF_2016, URBAIN, one_of(vars_media))

# Merge with the STATCAN covariates and drop incomplete rows.
DATA_LONG2018 <- DATA_LONG2018 %>%
  left_join(STATCAN, by = c("GEO_RTA", "GEO_NBF_2016", "URBAIN")) %>%
  na.omit() %>%
  as.data.frame()

# ** Cluster k in long format (1224 rows originally: 136 RTAs x 9 weeks).
LONG <- filter(DATA_MEAN0, CLUSTER == k) %>% select(GEO_RTA)
DATA_CLUST1_LONG2018 <- DATA_LONG2018 %>% filter(GEO_RTA %in% LONG$GEO_RTA)

# ** Lagged variables: previous week's SOUM and MEDIA_COST_A within each RTA,
#    with 0 for the first week.
# NOTE(review): SOUM must survive the select() above, i.e. it presumably
# belongs to `vars_media` -- confirm.
DATA_CLUST1_LONG2018 <- DATA_CLUST1_LONG2018 %>%
  arrange(GEO_RTA, WEEKNUM) %>%
  group_by(GEO_RTA) %>%
  mutate(
    # dplyr::lag is spelled out so stats::lag can never be picked up instead.
    SOUM_lag1 = dplyr::lag(SOUM, n = 1, default = 0),
    MEDIA_lag1_A = dplyr::lag(MEDIA_COST_A, n = 1, default = 0)
  ) %>%
  ungroup()

# Week number as a factor.
DATA_CLUST1_LONG2018 <- DATA_CLUST1_LONG2018 %>%
  mutate(WEEKNUM_fac = as.factor(WEEKNUM))

# 2.1) SCMM approach ----

# Imputed copy of the long data: zero treatments are replaced by 0.1.
DATA_CLUST_LONG2018 <- DATA_CLUST1_LONG2018 %>%
  mutate(MEDIA_COST_A = ifelse(MEDIA_COST_A == 0, 0.1, MEDIA_COST_A)) %>%
  mutate(WEEKNUM_fac = as.factor(WEEKNUM))

# i) Propensity-score model: linear regression of the treatment on the
#    covariates, the week factor and the lagged treatment / outcome.
vars1_SCMM_fac <- c(
  "MEDIA_COST_B", "MEDIA_COST_C", "P_POP_4564",
  "p_const2011_2016", "CRP3_SCORE_STD", "WEEKNUM_fac",
  "MEDIA_lag1_A", "SOUM_lag1"
)
formule_gee_fac <- paste("MEDIA_COST_A ~", paste(vars1_SCMM_fac, collapse = " + "))

mod1 <- lm(as.formula(formule_gee_fac), data = DATA_CLUST_LONG2018)
summary(mod1)

# The propensity score is the fitted treatment value.
gps_fac <- mod1$fitted.values

# ii) Outcome model for SOUM. `gps_fac` is not a column of the data frame; lm()
#     finds it in the calling environment.
# NOTE(review): this vector is truncated in the source after
# 'p_const2011_2016'. 'CRP3_SCORE_STD' is appended because it appears in every
# other covariate list of the script; any further lost terms (for example
# WEEKNUM_fac) must be restored from the original.
vars2_SCMM <- c(
  "MEDIA_COST_A", "MEDIA_lag1_A", "SOUM_lag1", "gps_fac",
  "MEDIA_COST_B", "MEDIA_COST_C", "P_POP_4564", "p_const2011_2016",
  "CRP3_SCORE_STD"
)

formule <- paste("SOUM ~", paste(vars2_SCMM, collapse = " + "))
mod2 <- lm(as.formula(formule), data = DATA_CLUST_LONG2018)

# a) Moodie-Stephens, with imputation ----
# i) Propensity-score model: Gamma GLM (inverse link) on the imputed data.
vars_drf_LONG_mgps <- c(
  "MEDIA_COST_B", "MEDIA_COST_C", "P_POP_4564",
  "p_const2011_2016", "CRP3_SCORE_STD", "WEEKNUM_fac",
  "MEDIA_lag1_A", "SOUM_lag1"
)
formule_mgps <- paste("MEDIA_COST_A ~", paste(vars_drf_LONG_mgps, collapse = " + "))

modele_gamma_mgps <- glm(
  as.formula(formule_mgps),
  family = Gamma(link = "inverse"),
  data = DATA_CLUST_LONG2018
)
summary(modele_gamma_mgps)

# Derivative in t of the Gamma(shape, scale) density; normalising constant on
# the log scale to avoid overflow.
gamma_density_deriv <- function(t, shape, scale) {
  norm_const <- exp(-lgamma(shape) - shape * log(scale))
  norm_const * exp(-t / scale) *
    ((shape - 1) * t^(shape - 2) - t^(shape - 1) / scale)
}

# MGPS estimation
# 1. Grid of 93 treatment values (3rd to 95th percentile, step 1%).
grille1_LONG <- quantile(DATA_CLUST_LONG2018$MEDIA_COST_A, probs = seq(0.03, 0.95, by = 0.01))

# 2. Estimated mean (reciprocal of the linear predictor, inverse link).
L <- length(grille1_LONG)
N <- nrow(DATA_CLUST_LONG2018)
mu_i1 <- 1 / predict(modele_gamma_mgps)

# 3. Estimated variance.
phi1 <- summary(modele_gamma_mgps)$dispersion
var_est_i <- phi1 * mu_i1^2

# 4. GPS as a Gamma density: mu_i = ka * teta_i, var_i = ka * teta_i^2.
ka <- 1 / phi1
teta_i1 <- mu_i1 / ka

# 5. MGPS at the observed treatment, and N x L matrix on the grid.
DATA_CLUST_LONG2018 <- DATA_CLUST_LONG2018 %>%
  mutate(mgps1 = dgamma(MEDIA_COST_A, scale = teta_i1, shape = ka))

mgps1_matrix <- matrix(nrow = N, ncol = L)
for (j in seq_len(L)) {
  mgps1_matrix[, j] <- dgamma(grille1_LONG[j], shape = ka, scale = teta_i1)
}

# 6. Derivative of the MGPS (observed treatment, then grid). The `for` header
#    of the grid loop was missing from the source.
mgps1_deriv <- gamma_density_deriv(DATA_CLUST_LONG2018$MEDIA_COST_A, ka, teta_i1)

mgps1_deriv_matrix <- matrix(nrow = N, ncol = L)
for (j in seq_len(L)) {
  mgps1_deriv_matrix[, j] <- gamma_density_deriv(grille1_LONG[j], ka, teta_i1)
}

# ii) Outcome model for SOUM.
# Coefficient order: intercept, A, A^2, mgps1, A:mgps1.
mod_mgps <- lm(
  SOUM ~ MEDIA_COST_A + I(MEDIA_COST_A^2) + mgps1 + MEDIA_COST_A * mgps1,
  data = DATA_CLUST_LONG2018
)
summary(mod_mgps)
alphas <- coef(mod_mgps)

# ADRF: fitted outcome averaged over the N rows at each grid value (column
# means replace the nested t/rta loops).
coef_ms <- unname(alphas)
dose_ms <- unname(grille1_LONG)
result_grille_mgps <- coef_ms[1] +
  coef_ms[2] * dose_ms +
  coef_ms[3] * dose_ms^2 +
  coef_ms[4] * colMeans(mgps1_matrix) +
  coef_ms[5] * dose_ms * colMeans(mgps1_matrix)
result_mgps1 <- result_grille_mgps

# Derivative of the ADRF
result_grille_deriv <- coef_ms[2] +
  2 * coef_ms[3] * dose_ms +
  coef_ms[4] * colMeans(mgps1_deriv_matrix) +
  coef_ms[5] * (dose_ms * colMeans(mgps1_deriv_matrix) + colMeans(mgps1_matrix))
deriv1 <- result_grille_deriv

## --- Bootstrap (Moodie-Stephens, with imputation) ---
# Each iteration draws 120 RTAs without replacement (all their weeks are
# kept), refits both models on that subsample and recomputes the ADRF and its
# derivative on the subsample's own quantile grid.

# The covariate vector is defined before the formula that uses it (the source
# built the formula first, from whatever value the vector had at that point).
vars_drf_LONG_mgps <- c(
  "MEDIA_COST_B", "MEDIA_COST_C", "P_POP_4564", "p_const2011_2016",
  "CRP3_SCORE_STD", "WEEKNUM_fac", "SOUM_lag1", "MEDIA_lag1_A"
)
formule1_mgps <- paste("MEDIA_COST_A ~", paste(vars_drf_LONG_mgps, collapse = " + "))

rtas <- unique(DATA_CLUST_LONG2018$GEO_RTA)

# Derivative in t of the Gamma(shape, scale) density; normalising constant on
# the log scale to avoid overflow.
gamma_density_deriv <- function(t, shape, scale) {
  norm_const <- exp(-lgamma(shape) - shape * log(scale))
  norm_const * exp(-t / scale) *
    ((shape - 1) * t^(shape - 2) - t^(shape - 1) / scale)
}

iter <- 1000
result_boot_ms <- vector("list", iter)
result_boot_ms_deriv <- vector("list", iter)
grille_ms <- vector("list", iter)

for (i in seq_len(iter)) {
  print(i)
  rta_sample <- sample(rtas, size = 120)
  DATA_SAMPLE <- DATA_CLUST_LONG2018 %>% filter(GEO_RTA %in% rta_sample)

  # Fixed: the source refit this model on the full DATA_CLUST_LONG2018, so the
  # propensity model never varied across replicates; the other two bootstrap
  # loops of the script fit on the subsample.
  modele_gamma_mgps <- glm(
    as.formula(formule1_mgps),
    family = Gamma(link = "inverse"),
    data = DATA_SAMPLE
  )

  # MGPS estimation on the subsample
  grille1_LONG_sample <- quantile(DATA_SAMPLE$MEDIA_COST_A, probs = seq(0.03, 0.95, by = 0.01))
  grille_ms[[i]] <- grille1_LONG_sample
  L <- length(grille1_LONG_sample)
  N <- nrow(DATA_SAMPLE)

  # Mean = reciprocal of the linear predictor (inverse link).
  mu_i1 <- 1 / predict(modele_gamma_mgps, newdata = DATA_SAMPLE)
  phi1 <- summary(modele_gamma_mgps)$dispersion # 0.22 in the original run
  var_est_i <- phi1 * mu_i1^2

  # GPS as a Gamma density with shape ka and scale teta_i1.
  ka <- 1 / phi1
  teta_i1 <- mu_i1 / ka

  mgps1_SAMPLE <- dgamma(DATA_SAMPLE$MEDIA_COST_A, scale = teta_i1, shape = ka)
  DATA_SAMPLE <- DATA_SAMPLE %>% mutate(mgps1 = mgps1_SAMPLE)

  # N x L matrices: MGPS and its derivative for row i at grid value j. The
  # `for` header of the derivative loop was missing from the source.
  mgps1_matrix <- matrix(nrow = N, ncol = L)
  mgps1_deriv_matrix <- matrix(nrow = N, ncol = L)
  for (j in seq_len(L)) {
    mgps1_matrix[, j] <- dgamma(grille1_LONG_sample[j], shape = ka, scale = teta_i1)
    mgps1_deriv_matrix[, j] <- gamma_density_deriv(grille1_LONG_sample[j], ka, teta_i1)
  }
  mgps1_deriv <- gamma_density_deriv(DATA_SAMPLE$MEDIA_COST_A, ka, teta_i1)

  # Outcome model on the subsample.
  # Coefficient order: intercept, A, A^2, mgps1, A:mgps1.
  mod1_mgps <- lm(
    SOUM ~ MEDIA_COST_A + I(MEDIA_COST_A^2) + mgps1 + MEDIA_COST_A * mgps1,
    data = DATA_SAMPLE
  )
  alphas <- coef(mod1_mgps)

  # ADRF (column means replace the nested t/rta loops).
  coef_ms <- unname(alphas)
  dose_ms <- unname(grille1_LONG_sample)
  result_sample1 <- coef_ms[1] +
    coef_ms[2] * dose_ms +
    coef_ms[3] * dose_ms^2 +
    coef_ms[4] * colMeans(mgps1_matrix) +
    coef_ms[5] * dose_ms * colMeans(mgps1_matrix)
  result_boot_ms[[i]] <- result_sample1

  # Derivative of the ADRF
  result_sample_deriv_ms <- coef_ms[2] +
    2 * coef_ms[3] * dose_ms +
    coef_ms[4] * colMeans(mgps1_deriv_matrix) +
    coef_ms[5] * (dose_ms * colMeans(mgps1_deriv_matrix) + colMeans(mgps1_matrix))
  result_boot_ms_deriv[[i]] <- result_sample_deriv_ms
}

# b) Moodie-Stephens, treatment with a point mass at 0 ----
# i) MGPS for the non-zero treatment: Gamma GLM fitted on the rows with
#    MEDIA_COST_A != 0 only (no imputation here).
DATA_CLUST1_LONG2018_0 <- DATA_CLUST1_LONG2018 %>%
  filter(MEDIA_COST_A != 0) %>%
  mutate(WEEKNUM_fac = as.factor(WEEKNUM))

formule1_mgps <- paste("MEDIA_COST_A ~", paste(vars_drf_LONG_mgps, collapse = " + "))
modele1_mgps <- glm(
  as.formula(formule1_mgps),
  family = Gamma(link = "inverse"),
  data = DATA_CLUST1_LONG2018_0
)
summary(modele1_mgps)

# Derivative in t of the Gamma(shape, scale) density; normalising constant on
# the log scale to avoid overflow.
gamma_density_deriv <- function(t, shape, scale) {
  norm_const <- exp(-lgamma(shape) - shape * log(scale))
  norm_const * exp(-t / scale) *
    ((shape - 1) * t^(shape - 2) - t^(shape - 1) / scale)
}

# MGPS estimation: the model is fitted on the non-zero rows but the mean is
# predicted for every row of DATA_CLUST1_LONG2018.
grille1_LONG_0 <- quantile(DATA_CLUST1_LONG2018_0$MEDIA_COST_A, probs = seq(0.03, 0.95, by = 0.01))
L <- length(grille1_LONG_0)
N <- nrow(DATA_CLUST1_LONG2018_0)

mu_i_0 <- 1 / predict(modele1_mgps, newdata = DATA_CLUST1_LONG2018)
phi0 <- summary(modele1_mgps)$dispersion
var_est_i <- phi0 * mu_i_0^2
ka <- 1 / phi0
teta_i0 <- mu_i_0 / ka

mgps_0 <- dgamma(DATA_CLUST1_LONG2018$MEDIA_COST_A, scale = teta_i0, shape = ka)

# ii) Mixture model: logistic regression for the mass at zero.
# newvar is 1 when MEDIA_COST_A == 0, 0 otherwise.
DATA_CLUST1_LONG2018 <- DATA_CLUST1_LONG2018 %>%
  mutate(newvar = ifelse(MEDIA_COST_A == 0, 1, 0))

# NOTE(review): this statement is truncated in the source after
# `WEEKNUM_fac = DATA_`; rebuilding the factor from WEEKNUM is an assumption.
DATA_CLUST1_LONG2018 <- DATA_CLUST1_LONG2018 %>%
  mutate(WEEKNUM_fac = as.factor(WEEKNUM))

formule1_mgps <- paste("newvar ~", paste(vars_drf_LONG_mgps, collapse = " + "))

# Predict the probability that newvar = 1, i.e. MEDIA_COST_A = 0.
model_mgps <- glm(
  formula = as.formula(formule1_mgps),
  family = binomial(link = "logit"),
  data = DATA_CLUST1_LONG2018
)
summary(model_mgps)

# pi_hat: estimated probability that MEDIA_COST_A is zero.
# The WEIGHT line is missing from the source at this point; it is restored
# from the identical step in the bootstrap section of this script.
DATA_CLUST1_LONG2018 <- DATA_CLUST1_LONG2018 %>%
  mutate(
    pi_hat = round(predict(model_mgps, type = "response"), 10),
    WEIGHT = ifelse(newvar == 1, pi_hat, (1 - pi_hat))
  )

# r_hat: pi_hat for zero treatments, (1 - pi_hat) * Gamma density otherwise.
DATA_CLUST1_LONG2018 <- DATA_CLUST1_LONG2018 %>%
  mutate(
    mgps_0 = mgps_0,
    r_hat = ifelse(newvar == 1, WEIGHT, WEIGHT * mgps_0)
  )

grille1_mass0 <- quantile(DATA_CLUST1_LONG2018$MEDIA_COST_A, probs = seq(0.03, 0.95, by = 0.01))
L <- length(grille1_mass0)
N <- nrow(DATA_CLUST1_LONG2018)

# N x L matrices of r_hat and its derivative on the grid. Column 1 is set to
# pi_hat (derivative 0); the remaining columns use the continuous part.
# NOTE(review): this treats the first grid value as the zero-treatment point,
# i.e. it presumes the 3% quantile of MEDIA_COST_A is 0 -- confirm.
mgps0_matrix <- matrix(nrow = N, ncol = L)
mgps0_deriv_matrix <- matrix(nrow = N, ncol = L)
mgps0_matrix[, 1] <- DATA_CLUST1_LONG2018$pi_hat
mgps0_deriv_matrix[, 1] <- 0
for (j in seq_len(L)[-1]) {
  mgps0_matrix[, j] <- (1 - DATA_CLUST1_LONG2018$pi_hat) *
    dgamma(grille1_mass0[j], shape = ka, scale = teta_i0)
  mgps0_deriv_matrix[, j] <- (1 - DATA_CLUST1_LONG2018$pi_hat) *
    gamma_density_deriv(grille1_mass0[j], ka, teta_i0)
}

# iii) Outcome model for SOUM.
# Coefficient order: intercept, A, A^2, r_hat, A:r_hat.
modele2_mgps <- lm(
  SOUM ~ MEDIA_COST_A + I(MEDIA_COST_A^2) + r_hat + MEDIA_COST_A * r_hat,
  data = DATA_CLUST1_LONG2018
)
summary(modele2_mgps)
alphas <- coef(modele2_mgps)

# ADRF (column means replace the nested t/rta loops).
coef_m0 <- unname(alphas)
dose_m0 <- unname(grille1_mass0)
result_mass0 <- coef_m0[1] +
  coef_m0[2] * dose_m0 +
  coef_m0[3] * dose_m0^2 +
  coef_m0[4] * colMeans(mgps0_matrix) +
  coef_m0[5] * dose_m0 * colMeans(mgps0_matrix)
result_mgps0 <- result_mass0

# Derivative of the ADRF
result_grille_deriv <- coef_m0[2] +
  2 * coef_m0[3] * dose_m0 +
  coef_m0[4] * colMeans(mgps0_deriv_matrix) +
  coef_m0[5] * (dose_m0 * colMeans(mgps0_deriv_matrix) + colMeans(mgps0_matrix))
deriv0 <- result_grille_deriv

## --- Bootstrap (Moodie-Stephens, mass at 0) ---
# Each iteration draws 120 RTAs without replacement, refits the Gamma model
# (non-zero rows), the logistic model and the outcome model on that
# subsample, and recomputes the ADRF and its derivative.

# The last element of this vector is cut off in the source; 'MEDIA_lag1_A' is
# restored from the identical list used in the previous bootstrap section.
vars_drf_LONG_mgps <- c(
  "MEDIA_COST_B", "MEDIA_COST_C", "P_POP_4564", "p_const2011_2016",
  "CRP3_SCORE_STD", "WEEKNUM_fac", "SOUM_lag1", "MEDIA_lag1_A"
)
formule1_mgps <- paste("MEDIA_COST_A ~", paste(vars_drf_LONG_mgps, collapse = " + "))
formule1_log <- paste("newvar ~", paste(vars_drf_LONG_mgps, collapse = " + "))

rtas <- unique(DATA_CLUST1_LONG2018$GEO_RTA)

# Derivative in t of the Gamma(shape, scale) density; normalising constant on
# the log scale to avoid overflow.
gamma_density_deriv <- function(t, shape, scale) {
  norm_const <- exp(-lgamma(shape) - shape * log(scale))
  norm_const * exp(-t / scale) *
    ((shape - 1) * t^(shape - 2) - t^(shape - 1) / scale)
}

iter <- 1000
result_boot <- vector("list", iter)
result_boot_deriv <- vector("list", iter)
grille <- vector("list", iter)

for (i in seq_len(iter)) {
  print(i)
  rta_sample <- sample(rtas, size = 120)
  DATA_SAMPLE <- DATA_CLUST1_LONG2018 %>% filter(GEO_RTA %in% rta_sample)
  DATA_SAMPLE_0 <- DATA_SAMPLE %>%
    filter(MEDIA_COST_A != 0) %>%
    mutate(WEEKNUM_fac = as.factor(WEEKNUM))

  # Gamma model for the non-zero treatment, fitted on the non-zero rows and
  # predicted for every row of the subsample.
  modele1_mgps <- glm(
    as.formula(formule1_mgps),
    family = Gamma(link = "inverse"),
    data = DATA_SAMPLE_0
  )
  grille1_LONG_sample0 <- quantile(DATA_SAMPLE_0$MEDIA_COST_A, probs = seq(0.03, 0.95, by = 0.01))

  mu_i_0 <- 1 / predict(modele1_mgps, newdata = DATA_SAMPLE)
  phi0 <- summary(modele1_mgps)$dispersion # 0.22 in the original run
  var_est_i <- phi0 * mu_i_0^2

  # GPS as a Gamma density with shape ka and scale teta_i0.
  ka <- 1 / phi0
  teta_i0 <- mu_i_0 / ka
  mgps_sample0 <- dgamma(DATA_SAMPLE$MEDIA_COST_A, scale = teta_i0, shape = ka)

  # newvar is 1 when MEDIA_COST_A == 0, 0 otherwise.
  DATA_SAMPLE <- DATA_SAMPLE %>%
    mutate(newvar = ifelse(MEDIA_COST_A == 0, 1, 0))

  # Logistic model for the probability that newvar = 1 (MEDIA_COST_A = 0).
  model_mgps <- glm(
    formula = as.formula(formule1_log),
    family = binomial(link = "logit"),
    data = DATA_SAMPLE
  )

  # pi_hat: estimated probability that the treatment is zero. The
  # `DATA_SAMPLE <- DATA_SAMPLE %>% mutate(` head was missing from the source.
  DATA_SAMPLE <- DATA_SAMPLE %>%
    mutate(
      pi_hat = round(predict(model_mgps, type = "response"), 10),
      WEIGHT = ifelse(newvar == 1, pi_hat, (1 - pi_hat))
    )

  # r_hat. Fixed: the source used `mgps_0`, the density vector of the FULL
  # data set, instead of the subsample density computed above.
  DATA_SAMPLE <- DATA_SAMPLE %>%
    mutate(
      mgps_0 = mgps_sample0,
      r_hat = ifelse(newvar == 1, WEIGHT, WEIGHT * mgps_0)
    )

  grille1_sample0 <- quantile(DATA_SAMPLE$MEDIA_COST_A, probs = seq(0.03, 0.95, by = 0.01))
  grille[[i]] <- grille1_sample0
  L <- length(grille1_sample0)
  N <- nrow(DATA_SAMPLE)

  # N x L matrices of r_hat and its derivative on the grid. Column 1 is set
  # to pi_hat (derivative 0); the `for` header of the derivative loop was
  # missing from the source.
  mgps0_matrix <- matrix(nrow = N, ncol = L)
  mgps0_deriv_matrix <- matrix(nrow = N, ncol = L)
  mgps0_matrix[, 1] <- DATA_SAMPLE$pi_hat
  mgps0_deriv_matrix[, 1] <- 0
  for (j in seq_len(L)[-1]) {
    mgps0_matrix[, j] <- (1 - DATA_SAMPLE$pi_hat) *
      dgamma(grille1_sample0[j], shape = ka, scale = teta_i0)
    mgps0_deriv_matrix[, j] <- (1 - DATA_SAMPLE$pi_hat) *
      gamma_density_deriv(grille1_sample0[j], ka, teta_i0)
  }

  # Outcome model. Coefficient order: intercept, A, A^2, r_hat, A:r_hat.
  modele2_mgps <- lm(
    SOUM ~ MEDIA_COST_A + I(MEDIA_COST_A^2) + r_hat + MEDIA_COST_A * r_hat,
    data = DATA_SAMPLE
  )
  # Fixed: the source read coef(fit2_mgps), an object that is never created.
  alphas <- coef(modele2_mgps)

  # ADRF (column means replace the nested t/rta loops).
  coef_m0 <- unname(alphas)
  dose_m0 <- unname(grille1_sample0)
  result_sample0 <- coef_m0[1] +
    coef_m0[2] * dose_m0 +
    coef_m0[3] * dose_m0^2 +
    coef_m0[4] * colMeans(mgps0_matrix) +
    coef_m0[5] * dose_m0 * colMeans(mgps0_matrix)
  result_boot[[i]] <- result_sample0

  # Derivative of the ADRF
  result_sample_deriv <- coef_m0[2] +
    2 * coef_m0[3] * dose_m0 +
    coef_m0[4] * colMeans(mgps0_deriv_matrix) +
    coef_m0[5] * (dose_m0 * colMeans(mgps0_deriv_matrix) + colMeans(mgps0_matrix))
  result_boot_deriv[[i]] <- result_sample_deriv
}

## Figures 4.1 (dose-response curves) and 4.2 (derivative curves) ----

# Bind the bootstrap replicates column-wise (one column per replicate, one row
# per grid point) and add, per grid point, the replicate mean (SOUM) and the
# 5% / 95% replicate quantiles (ic1, ic2). `grid` becomes the x axis.
# Each replicate was computed on its own quantile grid; as in the original
# code, the grid of the first replicate is used for plotting.
summarise_boot <- function(boot_list, grid) {
  bind_cols(boot_list) %>%
    mutate(
      SOUM = apply(., 1, mean),
      ic1 = apply(., 1, function(x) quantile(x, probs = 0.05)),
      ic2 = apply(., 1, function(x) quantile(x, probs = 0.95))
    ) %>%
    mutate(MEDIA_COST_A = grid)
}

# Ribbons (ic1..ic2) and points (SOUM) for the three methods on one plot.
# The legend keys are spelled identically for fill and colour; the source
# mixed "Moodies" and "Moodie" and had string literals broken across lines.
label_hi <- "Hirano-Imbens"
label_ms <- "Moodie-Stephens, avec imputation"
label_m0 <- "Moodie-Stephens, traitement avec masse a 0"
plot_boot_curves <- function(m1, m2, m3) {
  ggplot(data = m1, aes(x = MEDIA_COST_A)) +
    geom_ribbon(aes(ymin = ic1, ymax = ic2, fill = label_hi), alpha = 0.7) +
    geom_point(aes(y = SOUM, color = label_hi)) +
    geom_ribbon(data = m2, aes(ymin = ic1, ymax = ic2, fill = label_ms), alpha = 0.7) +
    geom_point(data = m2, aes(y = SOUM, color = label_ms)) +
    geom_ribbon(data = m3, aes(ymin = ic1, ymax = ic2, fill = label_m0), alpha = 0.7) +
    geom_point(data = m3, aes(y = SOUM, color = label_m0))
}

## Dose-response curves of figure 4.1
# 1) Hirano-Imbens
method1 <- summarise_boot(result_boot_hi, grille_hi[[1]])
# 2) Moodie-Stephens, with imputation
method2 <- summarise_boot(result_boot_ms, grille_ms[[1]])
# 3) Moodie-Stephens, treatment with mass at 0
method3 <- summarise_boot(result_boot, grille[[1]])

plot_boot_curves(method1, method2, method3)

## Derivative curves of figure 4.2 (method1..3 are overwritten, as in the
## original script)
# 1) Hirano-Imbens
method1 <- summarise_boot(result_boot_hi_deriv, grille_hi[[1]])
# 2) Moodie-Stephens, with imputation
method2 <- summarise_boot(result_boot_ms_deriv, grille_ms[[1]])
# 3) Moodie-Stephens, treatment with mass at 0
method3 <- summarise_boot(result_boot_deriv, grille[[1]])

plot_boot_curves(method1, method2, method3) +
  # Horizontal line for the SCMM method.
  # NOTE(review): 0.015 is hard-coded; presumably the MEDIA_COST_A coefficient
  # of the SCMM outcome model (mod2) -- confirm.
  geom_line(
    data = data.frame(x = grille1_LONG, y = 0.015),
    aes(x, y, color = "SCMM")
  )


