ui.R server.R and global.R
library(shiny) library(shinymaterial) library(shinydashboard) library(plotly) library(DT) # flow: input -> box group -> tab -> full ui # global var -------------------------------------------------------------- methodList <- list("Predictive mean matching" = "pmm", "Weighted predictive mean matching" = "midastouch", "Random sample from observed values" = "sample", "Classification and regression trees" = "cart", "Random forest imputations" = "rf", "Unconditional mean imputation" = "mean", "Bayesian linear regression" = "norm", "Linear regression ignoring model error" = "norm.nob", "Linear regression using bootstrap" = "norm.boot", "predicted values" = "norm.predict", "Imputation of quadratic terms" = "quadratic", "Random indicator for nonignorable data" = "ri", "Level-1 normal heteroskedastic" = "2l.norm", "Level-1 normal homoscedastic, lmer" = "2l.lmer", "Level-1 normal homoscedastic, pan" = "2l.pan", "Level-2 class mean" = "2lonly.mean", "Level-2 class normal" = "2lonly.norm", "Level-2 class predictive mean matching" = "2lonly.pmm") # inputs lab -------------------------------------------------------------- inMissType <- material_radio_button( input_id = "missType", label = "Missingness Mechanism:", choices = c("MCAR", "MAR", "MNAR1", "MNAR2") ) inMissPercent <- material_slider( input_id = "missPercent", "% missing data", min_value = 1, max_value = 99, initial_value = 20 ) inImputeM <- material_dropdown( input_id = "imputeMethod", label = "Imputation Mechanism:", choices = as.character(methodList) ) inImputeMulti <- material_number_box( input_id = "imputeN", label = "# multi-imuptation", min_value = 2, max_value = 30, initial_value = 10 ) runImputation <- material_button( input_id = "runImp", label = "Run Imputation" ) inShowGroup <- material_switch( input_id = "showGroup", #label = " group by age", initial_value = TRUE ) inPlotItems <- material_dropdown( input_id = "plotItems", label = "Plot Contents:", multiple = TRUE, choices = c("") ) # inputs em --------------------------------------------------------------- inMu1 <- material_row( material_column(width = 4, material_number_box("mean1x", "Mean x", initial_value = 3, min_value = -30, max_value = 30)), material_column(width = 4, material_number_box("mean1y", "y", initial_value = 3, min_value = -30, max_value = 30)), material_column(width = 4, material_number_box("mean1z", "z", initial_value = 3, min_value = -30, max_value = 30)) ) inSigma1 <- material_row( material_row( material_column(width = 4, numericInput("sigma1.11", "Covariance", value = 1, step = 0.5)) ), material_row( material_column(width = 4, numericInput("sigma1.21", "", value = 0, step = 0.5)), material_column(width = 4, numericInput("sigma1.22", "", value = 1, step = 0.5)) ), material_row( material_column(width = 4, numericInput("sigma1.31", "", value = 0, step = 0.5)), material_column(width = 4, numericInput("sigma1.32", "", value = 0, step = 0.5)), material_column(width = 4, numericInput("sigma1.33", "", value = 1, step = 0.5)) ) ) inMu2 <- material_row( material_column(width = 4, material_number_box("mean2x", "Mean x", initial_value = -1, min_value = -30, max_value = 30)), material_column(width = 4, material_number_box("mean2y", "y", initial_value = -1, min_value = -30, max_value = 30)), material_column(width = 4, material_number_box("mean2z", "z", initial_value = -1, min_value = -30, max_value = 30)) ) inSigma2 <- material_row( material_row( material_column(width = 4, numericInput("sigma2.11", "Covariance", value = 1, step = 0.5)) ), material_row( material_column(width = 4, numericInput("sigma2.21", "", value = 0, step = 0.5)), material_column(width = 4, numericInput("sigma2.22", "", value = 1, step = 0.5)) ), material_row( material_column(width = 4, numericInput("sigma2.31", "", value = 0, step = 0.5)), material_column(width = 4, numericInput("sigma2.32", "", value = 0, step = 0.5)), material_column(width = 4, numericInput("sigma2.33", "", value = 1, step = 0.5)) ) ) inMix <- tagList( material_slider( input_id = "lambda", label = "Lambda - mix probability (100x)", initial_value = 60, min_value = 1, max_value = 99 ), material_slider( input_id = "sizeN", label = "Data length (n)", initial_value = 100, min_value = 50, max_value = 500 ) ) inEMextra <- material_row( tags$br(), tags$br(), material_column(width = 6, material_number_box( input_id = "maxiter", label = "Max Iterations", initial_value = 500, min_value = 100, max_value = 2000 )), material_column(width = 6, material_number_box( input_id = "initMag", label = "InitValue Error", initial_value = 3, min_value = 1, max_value = 10 )) ) runEMbtn <- material_row( material_column(width = 6, material_button("genSim", label= "Generate Data")), material_column(width = 6, material_button("runEM", label = "Run EM")) ) # modules in uni ---------------------------------------------------------- boxConfig <- material_row( title = "Configurations: Missingness & Imputation", # missingness inputs material_column(width = 4, material_card( inMissType, inMissPercent, p(tags$i("Data will be generated automatically")) )), # imputation inputs material_column(width = 4, material_card( inImputeM, inImputeMulti, runImputation )), # graph options material_column(width = 4, material_card( inPlotItems, p(tags$i("only available after imputation | V1,...,V9,... stand for each imputation result from the multi-imputation")), inShowGroup )) ) boxValues <- material_row( valueBoxOutput(width = 2, "valueBoxA"), valueBoxOutput(width = 2, "valueBoxB"), valueBoxOutput(width = 4, "valueBoxC"), valueBoxOutput(width = 4, "valueBoxD") ) boxImpRes <- material_row( material_column(width = 5, material_card( title = "Result", dataTableOutput("missTable"), plotOutput("missPlot") )), material_column(width = 7, material_card( title = "Visualization", plotlyOutput("missMain") )) ) # modules in EM ----------------------------------------------------------- boxEMconfig <- material_row( title = "Simulation Configuration", # distribution 1 material_column(width = 4, material_card(title = "Distribution 1", inMu1, inSigma1)), # distribution 2 material_column(width = 4, material_card(title = "Distribution 2", inMu2, inSigma2)), # options and run material_column(width = 4, material_card(title = "Mixing & EM option", inMix, inEMextra), runEMbtn) ) boxEMRes <- material_row( material_column(width = 4, material_card(title = "Result", htmlOutput("emInfo"), verbatimTextOutput("emPrint"))), material_column(width = 8, material_card(title = "3D Visualization", plotlyOutput("plot3d", height = "500px"))) ) # modules in source ------------------------------------------------------- boxSource0 <- material_card( title = "Shiny 3-file structure", p("ui.R server.R and global.R") ) boxSource1 <- material_card( title = "ui.R" , tags$pre(includeText("ui.R")) ) boxSource2 <- material_card( title = "server.R", tags$pre(includeText("server.R")) ) boxSource3 <- material_card( title = "global.R", tags$pre(includeText("global.R")) ) # MAKE the UI ------------------------------------------------------------- material_page( title = "My Missing Data Lab", nav_bar_color = "cyan lighten-2", material_tabs( tabs = c("EM Algorithm" = "em", "Univariate Lab" = "lab", "Source" = "source") ), material_tab_content( tab_id = "em", boxEMconfig, boxEMRes ), material_tab_content( tab_id = "lab", tags$h1("Coming Soon") # tags$p("I am currently working on a dynamic link to ESS (European Social Survey), and will be able to publish result here") # boxConfig, boxValues, boxImpRes ), material_tab_content( tab_id = "source", tags$div( class = "container", boxSource0, boxSource1, boxSource2, boxSource3 ) ) )
library(shinydashboard) library(shinyWidgets) library(plotly) library(purrr) library(matrixcalc) library(DT) library(dplyr) # global var -------------------------------------------------------------- myColors<- setNames(c('#66c2a5','#fc8d62','#8da0cb','#e78ac3','#a6d854','#ffd92f'), c('Distribution 1', 'Distribution 2', 'Theoretic mu 1', 'Theoretic mu 2', 'Trace of mu 1', 'Trace of mu 2')) orgDF <- readRDS("sampleData.rds"); genDF <- NULL; mainDF <- NULL; sim <- NULL; init <- NULL; p3d <- NULL shinyServer(function(input, output, session) { # generate missing data --------------------------------------------------- observeEvent({input$missType; input$missPercent},{ genList <- createData(orgDF, input$missType, input$missPercent / 100) genDF <<- genList$generatedData output$valueBoxA <- renderValueBox({ valueBox( nrow(genDF), "total observations", icon = icon("bars"), color = "red") }) output$valueBoxB <- renderValueBox({ valueBox( sum(!is.na(genDF$Total)), "complete cases", icon = icon("info-circle"), color = "yellow") }) output$valueBoxC <- renderValueBox({ valueBox( paste(round(mean(genDF$Total, na.rm = T), digits = 2), round(mean(orgDF$Total), digits = 2), sep = " / "), "mean: CC vs. Truth", icon = icon("dashboard"), color = "aqua") }) output$valueBoxD <- renderValueBox({ valueBox( paste(round(sd(genDF$Total, na.rm = T), digits = 2), round(sd(orgDF$Total), digits = 2), sep = " / "), "sd: CC vs. Truth", icon = icon("calculator"), color = "blue") }) }) # imputation -------------------------------------------------------------- observeEvent(input$runImputation,{ impList <- imputointi(genDF, input$imputeMethod, input$imputeN) output$missPlot <- renderPlot(impList$impPlot) resultDT <- datatable(round(tableIMP(impList, orgDF), digits = 3), options = list(dom = "t", rownames = FALSE)) output$missTable <- renderDataTable(resultDT) # set a global var for plot impDF <- impList$impDATA missId <- which(is.na(genDF$Total)) plotDF <- as.data.frame(matrix(nrow = nrow(orgDF), ncol = ncol(impDF))) plotDF[missId,1:ncol(impDF)] <- impDF plotDF$age <- genDF$Age plotDF$days <- genDF$Days plotDF$ObsTotal <- genDF$Total plotDF$real <- NA plotDF$real[missId] <- orgDF$Total[missId] mainDF <<- tidyr::gather(plotDF, "key", "total", -age, -days) updatePickerInput(session, "plotItems", choices = unique(mainDF$key), selected = c("ObsTotal", "real","V1")) }) observeEvent({input$showGroup; input$plotItems},{ if(!is.null(mainDF)){ plotData <- mainDF %>% filter(key %in% input$plotItems) p <- ggplot(plotData, aes(days, total, color = key)) + geom_point() print(head(plotData)); print(input$plotItems) if(input$showGroup) p <- p + facet_wrap(~age) output$missMain <- renderPlotly((p)) } }) # EM: generate samples ---------------------------------------------------- output$emInfo <- renderUI(p('Note: The initial value is generated automatically by including an error defined by InitValue Error')) observeEvent(input$genSim,{ sigma1 <- matrix(c(input$sigma1.11, input$sigma1.21, input$sigma1.31, input$sigma1.21, input$sigma1.22, input$sigma1.32, input$sigma1.31, input$sigma1.32, input$sigma1.33), ncol = 3, nrow = 3) sigma2 <- matrix(c(input$sigma2.11, input$sigma2.21, input$sigma2.31, input$sigma2.21, input$sigma2.22, input$sigma2.32, input$sigma2.31, input$sigma2.32, input$sigma2.33), ncol = 3, nrow = 3) mu1 <- c(input$mean1x, input$mean1y, input$mean1z) mu2 <- c(input$mean2x, input$mean2y, input$mean2z) if(is.positive.semi.definite(sigma1) & is.positive.semi.definite(sigma2)){ sim <<- genData(input$sizeN, mu1, mu2, sigma1, sigma2, input$lambda/100) init <<- randomInit(mu1, mu2, sigma1, sigma2, input$lambda/100, input$initMag) print(input$lambda) p3d <<- plot_ly(sim$df) %>% add_markers(x = ~X1, y= ~X2, z= ~X3, color = ~isFirst, colors = myColors, marker = list(opacity = 0.6, symbol = "circle-open-dot", size = 3)) %>% add_markers(x = mu1[1], y = mu1[2], z = mu1[3], color = "Theoretic mu 1", marker = list(opacity =1, symbol = "diamond", size = 5))%>% add_markers(x = mu2[1], y = mu2[2], z = mu2[3], color = "Theoretic mu 2", marker = list( symbol = "diamond", size = 5)) output$plot3d <- renderPlotly(p3d) output$emPrint <- renderPrint({ print("Samples Generated and Plotted") print(list(mu1 = mu1, mu2 = mu2, sigma1 = sigma1, sigma2 = sigma2)) }) } else { output$emInfo <- renderUI(tagList(tags$b("ERROR: "), p("please change covariance | must be positively semi-definite"))) output$emPrint <- renderPrint({ print("covariance matrix error") print("nothing updated") }) } }) # EM: do the algorithm ---------------------------------------------------- observeEvent(input$runEM, { if(!is.null(sim)){ em <- myEM(init, sim$data, maxiter = input$maxiter) tr.mu11 <- em$all %>% map(1) %>% map_dbl(1) tr.mu12 <- em$all %>% map(1) %>% map_dbl(2) tr.mu13 <- em$all %>% map(1) %>% map_dbl(3) tr.mu21 <- em$all %>% map(2) %>% map_dbl(1) tr.mu22 <- em$all %>% map(2) %>% map_dbl(2) tr.mu23 <- em$all %>% map(2) %>% map_dbl(3) p3d <<- p3d %>% add_markers(x = tr.mu11, y = tr.mu12, z = tr.mu13, color = "Trace of mu 1", marker = list( symbol = "cross", size = 5)) %>% add_markers(x = tr.mu21, y = tr.mu22, z = tr.mu23, color = "Trace of mu 2", marker = list( symbol = "cross", size = 5)) output$emInfo <- renderUI(tagList( p("EM result after", length(tr.mu11), "iterations", br(), "Covergence reached?", !em$reachMax, br(), "convergence is set to 1e-12") )) output$emPrint <- renderPrint({ print(em$result) print("=========") print("initial values are") print(init) }) output$plot3d <- renderPlotly(p3d) } else { output$emInfo <- renderUI(tagList(tags$b("ERROR: "), p("please click Generate Data first"))) output$emPrint <- renderPrint({ print("no data available") }) } }) })
library(MASS) library(mice) # impute ------------------------------------------------------------------ createData <- function(data, missingnessType, amount){ if(missingnessType == "MCAR"){ a <- 1:length(data$Age) generatedData <- data #This missingness index can be used to identify those values that go missing missingnessIndex <- sample(a,round(length(data$Age)*amount)) generatedData[missingnessIndex,2] <- NA } if(missingnessType=="MAR"){ #data <- arrange(data,data$Age) data <- data[order(data$Age),] data1234 <-data[data$Age==1 |data$Age==2 |data$Age==3 |data$Age==4,] data56 <- data[data$Age==5 | data$Age==6,] #rownames(data56) <- seq(length(data56$Age)) #rownames(data1234) <- seq(length(data1234$Age)) a1 <- 1:length(data56$Age) a2 <- 1:length(data1234$Age) missingnessIndex1234 <- sample(a2,round(length(data1234$Age)*amount)) if (amount + 0.2 >= 1) amount=0.8 missingnessIndex56 <- sample(a1,round(length(data56$Age)*(amount+0.2))) data56[missingnessIndex56,2] <- NA data1234[missingnessIndex1234,2] <- NA generatedData <- data.frame(rbind(data1234,data56)) missingnessIndex <- c(missingnessIndex1234,missingnessIndex56+length(data1234$Age)) } if(missingnessType=="MNAR1"){ data <- data[order(data$Total),] #data <- arrange(data,data$Total) data0 <-data[data$Total < 120,] data120 <- data[data$Total>=120,] #rownames(data120) <- seq(length(data120$Age)) #rownames(data0) <- seq(length(data0$Age)) a1 <- 1:length(data120$Age) a2 <- 1:length(data0$Age) missingnessIndex0 <- sample(a2,round(length(data0$Age)*amount)) if (amount + 0.2 >= 1) amount=0.8 missingnessIndex120 <- sample(a1,round(length(data120$Age)*(amount+0.2))) data120[missingnessIndex120,2] <- NA data0[missingnessIndex0,2] <- NA generatedData <- data.frame(rbind(data0,data120)) missingnessIndex <- c(missingnessIndex0,missingnessIndex120+length(data0$Age)) } if(missingnessType=="MNAR2") { data <- data[order(data$Total),] #data <- arrange(data,data$Total) index <- data$Days>0 data2 <- data[index,] dataLarge<- data2[(data2$Total/data2$Days)>=5,] dataSmall1 <-data2[data2$Total/data2$Days < 5,] dataSmall2 <- data[index==FALSE,] dataSmall <- data.frame(rbind(dataSmall1,dataSmall2)) a1 <- 1:length(dataLarge$Age) a2 <- 1:length(dataSmall$Age) missingnessIndexSmall <- sample(a2,round(length(dataSmall$Age)*amount)) if (amount + 0.2 >= 1) amount=0.8 missingnessIndexLarge <- sample(a1,round(length(dataLarge$Age)*(amount+0.2))) dataLarge[missingnessIndexLarge,2] <- NA dataSmall[missingnessIndexSmall,2] <- NA generatedData <- data.frame(rbind(dataSmall,dataLarge)) missingnessIndex <- c(missingnessIndexSmall,missingnessIndexLarge+length(dataSmall$Age)) } # First is index telling what rows are missing from the original data. # Then returns the new data with missingness. return(list(missingnessIndex = missingnessIndex, generatedData = generatedData)) } # Imputation itself. Gets data which has missing values and information # about what method to use in "mice" function. imputointi <- function(generatedData, method, m) { imp1 <- mice(generatedData[,1:3],m=m, meth=method, print=F) impPlot <- xyplot(imp1,Total~Days|Age) #summary(imp1) fish1 <- with(imp1,lm(Total~-1+as.factor(Age))) fish1_pooled <- summary(pool(fish1)) #print(fish1_pooled) impPARAM<-(fish1_pooled[1:6,1:2]) impDATA <- imp1$imp$Total # Returns first table of estimated values and mean errors, then # the imputated values (20 columns since we have 20 imputations) # and lastly a plot about imputations. This plot may or may not be # used. return(list(impPARAM = impPARAM, impDATA = impDATA, impPlot = impPlot)) } # Returns a table with imputated data mean, mean error and real data mean tableIMP <- function(impu, aineisto) { impTable <- (impu[[1]]) aineisto <- as.data.frame(aineisto) realMean <- NULL for (i in 1:6) { realMean[i] <- mean(aineisto[aineisto$Age==i,2]) } impTable <- data.frame(impTable,realMean) rownames(impTable) <- paste("Age Group", 1:6) impTable } # EM ---------------------------------------------------------------------- genData <- function(n, mu1, mu2, sigma1, sigma2, lambda) { xa <- mvrnorm(n, mu = mu1, Sigma = sigma1) xb <- mvrnorm(n, mu = mu2, Sigma = sigma2) p <- rbinom(n, 1, lambda) x <- xa x[which(p==0),] <- xb [which(p==0),] # construct data.frame isFirst <- rep("Distribution 1",n) isFirst[which(p==0)] <- "Distribution 2" df <- data.frame(x, isFirst) return(list(data = x, id = p, df = df)) } myEM <- function(init, x, maxiter = 1000, eps = 1e-12) { library(mvtnorm) theta <- list() theta[[1]] <- init for (i in 2:maxiter) { theta.p<- theta[[i-1]] # previous theta R1 <- numeric(nrow(x)) R2 <- numeric(nrow(x)) # use log for accuracy for (j in 1:nrow(x)) { lf1 <- dmvnorm((x[j,]),mean = theta.p$mu1, sigma = theta.p$sigma1) lf2 <- dmvnorm((x[j,]),mean = theta.p$mu2, sigma = theta.p$sigma2) Ra <- theta.p$lambda * lf1 Rb <- (1 - theta.p$lambda) * lf2 R1[j] <- Ra / (Ra + Rb) R2[j] <- 1- R1[j] } lambda <- mean(R1) # point-wise multiplication mu1 <- colSums(R1 * x) / sum(R1) mu2 <- colSums(R2 * x) / sum(R2) # calculate Rt(xi - mu1)(xi - mu1) !new mu rxx1 <- R1[j] * (x[j,] - mu1) %*% t(x[j,] - mu1) rxx2 <- R2[j] * (x[j,] - mu2) %*% t(x[j,] - mu2) for (j in 2: nrow(x)) { rxx1 <- rxx1 + R1[j] * (x[j,] - mu1) %*% t(x[j,] - mu1) rxx2 <- rxx2 + R2[j] * (x[j,] - mu2) %*% t(x[j,] - mu2) } theta[[i]] <- list(mu1 = mu1, mu2 = mu2, sigma1 = rxx1 / sum(R1), sigma2 = rxx2 / sum(R2), lambda = lambda) # return if close enough eDist <- dist(rbind(mu1, theta.p$mu1)) + dist(rbind(mu2, theta.p$mu2)) if( eDist < 2 * eps) return(list(all = theta, result = theta[[i]], reachMax = F)) } return(list(all = theta, result = theta[[maxiter]], reachMax = TRUE)) } randomInit <- function(mu1, mu2, sigma1, sigma2, lambda, magnitude) { p <- length(mu1) diag10 <- magnitude * diag(nrow = p) mu1.x <- mu1 + mvrnorm(1, rep(0,p), diag10) mu2.x <- mu1 + mvrnorm(1, rep(0,p), diag10) lambda.x <- runif(1) return(list(mu1 = mu1.x, mu2 = mu2.x, sigma1 = diag10, sigma2 = diag10, lambda = lambda.x)) }