all models

author: Bobby <[email protected]> 2022-04-29 18:18:42 -0400
committer: Bobby <[email protected]> 2022-04-29 18:18:42 -0400
commit: 5dbd34fbc10b190d6c3934db21a2918757c2a132 (patch)
tree: ba932201fccd78e2bd03da3e1ce3b89e194238b2
parent: 77ac9ab78f0d14ba4e26537bf9c35b66a7dcaa0f (diff)
download: Network-Intrusion-Detection-5dbd34fbc10b190d6c3934db21a2918757c2a132.tar.xz
Network-Intrusion-Detection-5dbd34fbc10b190d6c3934db21a2918757c2a132.zip
2 files changed, 209 insertions, 77 deletions
diff --git a/app.R b/app.R
index c55d105..c6639d9 100644
--- a/app.R
+++ b/app.R
@@ -43,7 +43,10 @@ get_color <- function(a = 1) {
 }
 
 # Load the models
-model.dl = h2o.loadModel(dl_model)
+model.dl <- h2o.loadModel(dl_model)
+model.drf <- h2o.loadModel(drf_model)
+model.gbm <- h2o.loadModel(gbm_model)
+model.nb <- h2o.loadModel(nb_model)
 
 # Define UI for application
 ui <- fluidPage(
@@ -100,7 +103,7 @@ ui <- fluidPage(
                 sidebarPanel(
                     selectInput(
                         "plotType",
-                        p("Type of Plot:"),
+                        p("Type of plot:"),
                         choices = c(Histogram = "hist",
                                     "Scatter Plot" = "scatter",
                                     "Mosaic Plot" = "mosaic")
@@ -110,12 +113,12 @@ ui <- fluidPage(
                         condition = "input.plotType == 'hist'",
                         selectInput(
                             "plotVariable",
-                            p("Feature to Visualize:"),
+                            p("Feature to visualize:"),
                             choices = features
                         ),
                         selectInput(
                             "plotVariant",
-                            p("Plot Variant:"),
+                            p("Plot variant:"),
                             choices = c("Normal", "Log 10 Scale")
                         )   
                     ),
@@ -125,7 +128,7 @@ ui <- fluidPage(
                         condition = "input.plotType == 'scatter'",
                         selectInput(
                             "plotVariable1",
-                            p("First Feature to Visualize:"),
+                            p("First feature to visualize:"),
                             choices = features
                         ),
                         uiOutput("secondSelection")
@@ -136,7 +139,7 @@ ui <- fluidPage(
                         condition = "input.plotType == 'mosaic'",
                         selectInput(
                             "mosaicVariable",
-                            p("Select Features to Visualize:"),
+                            p("Select features to visualize:"),
                             choices = c("Labels vs Protocols" = "labproto")
                         )
                     ),
@@ -158,8 +161,11 @@ ui <- fluidPage(
                 sidebarPanel(
                     selectInput(
                         "modelType",
-                        p("Choose a Model to Predict:"),
-                        choices = c("Deep Learning" = "dl")
+                        p("Choose a model to predict:"),
+                        choices = c("Deep Learning" = "dl",
+                                    "Distributed Random Forest" = "drf",
+                                    "Gradient Boosting Machine" = "gbm",
+                                    "Naive Bayes" = "nb")
                     ),
                     numericInput("npin", "Number of inbound packets:", 
                                  10, min = 0),
@@ -169,9 +175,9 @@ ui <- fluidPage(
                                  2000, min = 0),
                     numericInput("nbob", "Number of bytes out:", 
                                  10000, min = 0),
-                    numericInput("dprt", "Destination Port (1024 - 49151):", 
+                    numericInput("dprt", "Destination port (1024 - 49151):", 
                                  5234, min = 1024, max = 49151),
-                    numericInput("tepy", "Total Entropy:", 
+                    numericInput("tepy", "Total entropy:", 
                                  18000, min = 0),
                     actionButton("predictButton", "Predict",
                                  width = "100%", icon = icon("think-peaks"),
@@ -180,11 +186,26 @@ ui <- fluidPage(
                 mainPanel(
                     tags$label(h3('Status/Output')),
                     verbatimTextOutput('contents'),
-                    p(strong("Prediction Legend"), br(), br(), em("1.00 - 1.99"), 
-                      " - Benign", br(), em("2.00 - 2.99"), " - Malicious",
-                      br(), em("3.00 - 3.99"), " - Outlier", 
-                    style="text-align:justify;color:black;
+                    fluidRow(
+                        column(
+                            width = 6,
+                            p(strong("Prediction Legend"), br(), br(), em("1"), 
+                              " - Benign", br(), em("2"), " - Malicious",
+                              br(), em("3"), " - Outlier", 
+                              style="text-align:justify;color:black;
             background-color:lavender;padding:15px;border-radius:10px"),
+                        ),
+                        column(
+                            width = 6,
+                            p(strong("Probability Legend"), br(), br(), em("p1"), 
+                              " - Probability of being a Benign Connection",
+                              br(), em("p2"), " - Probability of being a 
+                              Malicious Connection", br(), em("p3"), " - 
+                              Probability of being an Outlier Connection", 
+                              style="text-align:justify;color:black;
+            background-color:papayawhip;padding:15px;border-radius:10px")
+                        )
+                    ),
                     tableOutput('tabledata'), # Prediction results table
                     fluidRow(
                         column(
@@ -204,6 +225,48 @@ ui <- fluidPage(
                     )
                 )
             )
+        ),
+        tabPanel(
+            "Model Metrics",
+            sidebarLayout(
+                sidebarPanel(
+                    selectInput(
+                        "metricModelType",
+                        p("Choose a model to show metrics:"),
+                        choices = c("Deep Learning" = "dl",
+                                    "Distributed Random Forest" = "drf",
+                                    "Gradient Boosting Machine" = "gbm",
+                                    "Naive Bayes" = "nb"),
+                    ),
+                    actionButton("metricsButton", "Show Metrics",
+                                 width = "100%", icon = icon("tachometer-alt"),
+                                 class = "btn btn-primary")
+                ),
+                mainPanel(
+                    verbatimTextOutput('metrics')
+                )
+            )
+        ),
+        tabPanel(
+            "Model Summary",
+            sidebarLayout(
+                sidebarPanel(
+                    selectInput(
+                        "summaryModelType",
+                        p("Choose a model to show summary:"),
+                        choices = c("Deep Learning" = "dl",
+                                    "Distributed Random Forest" = "drf",
+                                    "Gradient Boosting Machine" = "gbm",
+                                    "Naive Bayes" = "nb")
+                    ),
+                    actionButton("summaryButton", "Show Information",
+                                 width = "100%", icon = icon("clipboard"),
+                                 class = "btn btn-primary")
+                ),
+                mainPanel(
+                    verbatimTextOutput('summary')
+                )
+            )
         )
     )
 )
@@ -228,6 +291,7 @@ server <- function(input, output) {
     )
     
     datasetInput <- reactive({
+        modelType <- input$modelType
         req(input$npin)
         req(input$npob)
         req(input$nbin)
@@ -246,15 +310,71 @@ server <- function(input, output) {
         write.table(input,"input.csv", sep=",", quote = FALSE, 
                     row.names = FALSE, col.names = FALSE)
         test <- read.csv(paste("input", ".csv", sep=""), header = TRUE)
-        prediction <- predict(model.dl, as.h2o(test))
+        
+        if (modelType == "dl") {
+            predict(model.dl, as.h2o(test))
+        } else if (modelType == "drf") {
+            predict(model.drf, as.h2o(test))
+        } else if (modelType == "gbm") {
+            predict(model.gbm, as.h2o(test))
+        } else if (modelType == "nb") {
+            predict(model.nb, as.h2o(test))
+        }
     })
     
     output$varImpPlot <- renderPlot({
-        h2o.varimp_plot(dl)
+        modelType <- input$modelType
+        if (modelType == "dl") {
+            h2o.varimp_plot(model.dl)
+        } else if (modelType == "drf") {
+            h2o.varimp_plot(model.drf)
+        } else if (modelType == "gbm") {
+            h2o.varimp_plot(model.gbm)
+        }
+        
     })
     
     output$lcPlot <- renderPlot({
-        h2o.learning_curve_plot(dl)
+        modelType <- input$modelType
+        if (modelType == "dl") {
+            h2o.learning_curve_plot(model.dl)
+        } else if (modelType == "drf") {
+            h2o.learning_curve_plot(model.drf)
+        } else if (modelType == "gbm") {
+            h2o.learning_curve_plot(model.gbm)
+        }
+    })
+    
+    output$metrics <- renderPrint({
+        input$metricsButton
+        isolate({
+            modelType <- input$metricModelType
+            if (modelType == "dl") {
+                h2o.performance(model.dl)
+            } else if (modelType == "drf") {
+                h2o.performance(model.drf)
+            } else if (modelType == "gbm") {
+                h2o.performance(model.gbm)
+            } else {
+                h2o.performance(model.nb)
+            }
+        })
+    })
+    
+    output$summary <- renderPrint({
+        input$summaryButton
+        isolate({
+            modelType <- input$summaryModelType
+            if (modelType == "dl") {
+                summary(model.dl)
+            } else if (modelType == "drf") {
+                summary(model.drf)
+            } else if (modelType == "gbm") {
+                summary(model.gbm)
+            } else {
+                summary(model.nb)
+            }
+        })
     })
     
     # Status/Output Text Box
@@ -277,7 +397,7 @@ server <- function(input, output) {
         selectedFeature <- input$plotVariable1
         selectInput(
             "plotVariable2",
-            p("Second Feature to Visualize:"),
+            p("Second feature to visualize:"),
             choices = features[!features %in% selectedFeature]
         )
     })
diff --git a/models.R b/models.R
index 1444a8e..3e269e7 100644
--- a/models.R
+++ b/models.R
@@ -6,22 +6,27 @@ localH2O = h2o.init()
 
 # Importing the Network Intrusion Data set
 dataset <- fread("2020.10.01.csv")
-dataset = na.omit(dataset)
+dataset <- na.omit(dataset)
 dataset <- dataset[, -c(12, 13)]
+correlationSet <- dataset
 
-# Encoding 'label' as Numeric Variable
+# Encoding 'label' as Categorical Variable
 dataset$label <- factor(dataset$label,
                            levels = c("benign", "malicious", "outlier"),
                            labels = c(1, 2, 3))
-dataset$label <- as.numeric(dataset$label)
+correlationSet$label <- factor(correlationSet$label,
+                        levels = c("benign", "malicious", "outlier"),
+                        labels = c(1, 2, 3))
+
+correlationSet$label <- as.numeric(correlationSet$label)
 
 # Remove Redundant Features - First Find Correlated Features
-correlationMatrix <- cor(dataset)
+correlationMatrix <- cor(correlationSet)
 highlyCorrelated <- findCorrelation(correlationMatrix, cutoff=0.5)
 print(highlyCorrelated)
 
 df <- dataset[, c(8,2,7,3,5,12,13)]
-df <- as.h2o(df)
+df <- as.h2o(dataset)
 
 head(dataset[, c(8,2,7,3,5,12,13)])
 
@@ -37,12 +42,12 @@ train <- df_splits[[1]]
 test <- df_splits[[2]]
 
 
-# Build and train the model:
+# Build and train Deep learning model:
 dl <- h2o.deeplearning(x = 1:6,
                        y = "label",
-                       distribution = "tweedie",
+                       distribution = "multinomial",
                        hidden = c(1),
-                       epochs = 1000,
+                       epochs = 100,
                        train_samples_per_iteration = -1,
                        reproducible = TRUE,
                        activation = "Tanh",
@@ -50,21 +55,18 @@ dl <- h2o.deeplearning(x = 1:6,
                        balance_classes = FALSE,
                        force_load_balance = FALSE,
                        seed = 23123,
-                       tweedie_power = 1.5,
                        score_training_samples = 0,
                        score_validation_samples = 0,
                        training_frame = df,
                        stopping_rounds = 0)
 
-# Eval performance:
+# Eval performance of deep learning model:
 perf <- h2o.performance(dl)
 perf
 
 # Generate predictions on a test set (if necessary):
 pred <- h2o.predict(dl, newdata = df)
-pred
 summary(dl)
-plot(dl)
 
 # Save the model
 dl_model <- h2o.saveModel(object = dl, 
@@ -72,57 +74,67 @@ dl_model <- h2o.saveModel(object = dl,
                           force = TRUE)
 print(dl_model)
 
-h2o.varimp_plot(dl)
-h2o.learning_curve_plot(dl)
-
-
-
-
-
-
-
-
-ind <- createDataPartition(dataset$label, p=0.6, list=FALSE)
-dataset.train <- dataset[ind,]
-dataset.test <- dataset[-ind,]
-
-
-
-
-
-
-
-# Decision Tree
-tree <- rpart(label ~., data = dataset.train)
-rpart.plot(tree)
-printcp(tree)
-plotcp(tree)
-p <- predict(tree, dataset.train)
-confusionMatrix(p, dataset.train$label, positive='y')
-
-
-
-
-# Split the class attribute
-dataset.traintarget <- dataset[ind == 1, 5]
-dataset.testtarget <- dataset[ind==2, 5]
-
-
-# Remove Redundant Features - First Find Correlated Features
-correlationMatrix <- cor(dataset)
-highlyCorrelated <- findCorrelation(correlationMatrix, cutoff=0.5)
-print(highlyCorrelated)
-
-dataset <- dataset[, c(8,2,7,3,5,12,13)]
-
-
-
-
-
 
+# Build and train distributed random forest model:
+drf <- h2o.randomForest(x = predictors,
+                             y = response,
+                             ntrees = 10,
+                             max_depth = 5,
+                             min_rows = 10,
+                             calibration_frame = test,
+                             binomial_double_trees = TRUE,
+                             training_frame = train,
+                             validation_frame = test)
 
+# Evaluate and summarize the distributed random forest model:
+h2o.performance(drf)
+summary(drf)
 
+# Save the model
+drf_model <- h2o.saveModel(object = drf, 
+                           path = "/Users/lucifer/Documents/projects/NetworkIntrusionDetection/models", 
+                           force = TRUE)
 
+# Build and train the Gradient Boosting machine model:
+gbm <- h2o.gbm(x = predictors,
+                    y = response,
+                    nfolds = 5,
+                    seed = 1111,
+                    keep_cross_validation_predictions = TRUE,
+                    training_frame = df)
 
 
+# Evaluate and summarize the GBM model:
+h2o.performance(gbm)
+summary(gbm)
 
+# Save the model
+gbm_model <- h2o.saveModel(object = gbm, 
+                           path = "/Users/lucifer/Documents/projects/NetworkIntrusionDetection/models", 
+                           force = TRUE)
+
+# Build and train the Naive Bayes model:
+nb <- h2o.naiveBayes(x = predictors,
+                          y = response,
+                          training_frame = df,
+                          laplace = 0,
+                          nfolds = 5,
+                          seed = 1234)
+
+# Eval performance of the Naive Bayes:
+h2o.performance(nb)
+summary(nb)
+
+
+nb_model <- h2o.saveModel(object = nb, 
+                           path = "/Users/lucifer/Documents/projects/NetworkIntrusionDetection/models", 
+                           force = TRUE)
+
+# Build and train the XGBoost model:
+xgb <- h2o.xgboost(x = predictors,
+                   y = response,
+                   training_frame = train,
+                   validation_frame = test,
+                   booster = "dart",
+                   normalize_type = "tree",
+                   seed = 1234)
author	Bobby <[email protected]>	2022-04-29 18:18:42 -0400
committer	Bobby <[email protected]>	2022-04-29 18:18:42 -0400
commit	5dbd34fbc10b190d6c3934db21a2918757c2a132 (patch)
tree	ba932201fccd78e2bd03da3e1ce3b89e194238b2
parent	77ac9ab78f0d14ba4e26537bf9c35b66a7dcaa0f (diff)
download	Network-Intrusion-Detection-5dbd34fbc10b190d6c3934db21a2918757c2a132.tar.xz Network-Intrusion-Detection-5dbd34fbc10b190d6c3934db21a2918757c2a132.zip