deep learning model

author: Bobby <[email protected]> 2022-04-29 16:58:49 -0400
committer: Bobby <[email protected]> 2022-04-29 16:58:49 -0400
commit: 77ac9ab78f0d14ba4e26537bf9c35b66a7dcaa0f (patch)
tree: 1fe22f7e11f2d16ec9000fec83d58d300789ea38
parent: 80905013b68e901594fe310dae13f455ad965a2a (diff)
download: Network-Intrusion-Detection-77ac9ab78f0d14ba4e26537bf9c35b66a7dcaa0f.tar.xz
Network-Intrusion-Detection-77ac9ab78f0d14ba4e26537bf9c35b66a7dcaa0f.zip
3 files changed, 232 insertions, 4 deletions
diff --git a/.gitignore b/.gitignore
index 9fb7372..171e9e2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -39,3 +39,5 @@ vignettes/*.pdf
 # R Environment Variables
 .Renviron
 *.csv
+.DS_Store
+models/
diff --git a/app.R b/app.R
index ceff262..c55d105 100644
--- a/app.R
+++ b/app.R
@@ -12,7 +12,7 @@ library(DT)
 library(data.table)
 library(ggplot2)
 library(shinycssloaders)
-
+library(h2o)
 
 # Defining Non Changing Variables
 data <- fread("2020.10.01.csv")
@@ -42,6 +42,9 @@ get_color <- function(a = 1) {
     return(alpha("#e95420", a))
 }
 
+# Load the models; dl_model must be the path that models.R saved (not set here)
+h2o.init()  # h2o.loadModel() needs a running H2O cluster
+model.dl <- h2o.loadModel(dl_model)
 # Define UI for application
 ui <- fluidPage(
     theme = shinytheme("united"),
@@ -139,8 +142,7 @@ ui <- fluidPage(
                     ),
                     actionButton("plot", "Plot Graph",
                                  width = "100%", icon = icon("chart-line"),
-                                 style="color: #fff; background-color: #e95420;
-                                 outline: none")
+                                 class = "btn btn-primary")
                 ),
                 mainPanel(
                     withSpinner(
@@ -151,7 +153,57 @@ ui <- fluidPage(
             )
         ),
         tabPanel(
-            "Compare Models"
+            "Predictions",
+            sidebarLayout(
+                sidebarPanel(
+                    selectInput(
+                        "modelType",
+                        p("Choose a Model to Predict:"),
+                        choices = c("Deep Learning" = "dl")
+                    ),
+                    numericInput("npin", "Number of inbound packets:", 
+                                 10, min = 0),
+                    numericInput("npob", "Number of outbound packets:", 
+                                 10, min = 0),
+                    numericInput("nbin", "Number of bytes in:", 
+                                 2000, min = 0),
+                    numericInput("nbob", "Number of bytes out:", 
+                                 10000, min = 0),
+                    numericInput("dprt", "Destination Port (1024 - 49151):", 
+                                 5234, min = 1024, max = 49151),
+                    numericInput("tepy", "Total Entropy:", 
+                                 18000, min = 0),
+                    actionButton("predictButton", "Predict",
+                                 width = "100%", icon = icon("think-peaks"),
+                                 class = "btn btn-primary")
+                ),
+                mainPanel(
+                    tags$label(h3('Status/Output')),
+                    verbatimTextOutput('contents'),
+                    p(strong("Prediction Legend"), br(), br(), em("1.00 - 1.99"), 
+                      " - Benign", br(), em("2.00 - 2.99"), " - Malicious",
+                      br(), em("3.00 - 3.99"), " - Outlier", 
+                    style="text-align:justify;color:black;
+            background-color:lavender;padding:15px;border-radius:10px"),
+                    tableOutput('tabledata'), # Prediction results table
+                    fluidRow(
+                        column(
+                            width = 6,
+                            withSpinner(
+                                plotOutput("varImpPlot"),
+                                type = 6, color = "#e95420"
+                            )
+                        ),
+                        column(
+                            width = 6,
+                            withSpinner(
+                                plotOutput("lcPlot"),
+                                type = 6, color = "#e95420"
+                            )
+                        )
+                    )
+                )
+            )
         )
     )
 )
@@ -175,6 +227,52 @@ server <- function(input, output) {
         colnames = features)
     )
     
+    # Score the current sidebar inputs with the deep learning model.
+    datasetInput <- reactive({
+        req(input$npin)
+        req(input$npob)
+        req(input$nbin)
+        req(input$nbob)
+        req(input$dprt)
+        req(input$tepy)
+        # One-row frame built in memory: no shared "input.csv" that parallel
+        # sessions could overwrite, and the Shiny `input` is not rebound.
+        test <- data.frame(
+            num_pkts_in = input$npin,
+            bytes_in = input$nbin,
+            num_pkts_out = input$npob,
+            bytes_out = input$nbob,
+            dest_port = input$dprt,
+            total_entropy = input$tepy
+        )
+        # Returns the H2OFrame of predictions for the single input row.
+        predict(model.dl, as.h2o(test))
+    })
+    
+    output$varImpPlot <- renderPlot({
+        h2o.varimp_plot(model.dl)  # model loaded at startup; `dl` is not defined in app.R
+    })
+    
+    output$lcPlot <- renderPlot({
+        h2o.learning_curve_plot(model.dl)  # model loaded at startup; `dl` is not defined in app.R
+    })
+    
+    # Status text: tells the user whether "Predict" has been pressed yet
+    output$contents <- renderPrint({
+        predict_pressed <- input$predictButton > 0
+        if (!predict_pressed) {
+            return("Server is ready for calculation.")
+        }
+        "Calculation complete."
+    })
+    
+    # Prediction results table; stays empty until "Predict" is clicked
+    output$tabledata <- renderTable({
+        clicks <- input$predictButton
+        if (clicks <= 0) return(NULL)
+        isolate(datasetInput())
+    })
+    
     output$secondSelection <- renderUI({
         selectedFeature <- input$plotVariable1
         selectInput(
diff --git a/models.R b/models.R
new file mode 100644
index 0000000..1444a8e
--- /dev/null
+++ b/models.R
@@ -0,0 +1,128 @@
+# Import necessary libraries
+library(data.table)
+library(caret)
+library(h2o)
+localH2O = h2o.init()
+
+# Import the network intrusion data set and drop rows containing NA
+dataset <- fread("2020.10.01.csv")
+dataset = na.omit(dataset)
+dataset <- dataset[, -c(12, 13)]  # drop the 12th and 13th columns
+
+# Encode 'label' as numeric: benign = 1, malicious = 2, outlier = 3
+dataset$label <- factor(dataset$label,
+                           levels = c("benign", "malicious", "outlier"),
+                           labels = c(1, 2, 3))
+dataset$label <- as.numeric(dataset$label)
+
+# Remove Redundant Features - first find correlated features (cutoff 0.5)
+correlationMatrix <- cor(dataset)
+highlyCorrelated <- findCorrelation(correlationMatrix, cutoff=0.5)
+print(highlyCorrelated)
+# Keep seven columns by position (presumably six predictors + label - TODO confirm)
+df <- dataset[, c(8,2,7,3,5,12,13)]
+df <- as.h2o(df)
+# Preview the first rows of the selected columns
+head(dataset[, c(8,2,7,3,5,12,13)])
+
+
+# set the predictor and response columns
+predictors <- c("num_pkts_in", "bytes_in", "num_pkts_out", "bytes_out",
+                "dest_port", "total_entropy")
+response <- "label"
+
+# split the dataset into train (80%) and test (20%) sets; seeded so reruns match
+df_splits <- h2o.splitFrame(data = df, ratios = 0.8, seed = 23123)
+train <- df_splits[[1]]
+test <- df_splits[[2]]
+
+
+# Build and train the model on the training split only (x = first six columns):
+dl <- h2o.deeplearning(x = 1:6,
+                       y = response,
+                       distribution = "tweedie",
+                       hidden = c(1),
+                       epochs = 1000,
+                       train_samples_per_iteration = -1,
+                       reproducible = TRUE,
+                       activation = "Tanh",
+                       single_node_mode = FALSE,
+                       balance_classes = FALSE,
+                       force_load_balance = FALSE,
+                       seed = 23123,
+                       tweedie_power = 1.5,
+                       score_training_samples = 0,
+                       score_validation_samples = 0,
+                       training_frame = train,
+                       stopping_rounds = 0)
+
+# Eval performance on the held-out test split (not the training data):
+perf <- h2o.performance(dl, newdata = test)
+perf
+
+# Generate predictions on the test set:
+pred <- h2o.predict(dl, newdata = test)
+pred
+summary(dl)
+plot(dl)
+
+# Save the model under ./models (git-ignored) instead of a user-specific path
+dir.create("models", showWarnings = FALSE)
+dl_model <- h2o.saveModel(object = dl, path = file.path(getwd(), "models"),
+                          force = TRUE)
+print(dl_model)  # path of the saved model file
+
+h2o.varimp_plot(dl)
+h2o.learning_curve_plot(dl)
+
+
+
+
+
+
+
+# Partition dataset 60/40 into training and test rows (caret)
+ind <- createDataPartition(dataset$label, p=0.6, list=FALSE)
+dataset.train <- dataset[ind,]
+dataset.test <- dataset[-ind,]
+
+
+
+
+
+# NOTE(review): rpart() and rpart.plot() are used below, but the library()
+# calls at the top of this file load only data.table, caret and h2o.
+# Decision Tree
+tree <- rpart(label ~., data = dataset.train)
+rpart.plot(tree)
+printcp(tree)
+plotcp(tree)
+p <- predict(tree, dataset.train)
+confusionMatrix(p, dataset.train$label, positive='y')
+# NOTE(review): label was converted to numeric above, and confusionMatrix()
+# is given positive='y', which is not one of the label values - confirm.
+
+
+# Split the class attribute
+dataset.traintarget <- dataset[ind == 1, 5]
+dataset.testtarget <- dataset[ind==2, 5]
+# NOTE(review): ind holds row indices from createDataPartition(), so
+# `ind == 1` / `ind == 2` presumably do not select train/test rows - verify.
+# Remove Redundant Features - First Find Correlated Features
+correlationMatrix <- cor(dataset)
+highlyCorrelated <- findCorrelation(correlationMatrix, cutoff=0.5)
+print(highlyCorrelated)
+# Keep the same seven column positions that were selected for df earlier
+dataset <- dataset[, c(8,2,7,3,5,12,13)]
+
+
+
+
+
+
+
+
+
+
+
+
author	Bobby <[email protected]>	2022-04-29 16:58:49 -0400
committer	Bobby <[email protected]>	2022-04-29 16:58:49 -0400
commit	77ac9ab78f0d14ba4e26537bf9c35b66a7dcaa0f (patch)
tree	1fe22f7e11f2d16ec9000fec83d58d300789ea38
parent	80905013b68e901594fe310dae13f455ad965a2a (diff)
download	Network-Intrusion-Detection-77ac9ab78f0d14ba4e26537bf9c35b66a7dcaa0f.tar.xz Network-Intrusion-Detection-77ac9ab78f0d14ba4e26537bf9c35b66a7dcaa0f.zip