diff options
| author | Bobby <[email protected]> | 2022-04-29 16:58:49 -0400 |
|---|---|---|
| committer | Bobby <[email protected]> | 2022-04-29 16:58:49 -0400 |
| commit | 77ac9ab78f0d14ba4e26537bf9c35b66a7dcaa0f (patch) | |
| tree | 1fe22f7e11f2d16ec9000fec83d58d300789ea38 | |
| parent | 80905013b68e901594fe310dae13f455ad965a2a (diff) | |
| download | Network-Intrusion-Detection-77ac9ab78f0d14ba4e26537bf9c35b66a7dcaa0f.tar.xz Network-Intrusion-Detection-77ac9ab78f0d14ba4e26537bf9c35b66a7dcaa0f.zip | |
deep learning model
| -rw-r--r-- | .gitignore | 2 | ||||
| -rw-r--r-- | app.R | 106 | ||||
| -rw-r--r-- | models.R | 128 |
3 files changed, 232 insertions, 4 deletions
@@ -39,3 +39,5 @@ vignettes/*.pdf # R Environment Variables .Renviron *.csv +.DS_Store +models/ @@ -12,7 +12,7 @@ library(DT) library(data.table) library(ggplot2) library(shinycssloaders) - +library(h2o) # Defining Non Changing Variables data <- fread("2020.10.01.csv") @@ -42,6 +42,9 @@ get_color <- function(a = 1) { return(alpha("#e95420", a)) } +# Load the models +model.dl = h2o.loadModel(dl_model) + # Define UI for application ui <- fluidPage( theme = shinytheme("united"), @@ -139,8 +142,7 @@ ui <- fluidPage( ), actionButton("plot", "Plot Graph", width = "100%", icon = icon("chart-line"), - style="color: #fff; background-color: #e95420; - outline: none") + class = "btn btn-primary") ), mainPanel( withSpinner( @@ -151,7 +153,57 @@ ui <- fluidPage( ) ), tabPanel( - "Compare Models" + "Predictions", + sidebarLayout( + sidebarPanel( + selectInput( + "modelType", + p("Choose a Model to Predict:"), + choices = c("Deep Learning" = "dl") + ), + numericInput("npin", "Number of inbound packets:", + 10, min = 0), + numericInput("npob", "Number of outbound packets:", + 10, min = 0), + numericInput("nbin", "Number of bytes in:", + 2000, min = 0), + numericInput("nbob", "Number of bytes out:", + 10000, min = 0), + numericInput("dprt", "Destination Port (1024 - 49151):", + 5234, min = 1024, max = 49151), + numericInput("tepy", "Total Entropy:", + 18000, min = 0), + actionButton("predictButton", "Predict", + width = "100%", icon = icon("think-peaks"), + class = "btn btn-primary") + ), + mainPanel( + tags$label(h3('Status/Output')), + verbatimTextOutput('contents'), + p(strong("Prediction Legend"), br(), br(), em("1.00 - 1.99"), + " - Benign", br(), em("2.00 - 2.99"), " - Malicious", + br(), em("3.00 - 3.99"), " - Outlier", + style="text-align:justify;color:black; + background-color:lavender;padding:15px;border-radius:10px"), + tableOutput('tabledata'), # Prediction results table + fluidRow( + column( + width = 6, + withSpinner( + plotOutput("varImpPlot"), + type = 6, color = "#e95420" + ) + ), + column( + width = 6, + withSpinner( + plotOutput("lcPlot"), + type = 6, color = "#e95420" + ) + ) + ) + ) + ) ) ) ) @@ -175,6 +227,52 @@ server <- function(input, output) { colnames = features) ) + datasetInput <- reactive({ + req(input$npin) + req(input$npob) + req(input$nbin) + req(input$nbob) + req(input$dprt) + req(input$tepy) + df <- data.frame( + Name = c("num_pkts_in", "bytes_in", "num_pkts_out", "bytes_out", + "dest_port", "total_entropy"), + Value = as.character(c(input$npin, input$nbin, input$npob, + input$nbob, input$dprt, input$tepy)), + stringsAsFactors = FALSE) + labels <- 0 + df <- rbind(df, labels) + input <- transpose(df) + write.table(input,"input.csv", sep=",", quote = FALSE, + row.names = FALSE, col.names = FALSE) + test <- read.csv(paste("input", ".csv", sep=""), header = TRUE) + prediction <- predict(model.dl, as.h2o(test)) + }) + + output$varImpPlot <- renderPlot({ + h2o.varimp_plot(dl) + }) + + output$lcPlot <- renderPlot({ + h2o.learning_curve_plot(dl) + }) + + # Status/Output Text Box + output$contents <- renderPrint({ + if (input$predictButton>0) { + isolate("Calculation complete.") + } else { + return("Server is ready for calculation.") + } + }) + + # Prediction results table + output$tabledata <- renderTable({ + if (input$predictButton>0) { + isolate(datasetInput()) + } + }) + output$secondSelection <- renderUI({ selectedFeature <- input$plotVariable1 selectInput( diff --git a/models.R b/models.R new file mode 100644 index 0000000..1444a8e --- /dev/null +++ b/models.R @@ -0,0 +1,128 @@ +# Import necessary libraries +library(data.table) +library(caret) +library(h2o) +localH2O = h2o.init() + +# Importing the Network Intrusion Data set +dataset <- fread("2020.10.01.csv") +dataset = na.omit(dataset) +dataset <- dataset[, -c(12, 13)] + +# Encoding 'label' as Numeric Variable +dataset$label <- factor(dataset$label, + levels = c("benign", "malicious", "outlier"), + labels = c(1, 2, 3)) +dataset$label <- as.numeric(dataset$label) + +# Remove Redundant Features - First Find Correlated Features +correlationMatrix <- cor(dataset) +highlyCorrelated <- findCorrelation(correlationMatrix, cutoff=0.5) +print(highlyCorrelated) + +df <- dataset[, c(8,2,7,3,5,12,13)] +df <- as.h2o(df) + +head(dataset[, c(8,2,7,3,5,12,13)]) + + +# set the predictor and response columns +predictors <- c("num_pkts_in", "bytes_in", "num_pkts_out", "bytes_out", + "dest_port", "total_entropy") +response <- "label" + +# split the dataset into train and test sets +df_splits <- h2o.splitFrame(data = df, ratios = 0.8) +train <- df_splits[[1]] +test <- df_splits[[2]] + + +# Build and train the model: +dl <- h2o.deeplearning(x = 1:6, + y = "label", + distribution = "tweedie", + hidden = c(1), + epochs = 1000, + train_samples_per_iteration = -1, + reproducible = TRUE, + activation = "Tanh", + single_node_mode = FALSE, + balance_classes = FALSE, + force_load_balance = FALSE, + seed = 23123, + tweedie_power = 1.5, + score_training_samples = 0, + score_validation_samples = 0, + training_frame = df, + stopping_rounds = 0) + +# Eval performance: +perf <- h2o.performance(dl) +perf + +# Generate predictions on a test set (if necessary): +pred <- h2o.predict(dl, newdata = df) +pred +summary(dl) +plot(dl) + +# Save the model +dl_model <- h2o.saveModel(object = dl, + path = "/Users/lucifer/Documents/projects/NetworkIntrusionDetection/models", + force = TRUE) +print(dl_model) + +h2o.varimp_plot(dl) +h2o.learning_curve_plot(dl) + + + + + + + + +ind <- createDataPartition(dataset$label, p=0.6, list=FALSE) +dataset.train <- dataset[ind,] +dataset.test <- dataset[-ind,] + + + + + + + +# Decision Tree +tree <- rpart(label ~., data = dataset.train) +rpart.plot(tree) +printcp(tree) +plotcp(tree) +p <- predict(tree, dataset.train) +confusionMatrix(p, dataset.train$label, positive='y') + + + + +# Split the class attribute +dataset.traintarget <- dataset[ind == 1, 5] +dataset.testtarget <- dataset[ind==2, 5] + + +# Remove Redundant Features - First Find Correlated Features +correlationMatrix <- cor(dataset) +highlyCorrelated <- findCorrelation(correlationMatrix, cutoff=0.5) +print(highlyCorrelated) + +dataset <- dataset[, c(8,2,7,3,5,12,13)] + + + + + + + + + + + + |
