diff options
| author | Bobby <[email protected]> | 2022-04-29 18:18:42 -0400 |
|---|---|---|
| committer | Bobby <[email protected]> | 2022-04-29 18:18:42 -0400 |
| commit | 5dbd34fbc10b190d6c3934db21a2918757c2a132 (patch) | |
| tree | ba932201fccd78e2bd03da3e1ce3b89e194238b2 | |
| parent | 77ac9ab78f0d14ba4e26537bf9c35b66a7dcaa0f (diff) | |
| download | Network-Intrusion-Detection-5dbd34fbc10b190d6c3934db21a2918757c2a132.tar.xz Network-Intrusion-Detection-5dbd34fbc10b190d6c3934db21a2918757c2a132.zip | |
all models
| -rw-r--r-- | app.R | 156 | ||||
| -rw-r--r-- | models.R | 130 |
2 files changed, 209 insertions, 77 deletions
@@ -43,7 +43,10 @@ get_color <- function(a = 1) { } # Load the models -model.dl = h2o.loadModel(dl_model) +model.dl <- h2o.loadModel(dl_model) +model.drf <- h2o.loadModel(drf_model) +model.gbm <- h2o.loadModel(gbm_model) +model.nb <- h2o.loadModel(nb_model) # Define UI for application ui <- fluidPage( @@ -100,7 +103,7 @@ ui <- fluidPage( sidebarPanel( selectInput( "plotType", - p("Type of Plot:"), + p("Type of plot:"), choices = c(Histogram = "hist", "Scatter Plot" = "scatter", "Mosaic Plot" = "mosaic") @@ -110,12 +113,12 @@ ui <- fluidPage( condition = "input.plotType == 'hist'", selectInput( "plotVariable", - p("Feature to Visualize:"), + p("Feature to visualize:"), choices = features ), selectInput( "plotVariant", - p("Plot Variant:"), + p("Plot variant:"), choices = c("Normal", "Log 10 Scale") ) ), @@ -125,7 +128,7 @@ ui <- fluidPage( condition = "input.plotType == 'scatter'", selectInput( "plotVariable1", - p("First Feature to Visualize:"), + p("First feature to visualize:"), choices = features ), uiOutput("secondSelection") @@ -136,7 +139,7 @@ ui <- fluidPage( condition = "input.plotType == 'mosaic'", selectInput( "mosaicVariable", - p("Select Features to Visualize:"), + p("Select features to visualize:"), choices = c("Labels vs Protocols" = "labproto") ) ), @@ -158,8 +161,11 @@ ui <- fluidPage( sidebarPanel( selectInput( "modelType", - p("Choose a Model to Predict:"), - choices = c("Deep Learning" = "dl") + p("Choose a model to predict:"), + choices = c("Deep Learning" = "dl", + "Distributed Random Forest" = "drf", + "Gradient Boosting Machine" = "gbm", + "Naive Bayes" = "nb") ), numericInput("npin", "Number of inbound packets:", 10, min = 0), @@ -169,9 +175,9 @@ ui <- fluidPage( 2000, min = 0), numericInput("nbob", "Number of bytes out:", 10000, min = 0), - numericInput("dprt", "Destination Port (1024 - 49151):", + numericInput("dprt", "Destination port (1024 - 49151):", 5234, min = 1024, max = 49151), - numericInput("tepy", "Total Entropy:", + numericInput("tepy", "Total entropy:", 18000, min = 0), actionButton("predictButton", "Predict", width = "100%", icon = icon("think-peaks"), @@ -180,11 +186,26 @@ ui <- fluidPage( mainPanel( tags$label(h3('Status/Output')), verbatimTextOutput('contents'), - p(strong("Prediction Legend"), br(), br(), em("1.00 - 1.99"), - " - Benign", br(), em("2.00 - 2.99"), " - Malicious", - br(), em("3.00 - 3.99"), " - Outlier", - style="text-align:justify;color:black; + fluidRow( + column( + width = 6, + p(strong("Prediction Legend"), br(), br(), em("1"), + " - Benign", br(), em("2"), " - Malicious", + br(), em("3"), " - Outlier", + style="text-align:justify;color:black; background-color:lavender;padding:15px;border-radius:10px"), + ), + column( + width = 6, + p(strong("Probabilty Legend"), br(), br(), em("p1"), + " - Probability of being a Benign Connection", + br(), em("p2"), " - Probability of being a + Malicious Connection", br(), em("p3"), " - + Probability of being an Outlier Connection", + style="text-align:justify;color:black; + background-color:papayawhip;padding:15px;border-radius:10px"), + ) + ), tableOutput('tabledata'), # Prediction results table fluidRow( column( @@ -204,6 +225,48 @@ ui <- fluidPage( ) ) ) + ), + tabPanel( + "Model Metrics", + sidebarLayout( + sidebarPanel( + selectInput( + "metricModelType", + p("Choose a model to show metrics:"), + choices = c("Deep Learning" = "dl", + "Distributed Random Forest" = "drf", + "Gradient Boosting Machine" = "gbm", + "Naive Bayes" = "nb"), + ), + actionButton("metricsButton", "Show Metrics", + width = "100%", icon = icon("tachometer-alt"), + class = "btn btn-primary") + ), + mainPanel( + verbatimTextOutput('metrics') + ) + ) + ), + tabPanel( + "Model Summary", + sidebarLayout( + sidebarPanel( + selectInput( + "summaryModelType", + p("Choose a model to show metrics:"), + choices = c("Deep Learning" = "dl", + "Distributed Random Forest" = "drf", + "Gradient Boosting Machine" = "gbm", + "Naive Bayes" = "nb"), + ), + actionButton("summaryButton", "Show Information", + width = "100%", icon = icon("clipboard"), + class = "btn btn-primary") + ), + mainPanel( + verbatimTextOutput('summary') + ) + ) ) ) ) @@ -228,6 +291,7 @@ server <- function(input, output) { ) datasetInput <- reactive({ + modelType <- input$modelType req(input$npin) req(input$npob) req(input$nbin) @@ -246,15 +310,71 @@ server <- function(input, output) { write.table(input,"input.csv", sep=",", quote = FALSE, row.names = FALSE, col.names = FALSE) test <- read.csv(paste("input", ".csv", sep=""), header = TRUE) - prediction <- predict(model.dl, as.h2o(test)) + + if (modelType == "dl") { + predict(model.dl, as.h2o(test)) + } else if (modelType == "drf") { + predict(model.drf, as.h2o(test)) + } else if (modelType == "gbm") { + predict(model.gbm, as.h2o(test)) + } else if (modelType == "nb") { + predict(model.nb, as.h2o(test)) + } }) output$varImpPlot <- renderPlot({ - h2o.varimp_plot(dl) + modelType <- input$modelType + if (modelType == "dl") { + h2o.varimp_plot(model.dl) + } else if (modelType == "drf") { + h2o.varimp_plot(model.drf) + } else if (modelType == "gbm") { + h2o.varimp_plot(model.gbm) + } + }) output$lcPlot <- renderPlot({ - h2o.learning_curve_plot(dl) + modelType <- input$modelType + if (modelType == "dl") { + h2o.learning_curve_plot(model.dl) + } else if (modelType == "drf") { + h2o.learning_curve_plot(model.drf) + } else if (modelType == "gbm") { + h2o.learning_curve_plot(model.gbm) + } + }) + + output$metrics <- renderPrint({ + input$metricsButton + isolate({ + modelType <- input$metricModelType + if (modelType == "dl") { + h2o.performance(model.dl) + } else if (modelType == "drf") { + h2o.performance(model.drf) + } else if (modelType == "gbm") { + h2o.performance(model.gbm) + } else { + h2o.performance(model.nb) + } + }) + }) + + output$summary <- renderPrint({ + input$summaryButton + isolate({ + modelType <- input$summaryModelType + if (modelType == "dl") { + summary(model.dl) + } else if (modelType == "drf") { + summary(model.drf) + } else if (modelType == "gbm") { + summary(model.gbm) + } else { + summary(model.nb) + } + }) }) # Status/Output Text Box @@ -277,7 +397,7 @@ server <- function(input, output) { selectedFeature <- input$plotVariable1 selectInput( "plotVariable2", - p("Second Feature to Visualize:"), + p("Second feature to visualize:"), choices = features[!features %in% selectedFeature] ) }) @@ -6,22 +6,27 @@ localH2O = h2o.init() # Importing the Network Intrusion Data set dataset <- fread("2020.10.01.csv") -dataset = na.omit(dataset) +dataset <- na.omit(dataset) dataset <- dataset[, -c(12, 13)] +correlationSet <- dataset -# Encoding 'label' as Numeric Variable +# Encoding 'label' as Catagorical Variable dataset$label <- factor(dataset$label, levels = c("benign", "malicious", "outlier"), labels = c(1, 2, 3)) -dataset$label <- as.numeric(dataset$label) +correlationSet$label <- factor(correlationSet$label, + levels = c("benign", "malicious", "outlier"), + labels = c(1, 2, 3)) + +correlationSet$label <- as.numeric(correlationSet$label) # Remove Redundant Features - First Find Correlated Features -correlationMatrix <- cor(dataset) +correlationMatrix <- cor(correlationSet) highlyCorrelated <- findCorrelation(correlationMatrix, cutoff=0.5) print(highlyCorrelated) df <- dataset[, c(8,2,7,3,5,12,13)] -df <- as.h2o(df) +df <- as.h2o(dataset) head(dataset[, c(8,2,7,3,5,12,13)]) @@ -37,12 +42,12 @@ train <- df_splits[[1]] test <- df_splits[[2]] -# Build and train the model: +# Build and train Deep learning model: dl <- h2o.deeplearning(x = 1:6, y = "label", - distribution = "tweedie", + distribution = "multinomial", hidden = c(1), - epochs = 1000, + epochs = 100, train_samples_per_iteration = -1, reproducible = TRUE, activation = "Tanh", @@ -50,21 +55,18 @@ dl <- h2o.deeplearning(x = 1:6, balance_classes = FALSE, force_load_balance = FALSE, seed = 23123, - tweedie_power = 1.5, score_training_samples = 0, score_validation_samples = 0, training_frame = df, stopping_rounds = 0) -# Eval performance: +# Eval performance of deep learning model: perf <- h2o.performance(dl) perf # Generate predictions on a test set (if necessary): pred <- h2o.predict(dl, newdata = df) -pred summary(dl) -plot(dl) # Save the model dl_model <- h2o.saveModel(object = dl, @@ -72,57 +74,67 @@ dl_model <- h2o.saveModel(object = dl, force = TRUE) print(dl_model) -h2o.varimp_plot(dl) -h2o.learning_curve_plot(dl) - - - - - - - - -ind <- createDataPartition(dataset$label, p=0.6, list=FALSE) -dataset.train <- dataset[ind,] -dataset.test <- dataset[-ind,] - - - - - - - -# Decision Tree -tree <- rpart(label ~., data = dataset.train) -rpart.plot(tree) -printcp(tree) -plotcp(tree) -p <- predict(tree, dataset.train) -confusionMatrix(p, dataset.train$label, positive='y') - - - - -# Split the class attribute -dataset.traintarget <- dataset[ind == 1, 5] -dataset.testtarget <- dataset[ind==2, 5] - - -# Remove Redundant Features - First Find Correlated Features -correlationMatrix <- cor(dataset) -highlyCorrelated <- findCorrelation(correlationMatrix, cutoff=0.5) -print(highlyCorrelated) - -dataset <- dataset[, c(8,2,7,3,5,12,13)] - - - - - +# Build and train distributed random forest model: +drf <- h2o.randomForest(x = predictors, + y = response, + ntrees = 10, + max_depth = 5, + min_rows = 10, + calibration_frame = test, + binomial_double_trees = TRUE, + training_frame = train, + validation_frame = test) +# Eval Performance of distributed random forest model: +h2o.performance(drf) +summary(dl) +# Save the model +drf_model <- h2o.saveModel(object = drf, + path = "/Users/lucifer/Documents/projects/NetworkIntrusionDetection/models", + force = TRUE) +# Build and train the Gradient Boosting machine model: +gbm <- h2o.gbm(x = predictors, + y = response, + nfolds = 5, + seed = 1111, + keep_cross_validation_predictions = TRUE, + training_frame = df) +# Eval Performance of GBM model: +h2o.performance(gbm) +summary(dl) +# Save the model +gbm_model <- h2o.saveModel(object = gbm, + path = "/Users/lucifer/Documents/projects/NetworkIntrusionDetection/models", + force = TRUE) + +# Build and train the Naive Bayes model: +nb <- h2o.naiveBayes(x = predictors, + y = response, + training_frame = df, + laplace = 0, + nfolds = 5, + seed = 1234) + +# Eval performance of the Naive Bayes: +h2o.performance(nb) +summary(nb) + + +nb_model <- h2o.saveModel(object = nb, + path = "/Users/lucifer/Documents/projects/NetworkIntrusionDetection/models", + force = TRUE) + +# Build and train the XGBoost model: +xgb <- h2o.xgboost(x = predictors, + y = response, + training_frame = train, + validation_frame = test, + booster = "dart", + normalize_type = "tree", + seed = 1234) |
