about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Bobby <[email protected]> 2022-04-29 18:18:42 -0400
committer Bobby <[email protected]> 2022-04-29 18:18:42 -0400
commit 5dbd34fbc10b190d6c3934db21a2918757c2a132 (patch)
tree ba932201fccd78e2bd03da3e1ce3b89e194238b2
parent 77ac9ab78f0d14ba4e26537bf9c35b66a7dcaa0f (diff)
download Network-Intrusion-Detection-5dbd34fbc10b190d6c3934db21a2918757c2a132.tar.xz
download Network-Intrusion-Detection-5dbd34fbc10b190d6c3934db21a2918757c2a132.zip
all models
-rw-r--r-- app.R 156
-rw-r--r-- models.R 130
2 files changed, 209 insertions, 77 deletions
diff --git a/app.R b/app.R
index c55d105..c6639d9 100644
--- a/app.R
+++ b/app.R
@@ -43,7 +43,10 @@ get_color <- function(a = 1) {
}
# Load the models
-model.dl = h2o.loadModel(dl_model)
+model.dl <- h2o.loadModel(dl_model)
+model.drf <- h2o.loadModel(drf_model)
+model.gbm <- h2o.loadModel(gbm_model)
+model.nb <- h2o.loadModel(nb_model)
# Define UI for application
ui <- fluidPage(
@@ -100,7 +103,7 @@ ui <- fluidPage(
sidebarPanel(
selectInput(
"plotType",
- p("Type of Plot:"),
+ p("Type of plot:"),
choices = c(Histogram = "hist",
"Scatter Plot" = "scatter",
"Mosaic Plot" = "mosaic")
@@ -110,12 +113,12 @@ ui <- fluidPage(
condition = "input.plotType == 'hist'",
selectInput(
"plotVariable",
- p("Feature to Visualize:"),
+ p("Feature to visualize:"),
choices = features
),
selectInput(
"plotVariant",
- p("Plot Variant:"),
+ p("Plot variant:"),
choices = c("Normal", "Log 10 Scale")
)
),
@@ -125,7 +128,7 @@ ui <- fluidPage(
condition = "input.plotType == 'scatter'",
selectInput(
"plotVariable1",
- p("First Feature to Visualize:"),
+ p("First feature to visualize:"),
choices = features
),
uiOutput("secondSelection")
@@ -136,7 +139,7 @@ ui <- fluidPage(
condition = "input.plotType == 'mosaic'",
selectInput(
"mosaicVariable",
- p("Select Features to Visualize:"),
+ p("Select features to visualize:"),
choices = c("Labels vs Protocols" = "labproto")
)
),
@@ -158,8 +161,11 @@ ui <- fluidPage(
sidebarPanel(
selectInput(
"modelType",
- p("Choose a Model to Predict:"),
- choices = c("Deep Learning" = "dl")
+ p("Choose a model to predict:"),
+ choices = c("Deep Learning" = "dl",
+ "Distributed Random Forest" = "drf",
+ "Gradient Boosting Machine" = "gbm",
+ "Naive Bayes" = "nb")
),
numericInput("npin", "Number of inbound packets:",
10, min = 0),
@@ -169,9 +175,9 @@ ui <- fluidPage(
2000, min = 0),
numericInput("nbob", "Number of bytes out:",
10000, min = 0),
- numericInput("dprt", "Destination Port (1024 - 49151):",
+ numericInput("dprt", "Destination port (1024 - 49151):",
5234, min = 1024, max = 49151),
- numericInput("tepy", "Total Entropy:",
+ numericInput("tepy", "Total entropy:",
18000, min = 0),
actionButton("predictButton", "Predict",
width = "100%", icon = icon("think-peaks"),
@@ -180,11 +186,26 @@ ui <- fluidPage(
mainPanel(
tags$label(h3('Status/Output')),
verbatimTextOutput('contents'),
- p(strong("Prediction Legend"), br(), br(), em("1.00 - 1.99"),
- " - Benign", br(), em("2.00 - 2.99"), " - Malicious",
- br(), em("3.00 - 3.99"), " - Outlier",
- style="text-align:justify;color:black;
+ fluidRow(
+ column(
+ width = 6,
+ p(strong("Prediction Legend"), br(), br(), em("1"),
+ " - Benign", br(), em("2"), " - Malicious",
+ br(), em("3"), " - Outlier",
+ style="text-align:justify;color:black;
background-color:lavender;padding:15px;border-radius:10px"),
+ ),
+ column(
+ width = 6,
+ p(strong("Probabilty Legend"), br(), br(), em("p1"),
+ " - Probability of being a Benign Connection",
+ br(), em("p2"), " - Probability of being a
+ Malicious Connection", br(), em("p3"), " -
+ Probability of being an Outlier Connection",
+ style="text-align:justify;color:black;
+ background-color:papayawhip;padding:15px;border-radius:10px"),
+ )
+ ),
tableOutput('tabledata'), # Prediction results table
fluidRow(
column(
@@ -204,6 +225,48 @@ ui <- fluidPage(
)
)
)
+ ),
+ tabPanel(
+ "Model Metrics",
+ sidebarLayout(
+ sidebarPanel(
+ selectInput(
+ "metricModelType",
+ p("Choose a model to show metrics:"),
+ choices = c("Deep Learning" = "dl",
+ "Distributed Random Forest" = "drf",
+ "Gradient Boosting Machine" = "gbm",
+ "Naive Bayes" = "nb"),
+ ),
+ actionButton("metricsButton", "Show Metrics",
+ width = "100%", icon = icon("tachometer-alt"),
+ class = "btn btn-primary")
+ ),
+ mainPanel(
+ verbatimTextOutput('metrics')
+ )
+ )
+ ),
+ tabPanel(
+ "Model Summary",
+ sidebarLayout(
+ sidebarPanel(
+ selectInput(
+ "summaryModelType",
+ p("Choose a model to show metrics:"),
+ choices = c("Deep Learning" = "dl",
+ "Distributed Random Forest" = "drf",
+ "Gradient Boosting Machine" = "gbm",
+ "Naive Bayes" = "nb"),
+ ),
+ actionButton("summaryButton", "Show Information",
+ width = "100%", icon = icon("clipboard"),
+ class = "btn btn-primary")
+ ),
+ mainPanel(
+ verbatimTextOutput('summary')
+ )
+ )
)
)
)
@@ -228,6 +291,7 @@ server <- function(input, output) {
)
datasetInput <- reactive({
+ modelType <- input$modelType
req(input$npin)
req(input$npob)
req(input$nbin)
@@ -246,15 +310,71 @@ server <- function(input, output) {
write.table(input,"input.csv", sep=",", quote = FALSE,
row.names = FALSE, col.names = FALSE)
test <- read.csv(paste("input", ".csv", sep=""), header = TRUE)
- prediction <- predict(model.dl, as.h2o(test))
+
+ if (modelType == "dl") {
+ predict(model.dl, as.h2o(test))
+ } else if (modelType == "drf") {
+ predict(model.drf, as.h2o(test))
+ } else if (modelType == "gbm") {
+ predict(model.gbm, as.h2o(test))
+ } else if (modelType == "nb") {
+ predict(model.nb, as.h2o(test))
+ }
})
output$varImpPlot <- renderPlot({
- h2o.varimp_plot(dl)
+ modelType <- input$modelType
+ if (modelType == "dl") {
+ h2o.varimp_plot(model.dl)
+ } else if (modelType == "drf") {
+ h2o.varimp_plot(model.drf)
+ } else if (modelType == "gbm") {
+ h2o.varimp_plot(model.gbm)
+ }
+
})
output$lcPlot <- renderPlot({
- h2o.learning_curve_plot(dl)
+ modelType <- input$modelType
+ if (modelType == "dl") {
+ h2o.learning_curve_plot(model.dl)
+ } else if (modelType == "drf") {
+ h2o.learning_curve_plot(model.drf)
+ } else if (modelType == "gbm") {
+ h2o.learning_curve_plot(model.gbm)
+ }
+ })
+
+ output$metrics <- renderPrint({
+ input$metricsButton
+ isolate({
+ modelType <- input$metricModelType
+ if (modelType == "dl") {
+ h2o.performance(model.dl)
+ } else if (modelType == "drf") {
+ h2o.performance(model.drf)
+ } else if (modelType == "gbm") {
+ h2o.performance(model.gbm)
+ } else {
+ h2o.performance(model.nb)
+ }
+ })
+ })
+
+ output$summary <- renderPrint({
+ input$summaryButton
+ isolate({
+ modelType <- input$summaryModelType
+ if (modelType == "dl") {
+ summary(model.dl)
+ } else if (modelType == "drf") {
+ summary(model.drf)
+ } else if (modelType == "gbm") {
+ summary(model.gbm)
+ } else {
+ summary(model.nb)
+ }
+ })
})
# Status/Output Text Box
@@ -277,7 +397,7 @@ server <- function(input, output) {
selectedFeature <- input$plotVariable1
selectInput(
"plotVariable2",
- p("Second Feature to Visualize:"),
+ p("Second feature to visualize:"),
choices = features[!features %in% selectedFeature]
)
})
diff --git a/models.R b/models.R
index 1444a8e..3e269e7 100644
--- a/models.R
+++ b/models.R
@@ -6,22 +6,27 @@ localH2O = h2o.init()
# Importing the Network Intrusion Data set
dataset <- fread("2020.10.01.csv")
-dataset = na.omit(dataset)
+dataset <- na.omit(dataset)
dataset <- dataset[, -c(12, 13)]
+correlationSet <- dataset
-# Encoding 'label' as Numeric Variable
+# Encoding 'label' as Categorical Variable
dataset$label <- factor(dataset$label,
levels = c("benign", "malicious", "outlier"),
labels = c(1, 2, 3))
-dataset$label <- as.numeric(dataset$label)
+correlationSet$label <- factor(correlationSet$label,
+ levels = c("benign", "malicious", "outlier"),
+ labels = c(1, 2, 3))
+
+correlationSet$label <- as.numeric(correlationSet$label)
# Remove Redundant Features - First Find Correlated Features
-correlationMatrix <- cor(dataset)
+correlationMatrix <- cor(correlationSet)
highlyCorrelated <- findCorrelation(correlationMatrix, cutoff=0.5)
print(highlyCorrelated)
df <- dataset[, c(8,2,7,3,5,12,13)]
-df <- as.h2o(df)
+df <- as.h2o(dataset)
head(dataset[, c(8,2,7,3,5,12,13)])
@@ -37,12 +42,12 @@ train <- df_splits[[1]]
test <- df_splits[[2]]
-# Build and train the model:
+# Build and train the deep learning model:
dl <- h2o.deeplearning(x = 1:6,
y = "label",
- distribution = "tweedie",
+ distribution = "multinomial",
hidden = c(1),
- epochs = 1000,
+ epochs = 100,
train_samples_per_iteration = -1,
reproducible = TRUE,
activation = "Tanh",
@@ -50,21 +55,18 @@ dl <- h2o.deeplearning(x = 1:6,
balance_classes = FALSE,
force_load_balance = FALSE,
seed = 23123,
- tweedie_power = 1.5,
score_training_samples = 0,
score_validation_samples = 0,
training_frame = df,
stopping_rounds = 0)
-# Eval performance:
+# Eval performance of deep learning model:
perf <- h2o.performance(dl)
perf
# Generate predictions on a test set (if necessary):
pred <- h2o.predict(dl, newdata = df)
-pred
summary(dl)
-plot(dl)
# Save the model
dl_model <- h2o.saveModel(object = dl,
@@ -72,57 +74,67 @@ dl_model <- h2o.saveModel(object = dl,
force = TRUE)
print(dl_model)
-h2o.varimp_plot(dl)
-h2o.learning_curve_plot(dl)
-
-
-
-
-
-
-
-
-ind <- createDataPartition(dataset$label, p=0.6, list=FALSE)
-dataset.train <- dataset[ind,]
-dataset.test <- dataset[-ind,]
-
-
-
-
-
-
-
-# Decision Tree
-tree <- rpart(label ~., data = dataset.train)
-rpart.plot(tree)
-printcp(tree)
-plotcp(tree)
-p <- predict(tree, dataset.train)
-confusionMatrix(p, dataset.train$label, positive='y')
-
-
-
-
-# Split the class attribute
-dataset.traintarget <- dataset[ind == 1, 5]
-dataset.testtarget <- dataset[ind==2, 5]
-
-
-# Remove Redundant Features - First Find Correlated Features
-correlationMatrix <- cor(dataset)
-highlyCorrelated <- findCorrelation(correlationMatrix, cutoff=0.5)
-print(highlyCorrelated)
-
-dataset <- dataset[, c(8,2,7,3,5,12,13)]
-
-
-
-
-
+# Build and train distributed random forest model:
+drf <- h2o.randomForest(x = predictors,
+ y = response,
+ ntrees = 10,
+ max_depth = 5,
+ min_rows = 10,
+ calibration_frame = test,
+ binomial_double_trees = TRUE,
+ training_frame = train,
+ validation_frame = test)
+# Eval Performance of distributed random forest model:
+h2o.performance(drf)
+summary(dl)
+# Save the model
+drf_model <- h2o.saveModel(object = drf,
+ path = "/Users/lucifer/Documents/projects/NetworkIntrusionDetection/models",
+ force = TRUE)
+# Build and train the Gradient Boosting machine model:
+gbm <- h2o.gbm(x = predictors,
+ y = response,
+ nfolds = 5,
+ seed = 1111,
+ keep_cross_validation_predictions = TRUE,
+ training_frame = df)
+# Eval Performance of GBM model:
+h2o.performance(gbm)
+summary(dl)
+# Save the model
+gbm_model <- h2o.saveModel(object = gbm,
+ path = "/Users/lucifer/Documents/projects/NetworkIntrusionDetection/models",
+ force = TRUE)
+
+# Build and train the Naive Bayes model:
+nb <- h2o.naiveBayes(x = predictors,
+ y = response,
+ training_frame = df,
+ laplace = 0,
+ nfolds = 5,
+ seed = 1234)
+
+# Eval performance of the Naive Bayes model:
+h2o.performance(nb)
+summary(nb)
+
+
+nb_model <- h2o.saveModel(object = nb,
+ path = "/Users/lucifer/Documents/projects/NetworkIntrusionDetection/models",
+ force = TRUE)
+
+# Build and train the XGBoost model:
+xgb <- h2o.xgboost(x = predictors,
+ y = response,
+ training_frame = train,
+ validation_frame = test,
+ booster = "dart",
+ normalize_type = "tree",
+ seed = 1234)