all models

author: Bobby <[email protected]> 2022-04-29 18:18:42 -0400
committer: Bobby <[email protected]> 2022-04-29 18:18:42 -0400
commit: 5dbd34fbc10b190d6c3934db21a2918757c2a132 (patch)
tree: ba932201fccd78e2bd03da3e1ce3b89e194238b2 /models.R
parent: 77ac9ab78f0d14ba4e26537bf9c35b66a7dcaa0f (diff)
download: Network-Intrusion-Detection-5dbd34fbc10b190d6c3934db21a2918757c2a132.tar.xz
Network-Intrusion-Detection-5dbd34fbc10b190d6c3934db21a2918757c2a132.zip
1 files changed, 71 insertions, 59 deletions
diff --git a/models.R b/models.R
index 1444a8e..3e269e7 100644
--- a/models.R
+++ b/models.R
@@ -6,22 +6,27 @@ localH2O = h2o.init()
 
 # Importing the Network Intrusion Data set
 dataset <- fread("2020.10.01.csv")
-dataset = na.omit(dataset)
+dataset <- na.omit(dataset)
 dataset <- dataset[, -c(12, 13)]
+correlationSet <- dataset
 
-# Encoding 'label' as Numeric Variable
+# Encoding 'label' as Categorical Variable
 dataset$label <- factor(dataset$label,
                            levels = c("benign", "malicious", "outlier"),
                            labels = c(1, 2, 3))
-dataset$label <- as.numeric(dataset$label)
+correlationSet$label <- factor(correlationSet$label,
+                        levels = c("benign", "malicious", "outlier"),
+                        labels = c(1, 2, 3))
+
+correlationSet$label <- as.numeric(correlationSet$label)
 
 # Remove Redundant Features - First Find Correlated Features
-correlationMatrix <- cor(dataset)
+correlationMatrix <- cor(correlationSet)
 highlyCorrelated <- findCorrelation(correlationMatrix, cutoff=0.5)
 print(highlyCorrelated)
 
 df <- dataset[, c(8,2,7,3,5,12,13)]
-df <- as.h2o(df)
+df <- as.h2o(dataset)
 
 head(dataset[, c(8,2,7,3,5,12,13)])
 
@@ -37,12 +42,12 @@ train <- df_splits[[1]]
 test <- df_splits[[2]]
 
 
-# Build and train the model:
+# Build and train Deep learning model:
 dl <- h2o.deeplearning(x = 1:6,
                        y = "label",
-                       distribution = "tweedie",
+                       distribution = "multinomial",
                        hidden = c(1),
-                       epochs = 1000,
+                       epochs = 100,
                        train_samples_per_iteration = -1,
                        reproducible = TRUE,
                        activation = "Tanh",
@@ -50,21 +55,18 @@ dl <- h2o.deeplearning(x = 1:6,
                        balance_classes = FALSE,
                        force_load_balance = FALSE,
                        seed = 23123,
-                       tweedie_power = 1.5,
                        score_training_samples = 0,
                        score_validation_samples = 0,
                        training_frame = df,
                        stopping_rounds = 0)
 
-# Eval performance:
+# Eval performance of deep learning model:
 perf <- h2o.performance(dl)
 perf
 
 # Generate predictions on a test set (if necessary):
 pred <- h2o.predict(dl, newdata = df)
-pred
 summary(dl)
-plot(dl)
 
 # Save the model
 dl_model <- h2o.saveModel(object = dl, 
@@ -72,57 +74,67 @@ dl_model <- h2o.saveModel(object = dl,
                           force = TRUE)
 print(dl_model)
 
-h2o.varimp_plot(dl)
-h2o.learning_curve_plot(dl)
-
-
-
-
-
-
-
-
-ind <- createDataPartition(dataset$label, p=0.6, list=FALSE)
-dataset.train <- dataset[ind,]
-dataset.test <- dataset[-ind,]
-
-
-
-
-
-
-
-# Decision Tree
-tree <- rpart(label ~., data = dataset.train)
-rpart.plot(tree)
-printcp(tree)
-plotcp(tree)
-p <- predict(tree, dataset.train)
-confusionMatrix(p, dataset.train$label, positive='y')
-
-
-
-
-# Split the class attribute
-dataset.traintarget <- dataset[ind == 1, 5]
-dataset.testtarget <- dataset[ind==2, 5]
-
-
-# Remove Redundant Features - First Find Correlated Features
-correlationMatrix <- cor(dataset)
-highlyCorrelated <- findCorrelation(correlationMatrix, cutoff=0.5)
-print(highlyCorrelated)
-
-dataset <- dataset[, c(8,2,7,3,5,12,13)]
-
-
-
-
-
 
+# Build and train distributed random forest model:
+drf <- h2o.randomForest(x = predictors,
+                             y = response,
+                             ntrees = 10,
+                             max_depth = 5,
+                             min_rows = 10,
+                             calibration_frame = test,
+                             binomial_double_trees = TRUE,
+                             training_frame = train,
+                             validation_frame = test)
 
+# Eval performance of the distributed random forest model (drf):
+h2o.performance(drf)
+summary(drf)
 
+# Save the model
+drf_model <- h2o.saveModel(object = drf, 
+                           path = "/Users/lucifer/Documents/projects/NetworkIntrusionDetection/models", 
+                           force = TRUE)
 
+# Build and train the Gradient Boosting machine model:
+gbm <- h2o.gbm(x = predictors,
+                    y = response,
+                    nfolds = 5,
+                    seed = 1111,
+                    keep_cross_validation_predictions = TRUE,
+                    training_frame = df)
 
 
+# Eval performance of the gradient boosting machine model (gbm):
+h2o.performance(gbm)
+summary(gbm)
 
+# Save the model
+gbm_model <- h2o.saveModel(object = gbm, 
+                           path = "/Users/lucifer/Documents/projects/NetworkIntrusionDetection/models", 
+                           force = TRUE)
+
+# Build and train the Naive Bayes model:
+nb <- h2o.naiveBayes(x = predictors,
+                          y = response,
+                          training_frame = df,
+                          laplace = 0,
+                          nfolds = 5,
+                          seed = 1234)
+
+# Eval performance of the Naive Bayes:
+h2o.performance(nb)
+summary(nb)
+
+
+nb_model <- h2o.saveModel(object = nb, 
+                           path = "/Users/lucifer/Documents/projects/NetworkIntrusionDetection/models", 
+                           force = TRUE)
+
+# Build and train the XGBoost model:
+xgb <- h2o.xgboost(x = predictors,
+                   y = response,
+                   training_frame = train,
+                   validation_frame = test,
+                   booster = "dart",
+                   normalize_type = "tree",
+                   seed = 1234)
author	Bobby <[email protected]>	2022-04-29 18:18:42 -0400
committer	Bobby <[email protected]>	2022-04-29 18:18:42 -0400
commit	5dbd34fbc10b190d6c3934db21a2918757c2a132 (patch)
tree	ba932201fccd78e2bd03da3e1ce3b89e194238b2 /models.R
parent	77ac9ab78f0d14ba4e26537bf9c35b66a7dcaa0f (diff)
download	Network-Intrusion-Detection-5dbd34fbc10b190d6c3934db21a2918757c2a132.tar.xz Network-Intrusion-Detection-5dbd34fbc10b190d6c3934db21a2918757c2a132.zip