diff options
| author | Bobby <[email protected]> | 2022-04-27 20:57:47 -0400 |
|---|---|---|
| committer | Bobby <[email protected]> | 2022-04-27 20:57:47 -0400 |
| commit | 4820317683754e8e1faf9dbf456c20b7f27f637a (patch) | |
| tree | 3a93ba15e0b2562338829f39ff4c3481ad88d44d | |
| download | Network-Intrusion-Detection-4820317683754e8e1faf9dbf456c20b7f27f637a.tar.xz Network-Intrusion-Detection-4820317683754e8e1faf9dbf456c20b7f27f637a.zip | |
basic R shiny app with data visualization (dataset)
| -rw-r--r-- | .gitignore | 41 | ||||
| -rw-r--r-- | app.R | 233 | ||||
| -rw-r--r-- | project.R | 142 |
3 files changed, 416 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9fb7372 --- /dev/null +++ b/.gitignore @@ -0,0 +1,41 @@ +# History files +.Rhistory +.Rapp.history + +# Session Data files +.RData +.RData* + +# User-specific files +.Ruserdata + +# Example code in package build process +*-Ex.R + +# Output files from R CMD build +/*.tar.gz + +# Output files from R CMD check +/*.Rcheck/ + +# RStudio files +.Rproj.user/ + +# produced vignettes +vignettes/*.html +vignettes/*.pdf + +# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 +.httr-oauth + +# knitr and R markdown default cache directories +*_cache/ +/cache/ + +# Temporary files created by R markdown +*.utf8.md +*.knit.md + +# R Environment Variables +.Renviron +*.csv @@ -0,0 +1,233 @@ +# +# This is a Shiny web application. You can run the application by clicking +# the 'Run App' button above. +# +# Find out more about building applications with Shiny here: +# +# http://shiny.rstudio.com/ +# +library(shinythemes) +library(shiny) +library(DT) +library(data.table) +library(ggplot2) +library(shinycssloaders) + + +# Defining Non Changing Variables +data <- fread("2020.10.01.csv") +data_na_removed <- na.omit(data) + +# Encoding the Label Column +# 1 - Benign 2 - Malicious 3 - Outlier +data_encoded <- data_na_removed +data_encoded$label <- factor(data_na_removed$label, + levels = c("benign", "malicious", "outlier"), + labels = c(1, 2, 3)) +data_encoded$label = as.numeric(data_encoded$label) +# Define Default Values +pch = 16 +features <- c("Average Input","Incoming Bytes","Outgoing Bytes", + "Destination IP", "Destination Port", "Entropy", + "Inbound Packets", "Outbound Packets", "Protocol", + "Source IP", "Source Port", "Start Time (s)", + "End Time (s)", "Total Entropy", "Type", "Duration") +feature_variables <- c("avg_ipt", "bytes_in", "bytes_out", "dest_ip", + "dest_port", "entropy", "num_pkts_out", "num_pkts_in", + "proto", "src_ip", "src_port", "time_end", "time_start", + "total_entropy", 
"label", "duration") + +# Define Elementary Functions +get_color <- function(a = 1) { + return(alpha("#e95420", a)) +} + +# Define UI for application +ui <- fluidPage( + theme = shinytheme("united"), + # Application title + titlePanel("A Comprehensive Approach To Analysis and Detection of Emerging + Threats due to Network Intrusion"), + + navbarPage( + "Network Intrusion Detection Demo", + tabPanel( + icon("home"), + p("Through this application, it is intended to develop a demo of a", + strong("Network Intrusion Detection System"), + "using different Machine Learning Techniques using the + LUFlow Network Intrusion Detection Data Set. This page is intended + to display the information about the dataset." + ,style="text-align:justify;color:black; + background-color:lavender;padding:15px;border-radius:10px"), + br(), + p("The data used in this application are publicly available on the", + em("LUFlow Network Intrusion Detection Data Set"), "Kaggle page. + The Data Set contains telemetry cap- tured using Cisco’s Joy tool. + This tool records multiple measurements asso- ciated with flows. + Features are engineered from these measurements, which are also + outlined below",style="text-align:justify;color:black; + background-color:papayawhip;padding:15px;border-radius:10px"), + hr(), + tags$style(".fa-database {color:#e95420}"), + h3(p(icon("database",lib = "font-awesome"), + em("Dataset Exploration "), + style="color:black;text-align:center")), + fluidRow(column(DT::dataTableOutput("renderData"), + width = 12)), + hr(), + p(em("Developed by"), br("Kumar Priyansh, Ritu Dimri, + Sandeep Perumalla, Hemanth Katikala"), + style="text-align:center; font-family: times") + ), + tabPanel( + "Data Visualization", + p("This part allows you to visualize features via different types of + plots. You can select whatever features you want to plot and hit + the \"Plot Graph\" button. 
Please keep in mind that all plots", + strong("might not be useful"), + "and you need to select which plots you want to visualize. If you + want to save an image of the currently visualized plot, please + right click on the plot and click on the relevant", + strong("save image"), + "option." + ,style="text-align:justify;color:black; + background-color:lavender;padding:15px;border-radius:10px"), + sidebarLayout( + sidebarPanel( + selectInput( + "plotType", + p("Type of Plot:"), + choices = c(Histogram = "hist", + "Scatter Plot" = "scatter", + "Mosaic Plot" = "mosaic") + ), + # Only show this panel if the plot type is a histogram + conditionalPanel( + condition = "input.plotType == 'hist'", + selectInput( + "plotVariable", + p("Feature to Visualize:"), + choices = features + ), + selectInput( + "plotVariant", + p("Plot Variant:"), + choices = c("Normal", "Log 10 Scale") + ) + ), + + # Only show this panel if the plot type is a scatter plot + conditionalPanel( + condition = "input.plotType == 'scatter'", + selectInput( + "plotVariable1", + p("First Feature to Visualize:"), + choices = features + ), + uiOutput("secondSelection") + ), + + # Single Mosiac Plot for now + conditionalPanel( + condition = "input.plotType == 'mosaic'", + selectInput( + "mosaicVariable", + p("Select Features to Visualize:"), + choices = c("Labels vs Protocols" = "labproto") + ) + ), + actionButton("plot", "Plot Graph", + width = "100%", icon = icon("chart-line"), + style="color: #fff; background-color: #e95420; + outline: none") + ), + mainPanel( + withSpinner( + plotOutput("selectedFeatureVariableForVisualization"), + type = 6, color = "#e95420" + ) + ) + ) + ), + tabPanel( + "Compare Models" + ) + ) +) + +# Define server logic +server <- function(input, output) { + output$renderData <- DT::renderDataTable( + DT::datatable({ + data_na_removed + }, + options = list( + initComplete = JS( + "function(settings, json) {", + "$(this.api().table().header()).css({'background-color': + 'moccasin', 
'color': '1c1b1b'});", + "}"), + columnDefs=list(list(className='dt-center',targets="_all"))), + style = 'bootstrap', + class = 'cell-border stripe', + rownames = FALSE, + colnames = features) + ) + + output$secondSelection <- renderUI({ + selectedFeature <- input$plotVariable1 + selectInput( + "plotVariable2", + p("Second Feature to Visualize:"), + choices = features[!features %in% selectedFeature] + ) + }) + + output$selectedFeatureVariableForVisualization <- renderPlot({ + input$plot + isolate({ + plotType <- input$plotType + if (plotType == 'hist') { + selectedFeature <- input$plotVariable + plotVariant <- input$plotVariant + positionInFeatureArray <- which(features == selectedFeature) + selectedFeatureVariable <- feature_variables[positionInFeatureArray] + if (plotVariant == "Normal") { + hist(data_encoded[[selectedFeatureVariable]], + main = paste("Histogram Plot of", selectedFeature, sep = " ", collapse = NULL), + ylab = "Frequency", xlab = selectedFeature, + col = get_color(), pch = pch) + } else { + nonZeroSelectedFeature = data_encoded[data_encoded[[selectedFeatureVariable]] > 0] + hist(log(nonZeroSelectedFeature[[selectedFeatureVariable]]), + main = paste("Log 10 Base Histogram Plot of", selectedFeature, sep = " ", collapse = NULL), + ylab = "Frequency", xlab = selectedFeature, + col = get_color(), pch = pch) + } + } else if (plotType == 'scatter') { + firstFeature <- feature_variables[which(features == + input$plotVariable1)] + secondFeature <- feature_variables[which(features == + input$plotVariable2)] + try(plot(data_encoded[[firstFeature]], data_encoded[[secondFeature]], + main = paste("Scatter Plot of", input$plotVariable1, + "vs", input$plotVariable2, sep = " ", collapse = NULL), + ylab = input$plotVariable2, xlab = input$plotVariable1, + col = get_color(0.02), + pch = 16,), silent = TRUE) + } else { + selectedFeatures <- input$mosaicVariable + if (selectedFeatures == 'labproto') { + proto_label_mosaic <- table(data_encoded$proto, 
data_encoded$label) + mosaicplot(~ factor(proto)+factor(label, labels=c("benign","malicious","outlier")), + data = data_encoded,xlab = "Protocol", ylab = "Category", + main= "Mosaic plot of Protocol vs Category",shade = TRUE) + } + } + }) + }) +} + +# Run the application +shinyApp(ui = ui, server = server) diff --git a/project.R b/project.R new file mode 100644 index 0000000..2320cb3 --- /dev/null +++ b/project.R @@ -0,0 +1,142 @@ +library(data.table) +library(lattice) +library(caret) +library(nnet) +data <- fread("2020.10.02.csv") +data1<-fread("2020.10.03.csv") +data=rbind(data,data1) +#selecting few rows + + +library(fastDummies) +library(ggplot2) +library(plotly) +library(GGally) +#finding missing values in each column +colSums(is.na(data)) +#each column missing values box plot comes here +#taking non missing rows alone +data_na_removed = na.omit(data) +#checking if missing values are gone +colSums(is.na(data_na_removed)) +#box plot of label column comes here +#checking unique values +unique(data_na_removed$label) + +# 1 - Benign 2 - Malicious 3 - Outlier + +data_na_removed$label = factor(data_na_removed$label, + levels = c("benign", "malicious", "outlier"), + labels = c(1, 2, 3)) +summary(data_na_removed$label) +data_na_removed +#summary(data_na_removed) + +#ggpairs(data_na_removed) + +#options(scipen = 999) + +#ggplot(na.omit(data), aes(x=label, colour = label, fill = label), stat = "count") + geom_bar() + +# ggtitle("Distibution of Labels in Dataset") + +# labs(y = "Number of Cases", x = "Type of Label") + + +#cor.test(data_na_removed$entropy, as.numeric(data_na_removed$label)) + +#data_na_removed$label = as.numeric(data_na_removed$label) + +#data_na_removed = data_na_removed[, -c(12, 13)] + + +#ggplot(data_na_removed, aes(x = entropy)) + geom_bar() + +# facet_wrap(~label) + + +#hist(data_na_removed$entropy, bins = 10) +data_na_removed$label=as.factor(data_na_removed$label) +training=createDataPartition(data_na_removed$label,p=0.6,list=FALSE) 
+train_set=data_na_removed[training,] +test_set=data_na_removed[-training,] +head(train_set) +model=train(data=train_set,label~.,method="nnet",tuneGrid=expand.grid(.size=c(5), .decay=0.1),trControl=trainControl(method="none",seeds = 123),MaxNWts=100,maxit=100) +confusionMatrix(train_set$label,predict(model,data=train_set)) +test_set$test_pred=predict(model,newdata = test_set[,-15]) +confusionMatrix(test_set$label, test_set$test_pred) +aggregate() + + +####kmeans +install.packages("ClusterR") +install.packages("cluster") +library(ClusterR) +library(cluster) +data_na_removed +dendogram=hclust(dist(data,method="euclidean"),method="complete") +data1=data_na_removed[,-15] +data1 +kmeans1<- kmeans(data1, centers = 3) +cm=table(data_na_removed$label, kmeans1$cluster) +cm +confusionMatrix(cm) + +###cart +data_na_removed +data1=data_na_removed[,c(4,5,10,11,15)] +data1 +training=createDataPartition(data_na_removed$label,p=0.6,list=FALSE) +train_set=data_na_removed[training,] +test_set=data_na_removed[-training,] +model=train(data=train_set,label~.,method="rpart") +confusionMatrix(predict(model,new_data=test_set),train_set$label) + +##doing data cleaning + +data_na_removed=data_na_removed%>%mutate(timediff=time_end-time_start) +data_na_removed$time_end<-NULL +data_na_removed$time_start<-NULL +#SELECTING ONLY POSITIVES +data_na_removed=data_na_removed[data_na_removed$timediff>=1] +data_na_removed + +data_na_removed=data_na_removed[data_na_removed$dest_ip%in% c(786 , 15169 ,202425 , 61337 , 49453 ,45899 , 7713 , 16276 , 49505, + 57172 , 43350)] +data_na_removed$dest_ip=as.factor(data_na_removed$dest_ip) +summary(data_na_removed$dest_ip) +data_na_removed=data_na_removed[data_na_removed$dest_port %in% c(445,9200,22,5900,5060,53 , 5060 , 23 , 123 , 33522, 33524 , + 33518, 33504 , 33520,33524 , 33518, 33504, 33520, + 33526 , 3389 , 33514, 33512 , 60490 , 60506, 60512 , 60510)] +data_na_removed$dest_port=as.factor(data_na_removed$dest_port ) +summary(data_na_removed$dest_port) 
+data_na_removed=data_na_removed[data_na_removed$src_ip %in% c(786 , 45899 ,202425 , 7552 , 7713 , 49453 , 8048 , 18403 , 16276 , 43350 ,213371 , + 4134 , 34665,12389 , 200019 , 57172, 9299 , 12876, 8452 , 3462, + 25019 , 24961 , 55836 , 45820 , 8151 , 45090,45595 , 9498 , 45903, 47331 , 4812 , 9121 , + 6503 , 9484 , 4837 , 8376 , 15895, 9009 , 6057 )] + +data_na_removed$src_ip=as.factor(data_na_removed$src_ip) +summary(data_na_removed$src_ip) +data_na_removed=data_na_removed[data_na_removed$src_port %in% c(9200 , 33504 , 33524 , 33518 , 33514 , 33522 , 60510, + 33512, 60516 , 33526 , 60490 , 33520 , 60512 ,60506 , 60514 , 60518 , 60508, 55336 , 55330, 55332 , + 55334 , 123, 53278 , 53020 , 32651 , 26042)] + + +data_na_removed$src_port=as.factor(data_na_removed$src_port) +summary(data_na_removed$src_port) +data_na_removed +dmy <- dummyVars(" ~dest_ip+dest_port+src_ip+src_port", data = data_na_removed) +trsf <- data.frame(predict(dmy, newdata = data_na_removed)) +data_na_removed=cbind(data_na_removed,trsf) +data_na_removed=data_na_removed[,c(-4,-5,-10,-11)] +data_na_removed$timediff<-NULL +data_na_removed +data_na_removed$avg_ipt=scale(data_na_removed$avg_ipt) +data_na_removed$bytes_in=scale(data_na_removed$bytes_in) +data_na_removed$bytes_out=scale(data_na_removed$bytes_out) +data_na_removed$entropy=scale(data_na_removed$entropy) +data_na_removed$num_pkts_out=scale(data_na_removed$num_pkts_out) +data_na_removed$proto=scale(data_na_removed$proto) +data_na_removed$total_entropy=scale(data_na_removed$total_entropy) +data_na_removed$duration=scale(data_na_removed$duration) +summary(data_na_removed) + + + |
