aboutsummaryrefslogtreecommitdiff
path: root/project.R
diff options
context:
space:
mode:
authorBobby <[email protected]>2022-04-27 20:57:47 -0400
committerBobby <[email protected]>2022-04-27 20:57:47 -0400
commit4820317683754e8e1faf9dbf456c20b7f27f637a (patch)
tree3a93ba15e0b2562338829f39ff4c3481ad88d44d /project.R
downloadNetwork-Intrusion-Detection-4820317683754e8e1faf9dbf456c20b7f27f637a.tar.xz
Network-Intrusion-Detection-4820317683754e8e1faf9dbf456c20b7f27f637a.zip
basic R shiny app with data visualizationdataset
Diffstat (limited to 'project.R')
-rw-r--r--project.R142
1 files changed, 142 insertions, 0 deletions
diff --git a/project.R b/project.R
new file mode 100644
index 0000000..2320cb3
--- /dev/null
+++ b/project.R
@@ -0,0 +1,142 @@
+# ---- Packages ----
+library(data.table)
+library(lattice)
+library(caret)
+library(nnet)
+library(fastDummies)
+library(ggplot2)
+library(plotly)
+library(GGally)
+
+# ---- Input ----
+# Read both CSV files and stack them row-wise into a single table.
+data <- fread("2020.10.02.csv")
+data1 <- fread("2020.10.03.csv")
+data <- rbind(data, data1)
+
+# ---- Missing values ----
+# Count the NAs in every column of the combined table.
+colSums(is.na(data))
+# Drop every row that contains an NA, then re-count to confirm none are left.
+data_na_removed <- na.omit(data)
+colSums(is.na(data_na_removed))
+
+# ---- Label encoding ----
+# Show the distinct label values that survived the NA filter.
+unique(data_na_removed$label)
+# Recode the label as a factor: benign -> 1, malicious -> 2, outlier -> 3.
+data_na_removed$label <- factor(
+  data_na_removed$label,
+  levels = c("benign", "malicious", "outlier"),
+  labels = c(1, 2, 3)
+)
+summary(data_na_removed$label)
+data_na_removed
+#summary(data_na_removed)
+
+#ggpairs(data_na_removed)
+
+#options(scipen = 999)
+
+#ggplot(na.omit(data), aes(x=label, colour = label, fill = label), stat = "count") + geom_bar() +
+# ggtitle("Distibution of Labels in Dataset") +
+# labs(y = "Number of Cases", x = "Type of Label")
+
+
+#cor.test(data_na_removed$entropy, as.numeric(data_na_removed$label))
+
+#data_na_removed$label = as.numeric(data_na_removed$label)
+
+#data_na_removed = data_na_removed[, -c(12, 13)]
+
+
+#ggplot(data_na_removed, aes(x = entropy)) + geom_bar() +
+# facet_wrap(~label)
+
+
+#hist(data_na_removed$entropy, bins = 10)
+# ---- Neural network classifier (caret + nnet) ----
+# Seed the RNG so that the 60/40 split and the network's random starting
+# weights are reproducible. trainControl(method = "none") does no resampling,
+# so the seed is set here rather than through trainControl(seeds = ).
+set.seed(123)
+# The outcome has to be a factor for classification (no-op if it already is).
+data_na_removed$label <- as.factor(data_na_removed$label)
+# Stratified split on the label: 60% of the rows for training, the rest for
+# testing.
+training <- createDataPartition(data_na_removed$label, p = 0.6, list = FALSE)
+train_set <- data_na_removed[training, ]
+test_set <- data_na_removed[-training, ]
+head(train_set)
+# Fit a single network (5 hidden units, weight decay 0.1) with no tuning loop.
+model <- train(
+  label ~ .,
+  data = train_set,
+  method = "nnet",
+  tuneGrid = expand.grid(.size = c(5), .decay = 0.1),
+  trControl = trainControl(method = "none"),
+  MaxNWts = 100,
+  maxit = 100
+)
+# confusionMatrix() takes the predictions first and the reference labels
+# second; swapping them transposes sensitivity/specificity and the predictive
+# values. predict() for caret models takes the data through `newdata`.
+confusionMatrix(predict(model, newdata = train_set), train_set$label)
+# Score the held-out rows. The formula interface ignores the label column in
+# `newdata`, so it does not have to be dropped by position.
+test_set$test_pred <- predict(model, newdata = test_set)
+confusionMatrix(test_set$test_pred, test_set$label)
+
+
+#### k-means clustering
+# Install the clustering packages only when they are missing instead of
+# re-installing them on every run of the script.
+if (!requireNamespace("ClusterR", quietly = TRUE)) install.packages("ClusterR")
+if (!requireNamespace("cluster", quietly = TRUE)) install.packages("cluster")
+library(ClusterR)
+library(cluster)
+data_na_removed
+# Clustering input: every column except the class label, selected by name
+# rather than by position.
+# NOTE(review): assumes all remaining columns are numeric -- confirm.
+data1 <- data_na_removed[, setdiff(names(data_na_removed), "label"), with = FALSE]
+data1
+# Hierarchical clustering on the cleaned features (the raw `data` table still
+# holds the NA rows and the label column).
+# NOTE(review): dist() stores n * (n - 1) / 2 distances, so this step may not
+# fit in memory for a large table -- consider clustering a sample.
+dendogram <- hclust(dist(data1, method = "euclidean"), method = "complete")
+# Seed the RNG because kmeans() picks its starting centres at random.
+set.seed(123)
+kmeans1 <- kmeans(data1, centers = 3)
+# Cross-tabulate true labels (rows) against cluster ids (columns).
+cm <- table(data_na_removed$label, kmeans1$cluster)
+cm
+# Cluster ids are arbitrary, so they cannot be scored against the labels
+# directly. Map each cluster to the label that occurs most often inside it and
+# evaluate that mapping instead.
+cluster_label <- rownames(cm)[apply(cm, 2, which.max)]
+names(cluster_label) <- colnames(cm)
+cluster_pred <- factor(
+  cluster_label[as.character(kmeans1$cluster)],
+  levels = rownames(cm)
+)
+confusionMatrix(
+  cluster_pred,
+  factor(data_na_removed$label, levels = rownames(cm))
+)
+
+### CART classifier (caret + rpart)
+data_na_removed
+# Column subset 4, 5, 10, 11 and 15 of the cleaned table.
+# NOTE(review): data1 is not used below -- the tree is trained on every column
+# of data_na_removed. Confirm whether the model was meant to use this subset.
+data1 <- data_na_removed[, c(4, 5, 10, 11, 15)]
+data1
+# Seed the RNG so the split and caret's resampling are reproducible.
+set.seed(123)
+# Stratified 60/40 split on the label.
+training <- createDataPartition(data_na_removed$label, p = 0.6, list = FALSE)
+train_set <- data_na_removed[training, ]
+test_set <- data_na_removed[-training, ]
+model <- train(label ~ ., data = train_set, method = "rpart")
+# Evaluate on the held-out rows. The argument is `newdata`; a misspelt name is
+# swallowed by `...` and predict() then silently scores the training data.
+# Predictions go first and the reference labels second.
+confusionMatrix(predict(model, newdata = test_set), test_set$label)
+
+## Data cleaning and feature preparation
+
+# Difference between the two timestamps, computed with plain column
+# arithmetic so that no pipe or dplyr verb has to be attached for it.
+data_na_removed$timediff <- data_na_removed$time_end - data_na_removed$time_start
+data_na_removed$time_end <- NULL
+data_na_removed$time_start <- NULL
+# Keep only rows whose time difference is at least 1.
+data_na_removed <- data_na_removed[data_na_removed$timediff >= 1]
+data_na_removed
+
+# Restrict each address/port column to a fixed whitelist of values, then turn
+# it into a factor so that it can be one-hot encoded below.
+data_na_removed <- data_na_removed[data_na_removed$dest_ip %in% c(
+  786, 15169, 202425, 61337, 49453, 45899, 7713, 16276, 49505, 57172, 43350
+)]
+data_na_removed$dest_ip <- as.factor(data_na_removed$dest_ip)
+summary(data_na_removed$dest_ip)
+
+# Same whitelist as before with the repeated entries removed (%in% ignores
+# duplicates, so the selected rows are unchanged).
+data_na_removed <- data_na_removed[data_na_removed$dest_port %in% c(
+  445, 9200, 22, 5900, 5060, 53, 23, 123, 33522, 33524, 33518, 33504, 33520,
+  33526, 3389, 33514, 33512, 60490, 60506, 60512, 60510
+)]
+data_na_removed$dest_port <- as.factor(data_na_removed$dest_port)
+summary(data_na_removed$dest_port)
+
+data_na_removed <- data_na_removed[data_na_removed$src_ip %in% c(
+  786, 45899, 202425, 7552, 7713, 49453, 8048, 18403, 16276, 43350, 213371,
+  4134, 34665, 12389, 200019, 57172, 9299, 12876, 8452, 3462,
+  25019, 24961, 55836, 45820, 8151, 45090, 45595, 9498, 45903, 47331, 4812,
+  9121, 6503, 9484, 4837, 8376, 15895, 9009, 6057
+)]
+data_na_removed$src_ip <- as.factor(data_na_removed$src_ip)
+summary(data_na_removed$src_ip)
+
+data_na_removed <- data_na_removed[data_na_removed$src_port %in% c(
+  9200, 33504, 33524, 33518, 33514, 33522, 60510, 33512, 60516, 33526, 60490,
+  33520, 60512, 60506, 60514, 60518, 60508, 55336, 55330, 55332, 55334, 123,
+  53278, 53020, 32651, 26042
+)]
+data_na_removed$src_port <- as.factor(data_na_removed$src_port)
+summary(data_na_removed$src_port)
+data_na_removed
+
+# One-hot encode the four factor columns and append the indicator columns.
+dmy <- dummyVars(" ~dest_ip+dest_port+src_ip+src_port", data = data_na_removed)
+trsf <- data.frame(predict(dmy, newdata = data_na_removed))
+data_na_removed <- cbind(data_na_removed, trsf)
+# Drop the original factor columns by name (more robust than positional
+# indices, which silently break if the column order ever changes).
+data_na_removed$dest_ip <- NULL
+data_na_removed$dest_port <- NULL
+data_na_removed$src_ip <- NULL
+data_na_removed$src_port <- NULL
+# timediff was only needed for the row filter above.
+data_na_removed$timediff <- NULL
+data_na_removed
+
+# Standardise the numeric features. scale() returns a one-column matrix with
+# centring/scaling attributes; as.numeric() stores a plain numeric vector so
+# the columns stay ordinary vectors.
+# NOTE(review): num_pkts_in is not in this list -- confirm that is intended.
+scale_cols <- c(
+  "avg_ipt", "bytes_in", "bytes_out", "entropy",
+  "num_pkts_out", "proto", "total_entropy", "duration"
+)
+data_na_removed[, (scale_cols) := lapply(.SD, function(col) as.numeric(scale(col))),
+  .SDcols = scale_cols]
+summary(data_na_removed)
+
+
+