-
Notifications
You must be signed in to change notification settings - Fork 1
/
hirclust.R
48 lines (30 loc) · 1.83 KB
/
hirclust.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Load the EastWest Airlines data set from its CSV file and print the
# structure of the resulting data frame for a quick visual check.
AirLine_DF <- read.csv(
  file = "D:/Assignment/Clustering(11th june'20)-20200618T064708Z-001/Clustering(11th june_20)/Assignments/EastWestAirlines.csv"
)
str(AirLine_DF)
# Convert the coded cc1_miles, cc2_miles and cc3_miles columns to numeric
# mile values.
#
# Codes 1..5 are replaced by the value at the same position in
# `cc_band_miles`; any other non-missing code becomes 0 and NA stays NA.
# NOTE(review): the five values look like representative mileages for each
# band -- confirm against the data dictionary before changing them.
cc_band_miles <- c(2500, 7500, 17500, 32500, 50000)
cc_columns <- c("cc1_miles", "cc2_miles", "cc3_miles")

AirLine_DF[cc_columns] <- lapply(
  AirLine_DF[cc_columns],
  function(code) {
    # Position-based lookup: code k selects cc_band_miles[k].
    miles <- cc_band_miles[match(code, seq_along(cc_band_miles))]
    # match() yields NA for unknown codes too; only genuine NAs stay NA.
    miles[is.na(miles) & !is.na(code)] <- 0
    miles
  }
)
# Hierarchical clustering of the standardised data ----

# Normalize the data: centre and scale every column except the first one,
# which is kept aside and reused as the row identifier in the final output.
mydata <- scale(AirLine_DF[, -1])

# Compute the pairwise Euclidean distance matrix between rows.
d <- dist(mydata, method = "euclidean")

# Preview the top-left corner of the distance matrix. The corner is capped
# at the number of rows so small data sets do not fail with a subscript
# error; with 11 or more rows this is the same 11 x 11 preview as before.
preview_n <- min(11L, nrow(mydata))
as.matrix(d)[seq_len(preview_n), seq_len(preview_n)]

# Agglomerative clustering with Ward's criterion, then plot the dendrogram.
fit <- hclust(d, method = "ward.D2")
plot(fit)

# Single source of truth for the number of clusters, so the cut and the
# rectangles drawn on the dendrogram can never disagree.
n_clusters <- 2
clusters <- cutree(fit, k = n_clusters) # cut tree into n_clusters clusters

# Draw red borders around the clusters on the dendrogram plotted above.
rect.hclust(fit, k = n_clusters, border = "red")

# Attach the cluster numbers to the identifier taken from the first column.
Final_output <- data.frame(Uni = AirLine_DF[, 1], Cluster = clusters)

# View() opens a data viewer and is only meaningful in an interactive
# session; when run as a batch script, print the first rows instead.
if (interactive()) {
  View(Final_output)
} else {
  print(head(Final_output))
}