dpcc aims to enable fast computation and path visualization of L1 convex
clustering with identical weights.
You can install dpcc from GitHub with:
# dpcc is installed from GitHub via devtools; install devtools first if needed.
# install.packages("devtools")
devtools::install_github("bingyuan-zhang/dpcc")
Load the package.
library(dpcc)
We first generate a three-cluster example.
# install.packages("ggplot2")
library(ggplot2)

# Simulate n two-dimensional points scattered around three random centres.
# The order of the random draws below is kept fixed so that set.seed(12)
# reproduces the same data set.
set.seed(12)
n <- 50
error <- matrix(rnorm(n * 2, sd = 1.4), n, 2)  # Gaussian noise, one row per point
which <- sample(1:3, n, replace = TRUE)        # cluster label of each point
xmean <- matrix(rnorm(3 * 2, sd = 11), 3, 2)   # one centre (row) per cluster
tb1 <- error + xmean[which, ]                  # centre of the point's cluster + noise

# Standardise each coordinate and keep the true labels for colouring.
data <- data.frame(
  x = scale(tb1[, 1]),
  y = scale(tb1[, 2]),
  clusters = factor(which)
)

# Scatter plot of the simulated points, coloured by true cluster.
ggplot(data, aes(x, y, color = factor(clusters))) +
  geom_point(size = 2, show.legend = FALSE)
Now we construct a sequence of tuning parameters with length K = 10.
# Keep only the two coordinate columns (x, y) as a numeric matrix.
dat <- data.matrix(data)[, 1:2]

# Largest tuning parameter on the grid: the value returned by find_lambda(),
# scaled down by 1.5.
lam_max <- find_lambda(dat) / 1.5
K <- 10

# K equally spaced tuning parameters: lam_max / K, 2 * lam_max / K, ..., lam_max.
Lam <- seq_len(K) / K * lam_max
Lam
#> [1] 0.002726164 0.005452327 0.008178491 0.010904655 0.013630819 0.016356982
#> [7] 0.019083146 0.021809310 0.024535474 0.027261637
Next we use the cpaint function from the package to compute the solutions along the sequence and draw the clusterpath.
# Run cpaint() on the data for the whole sequence of tuning parameters.
# NOTE(review): res[[1]] and res[[2]] are indexed as [j, ] below, so they are
# presumably K-row matrices holding the fitted x and y coordinates for each
# tuning parameter -- confirm against the cpaint() documentation.
res <- cpaint(dat, Lam)

# Build one long data frame of path points. Each observation (group) starts at
# its original position and is followed by its position at each of the K
# tuning parameters.
df.paths <- data.frame(x = dat[, 1], y = dat[, 2], group = seq_len(n))
for (j in seq_len(K)) {
  df <- data.frame(x = res[[1]][j, ], y = res[[2]][j, ], group = seq_len(n))
  df.paths <- rbind(df.paths, df)
}

# Grey paths trace each observation; the points are coloured by true cluster.
ggplot(data) +
  geom_path(
    data = df.paths,
    aes(x = x, y = y, group = group),
    colour = "grey60",
    alpha = 0.5
  ) +
  geom_point(aes(x = x, y = y, col = clusters), size = 2, show.legend = FALSE)
[1] Bingyuan Zhang, Yoshikazu Terada, Jie Chen. Dynamic visualization for L1 fusion convex clustering in near-linear time. UAI 2021 (to appear).