basic usage

Aviezer Lifshitz

2023-06-26

Basic usage of the package.

Basic usage

First, let’s simulate 5 clusters of 100 points each in 2 dimensions, normally distributed around centers 1 to 5, with a standard deviation of 0.3:

data <- simulate_data(n = 100, sd = 0.3, nclust = 5, dims = 2)
data
##      id        V1        V2 true_clust
## 1     1 1.0261098 0.8789749          1
## 2     2 1.2082016 1.3878250          1
## 3     3 0.4074118 1.3766459          1
## 4     4 0.6051425 0.6178642          1
## 5     5 0.9524910 1.1564759          1
## 6     6 1.0521462 1.1003523          1
## 7     7 1.0824339 0.9615825          1
## 8     8 1.1844157 1.1233981          1
## 9     9 1.0455999 0.7943182          1
## 10   10 0.7854149 1.0792299          1
## 11   11 1.0210792 1.0148444          1
## 12   12 0.4673453 1.7059383          1
## 13   13 1.1874463 1.3096575          1
## 14   14 0.8660050 1.4647951          1
## 15   15 0.6774450 1.2602245          1
## 16   16 1.1722299 1.2319227          1
## 17   17 0.5210028 1.3175402          1
## 18   18 0.8446220 0.9294415          1
## 19   19 0.7843899 1.3028062          1
## 20   20 1.0987512 1.1828475          1
## 21   21 0.7061653 0.9081029          1
## 22   22 0.8097757 1.5410117          1
## 23   23 1.0355111 1.1280316          1
## 24   24 0.7933902 0.9487525          1
## 25   25 0.9196697 0.5746029          1
## 26   26 1.1158750 0.9110239          1
## 27   27 1.0225545 1.2589772          1
## 28   28 0.5656501 1.0978835          1
## 29   29 1.4112862 0.8262509          1
## 30   30 0.8374893 0.8430044          1
## 31   31 1.2817694 1.0326764          1
## 32   32 0.6429815 1.0066302          1
## 33   33 1.0422322 0.8143470          1
## 34   34 1.1554849 0.7581531          1
## 35   35 0.7382609 0.6445084          1
## 36   36 0.6174064 1.5658486          1
## 37   37 1.2322509 0.9360519          1
## 38   38 1.0664780 0.4662921          1
## 39   39 1.5402956 0.9916549          1
## 40   40 0.9002048 1.1083222          1
## 41   41 0.6686538 0.8906331          1
## 42   42 1.1649300 0.5197539          1
## 43   43 0.4598309 1.3919587          1
## 44   44 1.2098375 0.9882193          1
## 45   45 1.6444289 1.1863258          1
## 46   46 0.9009659 0.8880477          1
## 47   47 1.1907833 0.8702168          1
## 48   48 0.6477844 0.9888838          1
## 49   49 1.1872958 1.4506953          1
## 50   50 0.9209193 1.3881039          1
## 51   51 1.0306521 1.1531862          1
## 52   52 1.0165386 1.5388289          1
## 53   53 1.3660712 0.8130430          1
## 54   54 0.7028710 0.5542610          1
## 55   55 1.3475285 1.6116860          1
## 56   56 1.4838698 0.8363814          1
## 57   57 1.4701719 1.0088474          1
## 58   58 0.8452572 1.0180214          1
## 59   59 1.3778488 1.1640458          1
## 60   60 0.5782077 1.1141623          1
## 61   61 1.5977919 1.1992580          1
## 62   62 1.3806729 0.8147248          1
## 63   63 1.0612339 1.1848853          1
## 64   64 0.8444858 1.0577277          1
## 65   65 0.4775080 1.1730890          1
## 66   66 1.0170843 1.7271554          1
## 67   67 0.8116216 0.9659902          1
## 68   68 0.9547799 1.1695377          1
## 69   69 1.1267423 0.8728154          1
## 70   70 0.7633143 0.9193829          1
## 71   71 1.1200924 1.0618220          1
## 72   72 1.2760440 1.3197330          1
## 73   73 1.3331154 0.8181071          1
## 74   74 0.6103232 1.2971236          1
## 75   75 1.0733139 0.6211208          1
## 76   76 1.3590314 0.7965386          1
## 77   77 1.0956969 0.9418009          1
## 78   78 0.7798725 1.0848282          1
## 79   79 0.8840384 1.2756488          1
## 80   80 0.9639136 1.0888227          1
## 81   81 0.9828408 0.8680878          1
## 82   82 1.3777760 0.5978993          1
## 83   83 1.4223231 1.2930153          1
## 84   84 0.6977858 1.3715870          1
## 85   85 0.8497892 1.3137351          1
## 86   86 0.7315108 1.7112728          1
## 87   87 1.2868578 1.0365225          1
## 88   88 1.2382604 1.1371517          1
## 89   89 0.7734909 0.9699200          1
## 90   90 1.2163963 1.1401624          1
## 91   91 1.5565348 1.0742591          1
## 92   92 0.8407294 1.1583621          1
## 93   93 0.6293467 1.3518135          1
## 94   94 0.9492449 1.1646492          1
## 95   95 1.0176978 0.8171222          1
## 96   96 0.7293823 1.4980478          1
## 97   97 1.1091045 0.9631714          1
## 98   98 1.1203562 0.5722272          1
## 99   99 1.0120280 0.6676874          1
## 100 100 0.8652859 0.9072811          1
## 101 101 2.2507390 1.9734441          2
## 102 102 1.7111898 1.4200256          2
## 103 103 1.9420641 2.1271098          2
## 104 104 1.8473650 2.2720518          2
## 105 105 2.3362771 1.5908843          2
## 106 106 1.7730098 2.5610983          2
## 107 107 1.9314666 2.3425966          2
## 108 108 1.5556423 2.2254545          2
## 109 109 1.7456221 2.0292042          2
## 110 110 2.2823768 2.0903847          2
## 111 111 1.6564114 1.7883869          2
## 112 112 2.0871240 2.0333653          2
## 113 113 2.3165806 1.5842302          2
## 114 114 2.0965648 2.0872073          2
## 115 115 1.4835689 1.8331407          2
## 116 116 2.5872570 2.2380236          2
## 117 117 2.6848959 2.2263639          2
## 118 118 2.0350289 2.1211310          2
## 119 119 1.7968176 2.5712147          2
## 120 120 2.3919229 1.7891205          2
## 121 121 2.0821343 1.8197605          2
## 122 122 2.4191544 2.2517732          2
## 123 123 2.4019949 1.7326654          2
## 124 124 2.5926465 2.5594668          2
## 125 125 1.9408834 2.3052861          2
## 126 126 1.5549761 1.9005603          2
## 127 127 2.1191262 1.6481778          2
## 128 128 2.1825576 1.8092879          2
## 129 129 2.0836692 2.3271355          2
## 130 130 1.9312853 2.2593607          2
## 131 131 1.9074311 2.1591312          2
## 132 132 2.0672354 1.3796532          2
## 133 133 1.8630051 1.9181871          2
## 134 134 2.1546865 2.5561643          2
## 135 135 1.5212970 2.4327141          2
## 136 136 1.6934812 1.5337411          2
## 137 137 2.0074968 1.9250483          2
## 138 138 2.3650280 2.4040551          2
## 139 139 1.9946912 1.8481383          2
## 140 140 2.4641530 2.3764864          2
## 141 141 1.6062787 2.0116725          2
## 142 142 1.9332370 1.9134960          2
## 143 143 1.6034844 2.1019813          2
## 144 144 1.8774210 2.0600012          2
## 145 145 1.6196067 1.6925646          2
## 146 146 2.5005191 1.8960383          2
## 147 147 1.5326075 1.9784592          2
## 148 148 2.1294801 2.0637206          2
## 149 149 1.7302110 1.6931117          2
## 150 150 1.7683858 2.0212679          2
## 151 151 1.6265316 2.2111318          2
## 152 152 1.6412212 1.8536057          2
## 153 153 2.0175564 1.9695532          2
## 154 154 1.4046822 1.7861228          2
## 155 155 1.7118952 1.8946449          2
## 156 156 2.0159079 2.2202097          2
## 157 157 1.9978108 1.9627807          2
## 158 158 1.7467426 2.2380811          2
## 159 159 1.9664537 1.8383967          2
## 160 160 2.4394158 1.9986309          2
## 161 161 2.3159907 2.0334511          2
## 162 162 1.4274191 2.0114730          2
## 163 163 1.7181576 1.8792156          2
## 164 164 1.7026063 1.8163556          2
## 165 165 1.5779771 2.2480382          2
## 166 166 1.9203426 2.3704888          2
## 167 167 2.0069635 1.8451700          2
## 168 168 1.8325267 1.7415076          2
## 169 169 1.3455124 1.7951035          2
## 170 170 1.5983914 1.7675893          2
## 171 171 1.9731544 2.0227249          2
## 172 172 2.4525061 1.8036962          2
## 173 173 1.5473594 1.9714050          2
## 174 174 1.9270898 2.0394617          2
## 175 175 1.9160513 1.8629466          2
## 176 176 1.5318938 2.3317852          2
## 177 177 1.4961472 1.8122467          2
## 178 178 1.8877376 1.8662566          2
## 179 179 1.8301150 1.9615677          2
## 180 180 1.8450436 2.1222798          2
## 181 181 2.2311471 1.8595561          2
## 182 182 2.0098754 2.6510669          2
## 183 183 2.1456931 2.1631415          2
## 184 184 1.9221451 1.8618524          2
## 185 185 2.3252137 2.2418015          2
## 186 186 2.2216200 1.9485285          2
## 187 187 2.0332722 1.4354763          2
## 188 188 1.8062872 2.0424980          2
## 189 189 2.1704762 1.9608621          2
## 190 190 2.2995006 1.4887770          2
## 191 191 1.3853784 1.7359899          2
## 192 192 2.7447421 2.0566132          2
## 193 193 2.0100253 1.7841727          2
## 194 194 2.1998917 1.7847640          2
## 195 195 1.8997662 2.2572201          2
## 196 196 2.1560595 1.5400715          2
## 197 197 2.1952147 1.8842155          2
## 198 198 1.9428259 1.7772452          2
## 199 199 1.8721822 2.1863995          2
## 200 200 2.2076150 1.9373723          2
## 201 201 2.9644982 3.3045560          3
## 202 202 3.0767623 3.0423814          3
## 203 203 3.3480945 2.9347146          3
## 204 204 2.9757784 2.6994756          3
## 205 205 3.6237907 3.0982942          3
## 206 206 3.1093692 2.5971876          3
## 207 207 2.9448006 3.0504244          3
## 208 208 3.4877780 2.6188103          3
## 209 209 3.2889513 2.8808945          3
## 210 210 3.3371604 3.6150125          3
## 211 211 2.6532187 3.2721277          3
## 212 212 2.7724586 2.7386915          3
## 213 213 2.6488393 3.0757640          3
## 214 214 2.2355128 3.4279864          3
## 215 215 2.8721251 3.8205650          3
## 216 216 2.5414902 3.3653067          3
## 217 217 3.1064884 2.9693804          3
## 218 218 3.3445566 3.1595375          3
## 219 219 2.6882466 3.0957165          3
## 220 220 3.3605355 2.8162665          3
## 221 221 3.1225768 2.5457724          3
## 222 222 2.9216233 2.8181887          3
## 223 223 3.3885296 2.6978747          3
## 224 224 2.9788114 3.0930984          3
## 225 225 2.9996620 2.9405271          3
## 226 226 2.9231382 3.0038614          3
## 227 227 3.1610709 2.9907439          3
## 228 228 3.1463583 3.1137368          3
## 229 229 2.8712579 3.4172526          3
## 230 230 2.6763562 3.1215626          3
## 231 231 2.7568793 3.3169666          3
## 232 232 2.6210659 3.0198710          3
## 233 233 2.3799133 3.1524617          3
## 234 234 3.1580385 2.9083142          3
## 235 235 3.6636855 3.0234735          3
## 236 236 3.4069018 2.7186596          3
## 237 237 2.8895418 3.2578791          3
## 238 238 2.8983658 2.8773267          3
## 239 239 3.0458218 3.6716847          3
## 240 240 3.1373504 2.9705692          3
## 241 241 2.7603036 3.2741140          3
## 242 242 2.5684575 2.9700122          3
## 243 243 3.1090833 2.9459835          3
## 244 244 2.7971848 2.8170710          3
## 245 245 3.2929566 2.8851342          3
## 246 246 3.1277096 3.1412690          3
## 247 247 2.9642676 2.8230784          3
## 248 248 3.4477851 3.5492766          3
## 249 249 2.9031761 2.9824638          3
## 250 250 3.0623919 3.0857292          3
## 251 251 3.0021596 3.3278480          3
## 252 252 2.9929373 3.3052969          3
## 253 253 3.1603051 2.9051714          3
## 254 254 3.4191972 2.8875159          3
## 255 255 3.0565207 2.8055598          3
## 256 256 3.3351354 2.5403902          3
## 257 257 3.5337701 3.1381065          3
## 258 258 3.0277587 3.1104109          3
## 259 259 3.2624869 3.0819888          3
## 260 260 2.8899357 2.2924429          3
## 261 261 2.8455557 3.4330489          3
## 262 262 2.9410421 2.5220921          3
## 263 263 3.0303561 3.3840035          3
## 264 264 3.1603250 2.6901277          3
## 265 265 3.1808988 3.2473704          3
## 266 266 3.3236162 3.2332316          3
## 267 267 2.9664698 2.8995253          3
## 268 268 2.8603979 2.8253417          3
## 269 269 3.1223213 2.8696162          3
## 270 270 3.0593400 3.1338066          3
## 271 271 2.7386037 3.4270862          3
## 272 272 3.0559020 3.2736268          3
## 273 273 3.3540388 2.8783431          3
## 274 274 2.9712758 2.8914531          3
## 275 275 2.7854939 3.1026714          3
## 276 276 2.8425763 3.0493717          3
## 277 277 2.8866258 2.6415335          3
## 278 278 3.6073911 2.9196142          3
## 279 279 2.7023403 2.7342157          3
## 280 280 3.2500848 3.2142301          3
## 281 281 3.2725901 3.1032279          3
## 282 282 3.3623681 3.1765918          3
## 283 283 3.1812447 3.2693132          3
## 284 284 2.4879224 2.5940344          3
## 285 285 2.3788675 3.1956744          3
## 286 286 3.2537871 3.2684014          3
## 287 287 3.0053145 3.1645367          3
## 288 288 2.7887662 3.2133506          3
## 289 289 2.7860445 3.1687454          3
## 290 290 3.3884620 2.7436036          3
## 291 291 2.8207429 3.2909459          3
## 292 292 2.6650897 3.2522069          3
## 293 293 2.8604777 2.9610793          3
## 294 294 2.6595696 2.8621988          3
## 295 295 3.6919376 3.1869565          3
## 296 296 2.6181407 2.7466759          3
## 297 297 2.7649412 2.9552225          3
## 298 298 3.1684679 3.2133397          3
## 299 299 2.4592127 2.7515784          3
## 300 300 3.3604407 3.3986271          3
## 301 301 3.8150552 3.8911655          4
## 302 302 3.8620399 4.0148583          4
## 303 303 3.9303883 4.4566098          4
## 304 304 3.4494426 3.7811612          4
## 305 305 3.5593164 4.0012256          4
## 306 306 4.6754920 3.9073560          4
## 307 307 4.1275032 3.9260513          4
## 308 308 3.4495542 4.2364470          4
## 309 309 4.1506832 4.6906092          4
## 310 310 4.2831032 4.4027792          4
## 311 311 4.0805069 3.8229632          4
## 312 312 3.7435101 3.9462776          4
## 313 313 3.4707019 3.6563803          4
## 314 314 3.6423924 4.7996343          4
## 315 315 4.3845184 4.2347890          4
## 316 316 4.9294786 4.3220426          4
## 317 317 4.1385435 4.1449025          4
## 318 318 3.8126941 3.6681342          4
## 319 319 3.9630498 3.7205179          4
## 320 320 3.7301834 3.5606677          4
## 321 321 4.4614430 4.4918087          4
## 322 322 3.7391237 4.2120136          4
## 323 323 4.5335820 3.8693868          4
## 324 324 4.0891795 4.2719885          4
## 325 325 3.9966463 3.9687613          4
## 326 326 3.7952514 4.0315092          4
## 327 327 3.7472274 4.1969199          4
## 328 328 4.1297671 4.3557411          4
## 329 329 4.1715826 4.1925438          4
## 330 330 3.6592755 3.7783528          4
## 331 331 3.6837094 4.4049570          4
## 332 332 4.2188747 4.2994978          4
## 333 333 4.1829662 4.7916938          4
## 334 334 4.2188903 4.3751630          4
## 335 335 3.9010871 3.5328454          4
## 336 336 3.3571560 4.0663318          4
## 337 337 4.3217689 3.9261015          4
## 338 338 4.0917867 4.0564099          4
## 339 339 3.6259036 3.7131891          4
## 340 340 3.1774542 3.7296186          4
## 341 341 4.2516770 3.7832621          4
## 342 342 4.3521115 4.0404713          4
## 343 343 4.0647554 3.9041629          4
## 344 344 4.2476680 4.1827443          4
## 345 345 3.5262697 4.5034723          4
## 346 346 4.1090812 4.1956417          4
## 347 347 4.3368593 4.2822037          4
## 348 348 3.8414390 3.7130584          4
## 349 349 3.8273510 4.5809608          4
## 350 350 4.1962328 4.2749378          4
## 351 351 3.8323661 4.0047422          4
## 352 352 4.0595610 4.0291594          4
## 353 353 3.9139591 3.6938850          4
## 354 354 4.0633317 3.5810512          4
## 355 355 4.0211601 3.9291616          4
## 356 356 3.4055907 4.2089805          4
## 357 357 4.0383264 4.0610310          4
## 358 358 4.0023825 3.7142438          4
## 359 359 3.9555421 3.5568174          4
## 360 360 3.8696491 4.1284379          4
## 361 361 3.5307229 4.0540466          4
## 362 362 4.0532103 4.0004110          4
## 363 363 4.4426359 3.7634205          4
## 364 364 3.7439423 4.5551217          4
## 365 365 3.5727560 3.9462284          4
## 366 366 3.7386382 3.9048182          4
## 367 367 3.9806303 4.2083876          4
## 368 368 3.9196637 4.1747687          4
## 369 369 4.0454099 3.6438132          4
## 370 370 4.1605602 3.3823816          4
## 371 371 4.3511813 3.6806420          4
## 372 372 3.8349338 3.7167995          4
## 373 373 4.0369799 4.2736769          4
## 374 374 4.5103537 3.9703504          4
## 375 375 4.1557439 3.9223041          4
## 376 376 4.0623563 3.5519397          4
## 377 377 3.7816710 4.4896374          4
## 378 378 3.5810105 4.3806533          4
## 379 379 4.1300465 3.6097102          4
## 380 380 4.0786997 4.5747733          4
## 381 381 4.3724580 3.7500087          4
## 382 382 4.2635479 3.8884393          4
## 383 383 3.8538995 3.9490199          4
## 384 384 4.2875971 4.3024995          4
## 385 385 3.8019310 3.8387001          4
## 386 386 4.0597019 3.8668607          4
## 387 387 4.3921892 3.9147553          4
## 388 388 4.4878683 3.8997774          4
## 389 389 3.9414044 3.4991334          4
## 390 390 4.2047905 3.6156824          4
## 391 391 3.7232433 4.2947780          4
## 392 392 4.0337634 3.9896309          4
## 393 393 4.2602384 4.0058069          4
## 394 394 4.3666546 4.1811769          4
## 395 395 4.4041752 3.4869136          4
## 396 396 4.5298810 3.9097011          4
## 397 397 4.5461991 4.0606491          4
## 398 398 3.8435015 3.4712584          4
## 399 399 3.7759167 3.8559521          4
## 400 400 3.9367632 3.6361123          4
## 401 401 5.1959132 4.7043474          5
## 402 402 4.9710391 5.2283173          5
## 403 403 5.2799763 5.0112050          5
## 404 404 5.6039455 5.0396016          5
## 405 405 5.5179100 4.5518811          5
## 406 406 4.9051777 5.1580281          5
## 407 407 4.8888652 5.0582977          5
## 408 408 4.8590585 4.8616592          5
## 409 409 4.7994401 4.9630736          5
## 410 410 5.1173934 5.1073318          5
## 411 411 4.7668870 4.5677399          5
## 412 412 5.4565015 4.4270640          5
## 413 413 5.2758851 4.8787421          5
## 414 414 5.6392475 4.6031895          5
## 415 415 4.5688112 5.3011705          5
## 416 416 4.6240364 4.6747033          5
## 417 417 5.0268785 4.4299915          5
## 418 418 4.6862224 5.1837046          5
## 419 419 5.7793721 4.6168303          5
## 420 420 5.0485462 4.9370302          5
## 421 421 4.8721570 5.5486141          5
## 422 422 4.9258209 5.0644702          5
## 423 423 5.1591566 4.9150393          5
## 424 424 5.2794576 4.8490151          5
## 425 425 4.8533669 5.1182890          5
## 426 426 4.6015747 4.9464760          5
## 427 427 5.4087635 5.1468993          5
## 428 428 4.9965817 4.7661751          5
## 429 429 4.9084920 5.3954795          5
## 430 430 4.8831154 5.1394959          5
## 431 431 4.6306043 5.2673779          5
## 432 432 5.5279933 5.0271293          5
## 433 433 5.2391559 4.9783583          5
## 434 434 4.8532624 5.3298015          5
## 435 435 4.5080321 4.7898259          5
## 436 436 4.4412752 4.4000539          5
## 437 437 4.5036700 5.3610643          5
## 438 438 5.1605864 5.0310968          5
## 439 439 5.1678780 5.1732393          5
## 440 440 4.5366550 4.7473369          5
## 441 441 4.7146401 5.0213202          5
## 442 442 4.8810275 5.0811272          5
## 443 443 5.0529390 5.0122737          5
## 444 444 4.8143399 4.9662499          5
## 445 445 4.7348780 5.0355672          5
## 446 446 5.3251666 5.2523205          5
## 447 447 5.4223449 4.5777824          5
## 448 448 4.5660183 5.7456791          5
## 449 449 5.1397081 4.5611519          5
## 450 450 4.9501835 4.9273732          5
## 451 451 4.7763388 4.6618955          5
## 452 452 4.5096301 5.6182603          5
## 453 453 4.9949883 5.4341986          5
## 454 454 5.1945799 4.7561842          5
## 455 455 4.9441766 5.2289558          5
## 456 456 4.7580814 4.6930924          5
## 457 457 4.9361521 4.7071678          5
## 458 458 5.1024525 5.2535712          5
## 459 459 5.0841455 4.9981395          5
## 460 460 4.9661352 4.6498650          5
## 461 461 5.4798362 5.1375566          5
## 462 462 4.5382840 5.2913328          5
## 463 463 5.0506000 4.8869843          5
## 464 464 5.0524429 5.4307025          5
## 465 465 5.4705519 5.0203635          5
## 466 466 5.1896942 4.9896229          5
## 467 467 5.0728852 5.0661373          5
## 468 468 4.6308796 4.6952349          5
## 469 469 5.3343800 4.9137657          5
## 470 470 5.2598079 5.0505460          5
## 471 471 4.3349339 5.4654376          5
## 472 472 4.9278000 4.9821169          5
## 473 473 4.8608547 4.2846235          5
## 474 474 5.2351332 5.0325038          5
## 475 475 5.1462362 4.9174674          5
## 476 476 5.0051903 4.8017315          5
## 477 477 4.8004298 5.0220797          5
## 478 478 4.8450263 4.8948155          5
## 479 479 5.1140468 4.2417863          5
## 480 480 5.4210548 4.8498863          5
## 481 481 5.1201761 4.5085377          5
## 482 482 5.6491915 5.2044117          5
## 483 483 4.9122083 5.0925686          5
## 484 484 4.8756856 4.9103697          5
## 485 485 4.3647099 5.0351246          5
## 486 486 5.2045482 4.7066891          5
## 487 487 5.1872291 5.1087911          5
## 488 488 4.5548826 5.2882998          5
## 489 489 5.2269490 4.8130265          5
## 490 490 4.9194575 5.0309199          5
## 491 491 5.6167773 5.2426163          5
## 492 492 5.2150903 5.0670556          5
## 493 493 4.9006303 4.6887966          5
## 494 494 5.2478889 4.9758907          5
## 495 495 4.6539141 5.4967413          5
## 496 496 4.9108693 4.7573776          5
## 497 497 5.0094066 5.1521305          5
## 498 498 4.9779951 4.7643682          5
## 499 499 5.2422127 5.1636813          5
## 500 500 4.6108840 4.8566559          5

This is what our data looks like:

data %>% ggplot(aes(x = V1, y = V2, color = factor(true_clust))) +
    geom_point() +
    scale_color_discrete(name = "true cluster")

Now we can cluster it using kmeans++:

data_for_clust <- data %>% select(id, starts_with("V"))
km <- TGL_kmeans_tidy(data_for_clust,
    k = 5,
    metric = "euclid",
    verbose = TRUE
)
## id column: id
## KMEans: will generate seeds
## KMeans into generate seeds
## at seed 0
## add new core from 295 to 0
## at seed 1
## done update min distance
## seed range 350 450
## picked up 466 dist was 1.60439
## add new core from 466 to 1
## at seed 2
## done update min distance
## seed range 300 400
## picked up 22 dist was 1.2156
## add new core from 22 to 2
## at seed 3
## done update min distance
## seed range 250 350
## picked up 388 dist was 0.674026
## add new core from 388 to 3
## at seed 4
## done update min distance
## seed range 200 300
## picked up 107 dist was 0.637651
## add new core from 107 to 4
## KMEans: reassign after init
## KMEans: iter 0
## KMEans: iter 1 changed 7
## KMEans: iter 1
## KMEans: iter 2 changed 9
## KMEans: iter 2
## KMEans: iter 3 changed 2
## KMEans: iter 3
## KMEans: iter 4 changed 0

The returned list contains 3 fields:

names(km)
## [1] "centers" "cluster" "size"

km$centers contains a tibble with clust column and the cluster centers:

km$centers
## # A tibble: 5 × 3
##   clust    V1    V2
##   <int> <dbl> <dbl>
## 1     1 5.01   4.97
## 2     2 1.96   1.99
## 3     3 4.01   4.01
## 4     4 3.02   3.04
## 5     5 0.996  1.07

Clusters are numbered according to reorder_func (see the ‘Custom cluster ordering’ section).

km$cluster contains a tibble with an id column holding the observation id (1:n if no id column was supplied), and a clust column with the cluster assigned to each observation:

km$cluster
## # A tibble: 500 × 2
##    id    clust
##    <chr> <int>
##  1 1         5
##  2 2         5
##  3 3         5
##  4 4         5
##  5 5         5
##  6 6         5
##  7 7         5
##  8 8         5
##  9 9         5
## 10 10        5
## # ℹ 490 more rows

km$size contains a tibble with a clust column and an n column with the number of points in each cluster:

km$size
## # A tibble: 5 × 2
##   clust     n
##   <int> <int>
## 1     1   100
## 2     2    99
## 3     3    99
## 4     4   102
## 5     5   100

We can now check our clustering performance - fraction of observations that were classified correctly (Note that match_clusters function is internal to the package and is used only in this vignette):

d <- tglkmeans:::match_clusters(data, km, 5)
sum(d$true_clust == d$new_clust, na.rm = TRUE) / sum(!is.na(d$new_clust))
## [1] 0.992

And plot the results:

d %>% ggplot(aes(x = V1, y = V2, color = factor(new_clust), shape = factor(true_clust))) +
    geom_point() +
    scale_color_discrete(name = "cluster") +
    scale_shape_discrete(name = "true cluster") +
    geom_point(data = km$centers, size = 7, color = "black", shape = "X")

Custom cluster ordering

By default, the clusters are ordered using the following function: hclust(dist(cor(t(centers)))) - hclust of the Euclidean distance of the correlation matrix of the centers.

We can supply our own function to order the clusters using the reorder_func argument. The function is applied to each center and the clusters are ordered by the result.

km <- TGL_kmeans_tidy(data %>% select(id, starts_with("V")),
    k = 5,
    metric = "euclid",
    verbose = FALSE,
    reorder_func = median
)
km$centers
## # A tibble: 5 × 3
##   clust    V1    V2
##   <int> <dbl> <dbl>
## 1     1 0.996  1.07
## 2     2 1.96   1.99
## 3     3 3.02   3.04
## 4     4 4.01   4.01
## 5     5 5.01   4.97

Missing data

tglkmeans can deal with missing data, as long as at least one dimension is not missing. For example:

data$V1[sample(1:nrow(data), round(nrow(data) * 0.2))] <- NA
data
##      id        V1        V2 true_clust
## 1     1 1.0261098 0.8789749          1
## 2     2 1.2082016 1.3878250          1
## 3     3 0.4074118 1.3766459          1
## 4     4 0.6051425 0.6178642          1
## 5     5 0.9524910 1.1564759          1
## 6     6 1.0521462 1.1003523          1
## 7     7 1.0824339 0.9615825          1
## 8     8 1.1844157 1.1233981          1
## 9     9 1.0455999 0.7943182          1
## 10   10 0.7854149 1.0792299          1
## 11   11 1.0210792 1.0148444          1
## 12   12        NA 1.7059383          1
## 13   13 1.1874463 1.3096575          1
## 14   14 0.8660050 1.4647951          1
## 15   15 0.6774450 1.2602245          1
## 16   16 1.1722299 1.2319227          1
## 17   17 0.5210028 1.3175402          1
## 18   18 0.8446220 0.9294415          1
## 19   19        NA 1.3028062          1
## 20   20 1.0987512 1.1828475          1
## 21   21 0.7061653 0.9081029          1
## 22   22 0.8097757 1.5410117          1
## 23   23 1.0355111 1.1280316          1
## 24   24 0.7933902 0.9487525          1
## 25   25 0.9196697 0.5746029          1
## 26   26 1.1158750 0.9110239          1
## 27   27 1.0225545 1.2589772          1
## 28   28        NA 1.0978835          1
## 29   29        NA 0.8262509          1
## 30   30 0.8374893 0.8430044          1
## 31   31 1.2817694 1.0326764          1
## 32   32 0.6429815 1.0066302          1
## 33   33        NA 0.8143470          1
## 34   34 1.1554849 0.7581531          1
## 35   35 0.7382609 0.6445084          1
## 36   36 0.6174064 1.5658486          1
## 37   37 1.2322509 0.9360519          1
## 38   38 1.0664780 0.4662921          1
## 39   39 1.5402956 0.9916549          1
## 40   40 0.9002048 1.1083222          1
## 41   41 0.6686538 0.8906331          1
## 42   42 1.1649300 0.5197539          1
## 43   43 0.4598309 1.3919587          1
## 44   44 1.2098375 0.9882193          1
## 45   45 1.6444289 1.1863258          1
## 46   46 0.9009659 0.8880477          1
## 47   47 1.1907833 0.8702168          1
## 48   48 0.6477844 0.9888838          1
## 49   49        NA 1.4506953          1
## 50   50 0.9209193 1.3881039          1
## 51   51        NA 1.1531862          1
## 52   52 1.0165386 1.5388289          1
## 53   53 1.3660712 0.8130430          1
## 54   54 0.7028710 0.5542610          1
## 55   55 1.3475285 1.6116860          1
## 56   56 1.4838698 0.8363814          1
## 57   57 1.4701719 1.0088474          1
## 58   58 0.8452572 1.0180214          1
## 59   59 1.3778488 1.1640458          1
## 60   60        NA 1.1141623          1
## 61   61 1.5977919 1.1992580          1
## 62   62 1.3806729 0.8147248          1
## 63   63        NA 1.1848853          1
## 64   64 0.8444858 1.0577277          1
## 65   65 0.4775080 1.1730890          1
## 66   66        NA 1.7271554          1
## 67   67 0.8116216 0.9659902          1
## 68   68 0.9547799 1.1695377          1
## 69   69 1.1267423 0.8728154          1
## 70   70 0.7633143 0.9193829          1
## 71   71 1.1200924 1.0618220          1
## 72   72        NA 1.3197330          1
## 73   73        NA 0.8181071          1
## 74   74 0.6103232 1.2971236          1
## 75   75 1.0733139 0.6211208          1
## 76   76 1.3590314 0.7965386          1
## 77   77 1.0956969 0.9418009          1
## 78   78 0.7798725 1.0848282          1
## 79   79 0.8840384 1.2756488          1
## 80   80 0.9639136 1.0888227          1
## 81   81        NA 0.8680878          1
## 82   82 1.3777760 0.5978993          1
## 83   83        NA 1.2930153          1
## 84   84 0.6977858 1.3715870          1
## 85   85        NA 1.3137351          1
## 86   86 0.7315108 1.7112728          1
## 87   87 1.2868578 1.0365225          1
## 88   88 1.2382604 1.1371517          1
## 89   89 0.7734909 0.9699200          1
## 90   90 1.2163963 1.1401624          1
## 91   91 1.5565348 1.0742591          1
## 92   92 0.8407294 1.1583621          1
## 93   93 0.6293467 1.3518135          1
## 94   94 0.9492449 1.1646492          1
## 95   95 1.0176978 0.8171222          1
## 96   96 0.7293823 1.4980478          1
## 97   97        NA 0.9631714          1
## 98   98 1.1203562 0.5722272          1
## 99   99 1.0120280 0.6676874          1
## 100 100 0.8652859 0.9072811          1
## 101 101 2.2507390 1.9734441          2
## 102 102 1.7111898 1.4200256          2
## 103 103 1.9420641 2.1271098          2
## 104 104 1.8473650 2.2720518          2
## 105 105        NA 1.5908843          2
## 106 106 1.7730098 2.5610983          2
## 107 107 1.9314666 2.3425966          2
## 108 108 1.5556423 2.2254545          2
## 109 109        NA 2.0292042          2
## 110 110 2.2823768 2.0903847          2
## 111 111 1.6564114 1.7883869          2
## 112 112 2.0871240 2.0333653          2
## 113 113 2.3165806 1.5842302          2
## 114 114 2.0965648 2.0872073          2
## 115 115        NA 1.8331407          2
## 116 116 2.5872570 2.2380236          2
## 117 117        NA 2.2263639          2
## 118 118 2.0350289 2.1211310          2
## 119 119        NA 2.5712147          2
## 120 120        NA 1.7891205          2
## 121 121 2.0821343 1.8197605          2
## 122 122 2.4191544 2.2517732          2
## 123 123 2.4019949 1.7326654          2
## 124 124        NA 2.5594668          2
## 125 125 1.9408834 2.3052861          2
## 126 126 1.5549761 1.9005603          2
## 127 127 2.1191262 1.6481778          2
## 128 128        NA 1.8092879          2
## 129 129 2.0836692 2.3271355          2
## 130 130        NA 2.2593607          2
## 131 131 1.9074311 2.1591312          2
## 132 132        NA 1.3796532          2
## 133 133 1.8630051 1.9181871          2
## 134 134 2.1546865 2.5561643          2
## 135 135 1.5212970 2.4327141          2
## 136 136 1.6934812 1.5337411          2
## 137 137 2.0074968 1.9250483          2
## 138 138 2.3650280 2.4040551          2
## 139 139 1.9946912 1.8481383          2
## 140 140 2.4641530 2.3764864          2
## 141 141 1.6062787 2.0116725          2
## 142 142 1.9332370 1.9134960          2
## 143 143        NA 2.1019813          2
## 144 144 1.8774210 2.0600012          2
## 145 145        NA 1.6925646          2
## 146 146        NA 1.8960383          2
## 147 147 1.5326075 1.9784592          2
## 148 148        NA 2.0637206          2
## 149 149 1.7302110 1.6931117          2
## 150 150 1.7683858 2.0212679          2
## 151 151 1.6265316 2.2111318          2
## 152 152 1.6412212 1.8536057          2
## 153 153 2.0175564 1.9695532          2
## 154 154 1.4046822 1.7861228          2
## 155 155 1.7118952 1.8946449          2
## 156 156        NA 2.2202097          2
## 157 157 1.9978108 1.9627807          2
## 158 158 1.7467426 2.2380811          2
## 159 159 1.9664537 1.8383967          2
## 160 160 2.4394158 1.9986309          2
## 161 161 2.3159907 2.0334511          2
## 162 162 1.4274191 2.0114730          2
## 163 163 1.7181576 1.8792156          2
## 164 164 1.7026063 1.8163556          2
## 165 165 1.5779771 2.2480382          2
## 166 166 1.9203426 2.3704888          2
## 167 167 2.0069635 1.8451700          2
## 168 168 1.8325267 1.7415076          2
## 169 169 1.3455124 1.7951035          2
## 170 170 1.5983914 1.7675893          2
## 171 171 1.9731544 2.0227249          2
## 172 172        NA 1.8036962          2
## 173 173 1.5473594 1.9714050          2
## 174 174 1.9270898 2.0394617          2
## 175 175 1.9160513 1.8629466          2
## 176 176 1.5318938 2.3317852          2
## 177 177        NA 1.8122467          2
## 178 178 1.8877376 1.8662566          2
## 179 179 1.8301150 1.9615677          2
## 180 180 1.8450436 2.1222798          2
## 181 181 2.2311471 1.8595561          2
## 182 182 2.0098754 2.6510669          2
## 183 183 2.1456931 2.1631415          2
## 184 184 1.9221451 1.8618524          2
## 185 185        NA 2.2418015          2
## 186 186 2.2216200 1.9485285          2
## 187 187 2.0332722 1.4354763          2
## 188 188 1.8062872 2.0424980          2
## 189 189        NA 1.9608621          2
## 190 190 2.2995006 1.4887770          2
## 191 191 1.3853784 1.7359899          2
## 192 192 2.7447421 2.0566132          2
## 193 193 2.0100253 1.7841727          2
## 194 194 2.1998917 1.7847640          2
## 195 195        NA 2.2572201          2
## 196 196 2.1560595 1.5400715          2
## 197 197 2.1952147 1.8842155          2
## 198 198 1.9428259 1.7772452          2
## 199 199        NA 2.1863995          2
## 200 200 2.2076150 1.9373723          2
## 201 201        NA 3.3045560          3
## 202 202        NA 3.0423814          3
## 203 203 3.3480945 2.9347146          3
## 204 204 2.9757784 2.6994756          3
## 205 205 3.6237907 3.0982942          3
## 206 206 3.1093692 2.5971876          3
## 207 207 2.9448006 3.0504244          3
## 208 208 3.4877780 2.6188103          3
## 209 209 3.2889513 2.8808945          3
## 210 210 3.3371604 3.6150125          3
## 211 211 2.6532187 3.2721277          3
## 212 212 2.7724586 2.7386915          3
## 213 213        NA 3.0757640          3
## 214 214        NA 3.4279864          3
## 215 215 2.8721251 3.8205650          3
## 216 216        NA 3.3653067          3
## 217 217 3.1064884 2.9693804          3
## 218 218 3.3445566 3.1595375          3
## 219 219 2.6882466 3.0957165          3
## 220 220 3.3605355 2.8162665          3
## 221 221        NA 2.5457724          3
## 222 222 2.9216233 2.8181887          3
## 223 223 3.3885296 2.6978747          3
## 224 224        NA 3.0930984          3
## 225 225 2.9996620 2.9405271          3
## 226 226 2.9231382 3.0038614          3
## 227 227 3.1610709 2.9907439          3
## 228 228 3.1463583 3.1137368          3
## 229 229 2.8712579 3.4172526          3
## 230 230 2.6763562 3.1215626          3
## 231 231 2.7568793 3.3169666          3
## 232 232 2.6210659 3.0198710          3
## 233 233        NA 3.1524617          3
## 234 234        NA 2.9083142          3
## 235 235 3.6636855 3.0234735          3
## 236 236 3.4069018 2.7186596          3
## 237 237 2.8895418 3.2578791          3
## 238 238 2.8983658 2.8773267          3
## 239 239 3.0458218 3.6716847          3
## 240 240 3.1373504 2.9705692          3
## 241 241 2.7603036 3.2741140          3
## 242 242 2.5684575 2.9700122          3
## 243 243 3.1090833 2.9459835          3
## 244 244 2.7971848 2.8170710          3
## 245 245 3.2929566 2.8851342          3
## 246 246 3.1277096 3.1412690          3
## 247 247        NA 2.8230784          3
## 248 248        NA 3.5492766          3
## 249 249        NA 2.9824638          3
## 250 250 3.0623919 3.0857292          3
## 251 251        NA 3.3278480          3
## 252 252 2.9929373 3.3052969          3
## 253 253        NA 2.9051714          3
## 254 254 3.4191972 2.8875159          3
## 255 255 3.0565207 2.8055598          3
## 256 256 3.3351354 2.5403902          3
## 257 257        NA 3.1381065          3
## 258 258 3.0277587 3.1104109          3
## 259 259 3.2624869 3.0819888          3
## 260 260 2.8899357 2.2924429          3
## 261 261 2.8455557 3.4330489          3
## 262 262 2.9410421 2.5220921          3
## 263 263 3.0303561 3.3840035          3
## 264 264        NA 2.6901277          3
## 265 265        NA 3.2473704          3
## 266 266 3.3236162 3.2332316          3
## 267 267 2.9664698 2.8995253          3
## 268 268 2.8603979 2.8253417          3
## 269 269 3.1223213 2.8696162          3
## 270 270 3.0593400 3.1338066          3
## 271 271 2.7386037 3.4270862          3
## 272 272 3.0559020 3.2736268          3
## 273 273 3.3540388 2.8783431          3
## 274 274 2.9712758 2.8914531          3
## 275 275 2.7854939 3.1026714          3
## 276 276 2.8425763 3.0493717          3
## 277 277 2.8866258 2.6415335          3
## 278 278 3.6073911 2.9196142          3
## 279 279 2.7023403 2.7342157          3
## 280 280 3.2500848 3.2142301          3
## 281 281 3.2725901 3.1032279          3
## 282 282        NA 3.1765918          3
## 283 283 3.1812447 3.2693132          3
## 284 284        NA 2.5940344          3
## 285 285 2.3788675 3.1956744          3
## 286 286 3.2537871 3.2684014          3
## 287 287 3.0053145 3.1645367          3
## 288 288 2.7887662 3.2133506          3
## 289 289 2.7860445 3.1687454          3
## 290 290 3.3884620 2.7436036          3
## 291 291 2.8207429 3.2909459          3
## 292 292 2.6650897 3.2522069          3
## 293 293 2.8604777 2.9610793          3
## 294 294        NA 2.8621988          3
## 295 295 3.6919376 3.1869565          3
## 296 296 2.6181407 2.7466759          3
## 297 297 2.7649412 2.9552225          3
## 298 298 3.1684679 3.2133397          3
## 299 299 2.4592127 2.7515784          3
## 300 300 3.3604407 3.3986271          3
## 301 301        NA 3.8911655          4
## 302 302        NA 4.0148583          4
## 303 303 3.9303883 4.4566098          4
## 304 304 3.4494426 3.7811612          4
## 305 305 3.5593164 4.0012256          4
## 306 306 4.6754920 3.9073560          4
## 307 307 4.1275032 3.9260513          4
## 308 308        NA 4.2364470          4
## 309 309 4.1506832 4.6906092          4
## 310 310 4.2831032 4.4027792          4
## 311 311 4.0805069 3.8229632          4
## 312 312 3.7435101 3.9462776          4
## 313 313        NA 3.6563803          4
## 314 314 3.6423924 4.7996343          4
## 315 315 4.3845184 4.2347890          4
## 316 316 4.9294786 4.3220426          4
## 317 317 4.1385435 4.1449025          4
## 318 318 3.8126941 3.6681342          4
## 319 319 3.9630498 3.7205179          4
## 320 320        NA 3.5606677          4
## 321 321 4.4614430 4.4918087          4
## 322 322 3.7391237 4.2120136          4
## 323 323        NA 3.8693868          4
## 324 324 4.0891795 4.2719885          4
## 325 325 3.9966463 3.9687613          4
## 326 326 3.7952514 4.0315092          4
## 327 327 3.7472274 4.1969199          4
## 328 328 4.1297671 4.3557411          4
## 329 329 4.1715826 4.1925438          4
## 330 330 3.6592755 3.7783528          4
## 331 331        NA 4.4049570          4
## 332 332        NA 4.2994978          4
## 333 333 4.1829662 4.7916938          4
## 334 334 4.2188903 4.3751630          4
## 335 335 3.9010871 3.5328454          4
## 336 336        NA 4.0663318          4
## 337 337 4.3217689 3.9261015          4
## 338 338 4.0917867 4.0564099          4
## 339 339 3.6259036 3.7131891          4
## 340 340 3.1774542 3.7296186          4
## 341 341 4.2516770 3.7832621          4
## 342 342        NA 4.0404713          4
## 343 343        NA 3.9041629          4
## 344 344 4.2476680 4.1827443          4
## 345 345 3.5262697 4.5034723          4
## 346 346 4.1090812 4.1956417          4
## 347 347        NA 4.2822037          4
## 348 348 3.8414390 3.7130584          4
## 349 349 3.8273510 4.5809608          4
## 350 350 4.1962328 4.2749378          4
## 351 351 3.8323661 4.0047422          4
## 352 352        NA 4.0291594          4
## 353 353 3.9139591 3.6938850          4
## 354 354 4.0633317 3.5810512          4
## 355 355 4.0211601 3.9291616          4
## 356 356 3.4055907 4.2089805          4
## 357 357 4.0383264 4.0610310          4
## 358 358        NA 3.7142438          4
## 359 359 3.9555421 3.5568174          4
## 360 360 3.8696491 4.1284379          4
## 361 361 3.5307229 4.0540466          4
## 362 362        NA 4.0004110          4
## 363 363 4.4426359 3.7634205          4
## 364 364        NA 4.5551217          4
## 365 365        NA 3.9462284          4
## 366 366 3.7386382 3.9048182          4
## 367 367 3.9806303 4.2083876          4
## 368 368 3.9196637 4.1747687          4
## 369 369 4.0454099 3.6438132          4
## 370 370 4.1605602 3.3823816          4
## 371 371 4.3511813 3.6806420          4
## 372 372 3.8349338 3.7167995          4
## 373 373 4.0369799 4.2736769          4
## 374 374        NA 3.9703504          4
## 375 375 4.1557439 3.9223041          4
## 376 376 4.0623563 3.5519397          4
## 377 377 3.7816710 4.4896374          4
## 378 378 3.5810105 4.3806533          4
## 379 379        NA 3.6097102          4
## 380 380 4.0786997 4.5747733          4
## 381 381 4.3724580 3.7500087          4
## 382 382 4.2635479 3.8884393          4
## 383 383        NA 3.9490199          4
## 384 384 4.2875971 4.3024995          4
## 385 385 3.8019310 3.8387001          4
## 386 386 4.0597019 3.8668607          4
## 387 387 4.3921892 3.9147553          4
## 388 388 4.4878683 3.8997774          4
## 389 389        NA 3.4991334          4
## 390 390        NA 3.6156824          4
## 391 391 3.7232433 4.2947780          4
## 392 392        NA 3.9896309          4
## 393 393 4.2602384 4.0058069          4
## 394 394 4.3666546 4.1811769          4
## 395 395 4.4041752 3.4869136          4
## 396 396 4.5298810 3.9097011          4
## 397 397        NA 4.0606491          4
## 398 398 3.8435015 3.4712584          4
## 399 399 3.7759167 3.8559521          4
## 400 400 3.9367632 3.6361123          4
## 401 401 5.1959132 4.7043474          5
## 402 402 4.9710391 5.2283173          5
## 403 403 5.2799763 5.0112050          5
## 404 404 5.6039455 5.0396016          5
## 405 405 5.5179100 4.5518811          5
## 406 406 4.9051777 5.1580281          5
## 407 407 4.8888652 5.0582977          5
## 408 408 4.8590585 4.8616592          5
## 409 409 4.7994401 4.9630736          5
## 410 410 5.1173934 5.1073318          5
## 411 411 4.7668870 4.5677399          5
## 412 412 5.4565015 4.4270640          5
## 413 413 5.2758851 4.8787421          5
## 414 414        NA 4.6031895          5
## 415 415 4.5688112 5.3011705          5
## 416 416 4.6240364 4.6747033          5
## 417 417 5.0268785 4.4299915          5
## 418 418 4.6862224 5.1837046          5
## 419 419 5.7793721 4.6168303          5
## 420 420 5.0485462 4.9370302          5
## 421 421        NA 5.5486141          5
## 422 422 4.9258209 5.0644702          5
## 423 423 5.1591566 4.9150393          5
## 424 424 5.2794576 4.8490151          5
## 425 425 4.8533669 5.1182890          5
## 426 426        NA 4.9464760          5
## 427 427 5.4087635 5.1468993          5
## 428 428 4.9965817 4.7661751          5
## 429 429 4.9084920 5.3954795          5
## 430 430        NA 5.1394959          5
## 431 431 4.6306043 5.2673779          5
## 432 432 5.5279933 5.0271293          5
## 433 433        NA 4.9783583          5
## 434 434        NA 5.3298015          5
## 435 435 4.5080321 4.7898259          5
## 436 436 4.4412752 4.4000539          5
## 437 437 4.5036700 5.3610643          5
## 438 438        NA 5.0310968          5
## 439 439 5.1678780 5.1732393          5
## 440 440 4.5366550 4.7473369          5
## 441 441 4.7146401 5.0213202          5
## 442 442        NA 5.0811272          5
## 443 443        NA 5.0122737          5
## 444 444 4.8143399 4.9662499          5
## 445 445 4.7348780 5.0355672          5
## 446 446 5.3251666 5.2523205          5
## 447 447 5.4223449 4.5777824          5
## 448 448 4.5660183 5.7456791          5
## 449 449 5.1397081 4.5611519          5
## 450 450        NA 4.9273732          5
## 451 451 4.7763388 4.6618955          5
## 452 452        NA 5.6182603          5
## 453 453 4.9949883 5.4341986          5
## 454 454 5.1945799 4.7561842          5
## 455 455 4.9441766 5.2289558          5
## 456 456        NA 4.6930924          5
## 457 457 4.9361521 4.7071678          5
## 458 458 5.1024525 5.2535712          5
## 459 459 5.0841455 4.9981395          5
## 460 460 4.9661352 4.6498650          5
## 461 461        NA 5.1375566          5
## 462 462 4.5382840 5.2913328          5
## 463 463 5.0506000 4.8869843          5
## 464 464 5.0524429 5.4307025          5
## 465 465        NA 5.0203635          5
## 466 466 5.1896942 4.9896229          5
## 467 467 5.0728852 5.0661373          5
## 468 468 4.6308796 4.6952349          5
## 469 469 5.3343800 4.9137657          5
## 470 470 5.2598079 5.0505460          5
## 471 471 4.3349339 5.4654376          5
## 472 472 4.9278000 4.9821169          5
## 473 473 4.8608547 4.2846235          5
## 474 474 5.2351332 5.0325038          5
## 475 475 5.1462362 4.9174674          5
## 476 476        NA 4.8017315          5
## 477 477 4.8004298 5.0220797          5
## 478 478 4.8450263 4.8948155          5
## 479 479 5.1140468 4.2417863          5
## 480 480 5.4210548 4.8498863          5
## 481 481        NA 4.5085377          5
## 482 482 5.6491915 5.2044117          5
## 483 483        NA 5.0925686          5
## 484 484        NA 4.9103697          5
## 485 485 4.3647099 5.0351246          5
## 486 486 5.2045482 4.7066891          5
## 487 487 5.1872291 5.1087911          5
## 488 488 4.5548826 5.2882998          5
## 489 489 5.2269490 4.8130265          5
## 490 490 4.9194575 5.0309199          5
## 491 491 5.6167773 5.2426163          5
## 492 492        NA 5.0670556          5
## 493 493 4.9006303 4.6887966          5
## 494 494 5.2478889 4.9758907          5
## 495 495 4.6539141 5.4967413          5
## 496 496 4.9108693 4.7573776          5
## 497 497 5.0094066 5.1521305          5
## 498 498 4.9779951 4.7643682          5
## 499 499 5.2422127 5.1636813          5
## 500 500 4.6108840 4.8566559          5
# Cluster on the id and V* columns only; `true_clust` is left out of the input
# and used below purely for scoring.
features <- data %>% select(id, starts_with("V"))
km <- TGL_kmeans_tidy(features, k = 5, metric = "euclid", verbose = FALSE)

# Internal helper; its result carries both `true_clust` and `new_clust`
# (presumably the fitted labels relabelled to line up with the true ones).
d <- tglkmeans:::match_clusters(data, km, 5)

# Accuracy: agreeing rows divided by the rows that received a cluster label.
n_correct <- sum(d$true_clust == d$new_clust, na.rm = TRUE)
n_assigned <- sum(!is.na(d$new_clust))
n_correct / n_assigned
## [1] 0.978

Plotting the results (without the NAs), we get:

# Colour encodes the assigned cluster, shape encodes the simulated cluster.
cluster_plot <- ggplot(
    d,
    aes(x = V1, y = V2, color = factor(new_clust), shape = factor(true_clust))
) +
    geom_point() +
    scale_color_discrete(name = "cluster") +
    scale_shape_discrete(name = "true cluster") +
    # Overlay the fitted centers as large black "X" marks.
    geom_point(data = km$centers, size = 7, color = "black", shape = "X")
cluster_plot
## Warning: Removed 100 rows containing missing values (`geom_point()`).

High dimensions

Let’s move to higher dimensions and more clusters (the noise level, sd = 0.3, is unchanged):

# 30 clusters of 100 points each in 300 dimensions.
data <- simulate_data(n = 100, sd = 0.3, nclust = 30, dims = 300)

# Cluster on the id and V* columns only; `true_clust` is used for scoring.
features <- data %>% select(id, starts_with("V"))
km <- TGL_kmeans_tidy(features, k = 30, metric = "euclid", verbose = FALSE)

d <- tglkmeans:::match_clusters(data, km, 30)

# Accuracy: agreeing rows divided by the rows that received a cluster label.
n_correct <- sum(d$true_clust == d$new_clust, na.rm = TRUE)
n_assigned <- sum(!is.na(d$new_clust))
n_correct / n_assigned
## [1] 1

Comparison with R vanilla kmeans

Let’s compare it to R vanilla kmeans:

# Base R k-means on the V* columns only (the id column is not a feature),
# using the same number of clusters as above.
km_standard <- kmeans(data %>% select(starts_with("V")), 30)

# Attach an (id, clust) table built from the integer assignment vector so the
# result can be passed to match_clusters() like a TGL_kmeans_tidy() fit
# (presumably the helper reads `$clust` -- confirm against its definition).
# seq_len() is used instead of 1:nrow(data) so a zero-row input yields an
# empty id vector rather than c(1, 0).
km_standard$clust <- tibble(
    id = seq_len(nrow(data)),
    clust = km_standard$cluster
)

d <- tglkmeans:::match_clusters(data, km_standard, 30)
# Accuracy: agreeing rows divided by the rows that received a cluster label.
sum(d$true_clust == d$new_clust, na.rm = TRUE) / sum(!is.na(d$new_clust))
## [1] 0.75

We can see that kmeans++ clusters significantly better than R vanilla kmeans.

Random seed

We can set the seed for the C++ random number generator to get reproducible results:

# Fit twice on the same input with the same seed.
features <- data %>% select(id, starts_with("V"))
km1 <- TGL_kmeans_tidy(
    features,
    k = 30, metric = "euclid", verbose = FALSE, seed = 60427
)
km2 <- TGL_kmeans_tidy(
    features,
    k = 30, metric = "euclid", verbose = FALSE, seed = 60427
)

# Compare the center tables element-wise, skipping their first column.
all(km1$centers[, -1] == km2$centers[, -1])
## [1] TRUE