For this practical lesson about Persistence Homology Inference, we need the TDA R-package.

Part of the experiments studied in this document can be found in the Vignette Presentation about this package.

install.packages("TDA", dependencies=TRUE) 
library(TDA) 

Persistence diagrams for Rips filtration

The ripsDiag() function computes the persistence diagram of the Rips filtration built on top of a point cloud.

The following code generates 60 points from two circles:

Circle1 <- circleUnif(60)
Circle2 <- circleUnif(60, r = 2) + 3
Circles <- rbind(Circle1, Circle2)
plot(Circles, pch = 16, xlab = "",ylab = "")

We specify the limit of the Rips filtration and the max dimension of the homological features we are interested in (0 for components, 1 for loops, 2 for voids, etc.).

maxdimension <- 1    # components and loops
maxscale <- 5        # limit of the filtration

and we generate the persistence diagram using either the C++ library GUDHI (or Dionysus):

Diag <- ripsDiag(X = Circles,
                 maxdimension,
                 maxscale,
                 library = "GUDHI",
                 printProgress = FALSE)
print(Diag[["diagram"]])
##        dimension     Birth       Death
##   [1,]         0 0.0000000 5.000000000
##   [2,]         0 0.0000000 1.259980903
##   [3,]         0 0.0000000 0.791237864
##   [4,]         0 0.0000000 0.733675168
##   [5,]         0 0.0000000 0.592679604
##   [6,]         0 0.0000000 0.563610633
##   [7,]         0 0.0000000 0.552392512
##   [8,]         0 0.0000000 0.507856901
##   [9,]         0 0.0000000 0.462569409
##  [10,]         0 0.0000000 0.397572209
##  [11,]         0 0.0000000 0.392282047
##  [12,]         0 0.0000000 0.357463051
##  [13,]         0 0.0000000 0.340271539
##  [14,]         0 0.0000000 0.332317725
##  [15,]         0 0.0000000 0.328988648
##  [16,]         0 0.0000000 0.320244448
##  [17,]         0 0.0000000 0.317022057
##  [18,]         0 0.0000000 0.314938696
##  [19,]         0 0.0000000 0.297049973
##  [20,]         0 0.0000000 0.294150225
##  [21,]         0 0.0000000 0.266674763
##  [22,]         0 0.0000000 0.250587171
##  [23,]         0 0.0000000 0.246545792
##  [24,]         0 0.0000000 0.223972862
##  [25,]         0 0.0000000 0.223218074
##  [26,]         0 0.0000000 0.217245457
##  [27,]         0 0.0000000 0.210731407
##  [28,]         0 0.0000000 0.208512253
##  [29,]         0 0.0000000 0.202779090
##  [30,]         0 0.0000000 0.201422945
##  [31,]         0 0.0000000 0.193179082
##  [32,]         0 0.0000000 0.191318170
##  [33,]         0 0.0000000 0.188072785
##  [34,]         0 0.0000000 0.183203965
##  [35,]         0 0.0000000 0.182692605
##  [36,]         0 0.0000000 0.178072848
##  [37,]         0 0.0000000 0.171015807
##  [38,]         0 0.0000000 0.170639017
##  [39,]         0 0.0000000 0.165675368
##  [40,]         0 0.0000000 0.164371711
##  [41,]         0 0.0000000 0.163393229
##  [42,]         0 0.0000000 0.162301627
##  [43,]         0 0.0000000 0.161911358
##  [44,]         0 0.0000000 0.157963087
##  [45,]         0 0.0000000 0.150711550
##  [46,]         0 0.0000000 0.150141215
##  [47,]         0 0.0000000 0.147537796
##  [48,]         0 0.0000000 0.144768602
##  [49,]         0 0.0000000 0.135616024
##  [50,]         0 0.0000000 0.135022576
##  [51,]         0 0.0000000 0.132410701
##  [52,]         0 0.0000000 0.131372021
##  [53,]         0 0.0000000 0.129915030
##  [54,]         0 0.0000000 0.122300623
##  [55,]         0 0.0000000 0.121446591
##  [56,]         0 0.0000000 0.116068344
##  [57,]         0 0.0000000 0.115114030
##  [58,]         0 0.0000000 0.112272682
##  [59,]         0 0.0000000 0.109941061
##  [60,]         0 0.0000000 0.108963028
##  [61,]         0 0.0000000 0.108250286
##  [62,]         0 0.0000000 0.104910036
##  [63,]         0 0.0000000 0.104802847
##  [64,]         0 0.0000000 0.102063677
##  [65,]         0 0.0000000 0.101810832
##  [66,]         0 0.0000000 0.101223698
##  [67,]         0 0.0000000 0.100583009
##  [68,]         0 0.0000000 0.100335972
##  [69,]         0 0.0000000 0.098486869
##  [70,]         0 0.0000000 0.098014229
##  [71,]         0 0.0000000 0.093936463
##  [72,]         0 0.0000000 0.087845191
##  [73,]         0 0.0000000 0.081984234
##  [74,]         0 0.0000000 0.080072796
##  [75,]         0 0.0000000 0.076552272
##  [76,]         0 0.0000000 0.076381312
##  [77,]         0 0.0000000 0.073133148
##  [78,]         0 0.0000000 0.067002719
##  [79,]         0 0.0000000 0.064912983
##  [80,]         0 0.0000000 0.057699543
##  [81,]         0 0.0000000 0.056940998
##  [82,]         0 0.0000000 0.056302747
##  [83,]         0 0.0000000 0.052878912
##  [84,]         0 0.0000000 0.052656910
##  [85,]         0 0.0000000 0.052540359
##  [86,]         0 0.0000000 0.050225020
##  [87,]         0 0.0000000 0.049596526
##  [88,]         0 0.0000000 0.049425107
##  [89,]         0 0.0000000 0.048713002
##  [90,]         0 0.0000000 0.047216174
##  [91,]         0 0.0000000 0.047172317
##  [92,]         0 0.0000000 0.045573623
##  [93,]         0 0.0000000 0.043722187
##  [94,]         0 0.0000000 0.040781289
##  [95,]         0 0.0000000 0.040176517
##  [96,]         0 0.0000000 0.039500301
##  [97,]         0 0.0000000 0.033376179
##  [98,]         0 0.0000000 0.032801758
##  [99,]         0 0.0000000 0.031876747
## [100,]         0 0.0000000 0.028832129
## [101,]         0 0.0000000 0.023417350
## [102,]         0 0.0000000 0.021536643
## [103,]         0 0.0000000 0.020410471
## [104,]         0 0.0000000 0.020363521
## [105,]         0 0.0000000 0.018547034
## [106,]         0 0.0000000 0.016868297
## [107,]         0 0.0000000 0.016701423
## [108,]         0 0.0000000 0.016569151
## [109,]         0 0.0000000 0.015798148
## [110,]         0 0.0000000 0.012498999
## [111,]         0 0.0000000 0.011516652
## [112,]         0 0.0000000 0.010550493
## [113,]         0 0.0000000 0.009833564
## [114,]         0 0.0000000 0.006741785
## [115,]         0 0.0000000 0.006621730
## [116,]         0 0.0000000 0.006209791
## [117,]         0 0.0000000 0.005527178
## [118,]         0 0.0000000 0.004545620
## [119,]         0 0.0000000 0.001967983
## [120,]         0 0.0000000 0.001381185
## [121,]         1 0.7952784 3.471199713
## [122,]         1 0.4405785 1.745360872
print(summary.diagram(Diag[["diagram"]]))
## Call: 
## ripsDiag(X = Circles, maxdimension = maxdimension, maxscale = maxscale, 
##     library = "GUDHI", printProgress = FALSE)
## 
## Number of features: 
## [1] 122
## 
## Max dimension: 
## [1] 1
## 
## Scale: 
## [1] 0 5

Finally we plot the diagram:

plot(Diag[["diagram"]])

Persistent homology of sublevel and superlevel sets of functions

Download the crater dataset on your laptop and import the data into R:

crater = read.table("./data/crater.xy")
plot(crater, cex = 0.1,main = "Crater Dataset",xlab = "x",ylab="y")

Define a grid:

Xlim <- c(0, 10);
Ylim <- c(0, 10);
by <- 0.1;
Xseq <- seq(Xlim[1], Xlim[2], by = by)
Yseq <- seq(Ylim[1], Ylim[2], by = by)
Grid <- expand.grid(Xseq, Yseq)

Compute the k-Nearest Neighbor density estimator of the data:

k <- 100
crater.knn <- knnDE(X = crater, Grid = Grid, k = k) # takes a few sec on my laptop

Plot the density surface using the persp() function:

persp(Xseq, Yseq,
      matrix(crater.knn,ncol = length(Yseq), nrow = length(Xseq)),
      xlab = "",ylab = "", zlab = "",
      theta = -20, phi = 35, ltheta = 50,col = 2, 
      main = "KDE", d = 0.5)

We use the gridDiag() function to compute the persistent homology of sublevelsets of the k-Nearest Neighbor function. The function gridDiag()
1. evaluate the function over a triangulated grid,
2. construct a filtration of simplices using the values of the function,
3. computes the persistent homology of the filtration.

The following code computes the persistent homology of the superlevel sets (sublevel=FALSE) of a kernel density estimator fitted for the data crater.

crater.knn.Diag <- gridDiag(X = crater, FUN = knnDE, k = 100,
                            lim = cbind(Xlim, Ylim),by = by,
                            sublevel = FALSE, library = "Dionysus",
                            printProgress = TRUE)
## # Generated complex of size: 60401 
## # Persistence timer: Elapsed time [ 0.200250 ] seconds
print(crater.knn.Diag)
## $diagram
##        dimension        Death       Birth
##   [1,]         0 0.0009876232 0.035607535
##   [2,]         0 0.0337514286 0.034815236
##   [3,]         0 0.0255596452 0.034635486
##   [4,]         0 0.0266706410 0.034169988
##   [5,]         0 0.0312926351 0.034073238
##   [6,]         0 0.0330463867 0.034008039
##   [7,]         0 0.0301402128 0.033681199
##   [8,]         0 0.0322235892 0.033402643
##   [9,]         0 0.0332387625 0.033304131
##  [10,]         0 0.0294695571 0.032496050
##  [11,]         0 0.0322235892 0.032393647
##  [12,]         0 0.0306358016 0.032218220
##  [13,]         0 0.0284187394 0.032214363
##  [14,]         0 0.0308833649 0.031874381
##  [15,]         0 0.0293569677 0.031795002
##  [16,]         0 0.0309025588 0.031283414
##  [17,]         0 0.0266348301 0.031074532
##  [18,]         0 0.0300527022 0.031051643
##  [19,]         0 0.0075849668 0.030650848
##  [20,]         0 0.0298698068 0.030639688
##  [21,]         0 0.0297459062 0.030389721
##  [22,]         0 0.0296132905 0.030361159
##  [23,]         0 0.0298828265 0.030207586
##  [24,]         0 0.0291843408 0.030197800
##  [25,]         0 0.0263907427 0.030055416
##  [26,]         0 0.0290763547 0.030038152
##  [27,]         0 0.0275071129 0.029852746
##  [28,]         0 0.0292867599 0.029754312
##  [29,]         0 0.0297388200 0.029742400
##  [30,]         0 0.0280620646 0.029742391
##  [31,]         0 0.0289234540 0.029626807
##  [32,]         0 0.0262714805 0.029531638
##  [33,]         0 0.0288284750 0.029516352
##  [34,]         0 0.0279981667 0.029338129
##  [35,]         0 0.0270341311 0.029296105
##  [36,]         0 0.0283290841 0.029269658
##  [37,]         0 0.0281534424 0.029247974
##  [38,]         0 0.0286975370 0.029085475
##  [39,]         0 0.0276511473 0.028740258
##  [40,]         0 0.0281545152 0.028565445
##  [41,]         0 0.0281545152 0.028399640
##  [42,]         0 0.0279541606 0.028261426
##  [43,]         0 0.0270828701 0.028257809
##  [44,]         0 0.0268494100 0.028214552
##  [45,]         0 0.0215546905 0.028156698
##  [46,]         0 0.0279577204 0.028113646
##  [47,]         0 0.0278226707 0.028001649
##  [48,]         0 0.0273761946 0.027922135
##  [49,]         0 0.0273793530 0.027701954
##  [50,]         0 0.0267792418 0.027602790
##  [51,]         0 0.0268237250 0.027490340
##  [52,]         0 0.0271612867 0.027470993
##  [53,]         0 0.0273793530 0.027396864
##  [54,]         0 0.0263129888 0.027340533
##  [55,]         0 0.0260275019 0.027299646
##  [56,]         0 0.0225206437 0.027229284
##  [57,]         0 0.0265334653 0.027144183
##  [58,]         0 0.0268237250 0.027116029
##  [59,]         0 0.0266098682 0.026853832
##  [60,]         0 0.0252437189 0.026733473
##  [61,]         0 0.0256763629 0.026633619
##  [62,]         0 0.0264420672 0.026543473
##  [63,]         0 0.0253438536 0.026508803
##  [64,]         0 0.0257911013 0.026505643
##  [65,]         0 0.0255154574 0.026458097
##  [66,]         0 0.0255039296 0.026336331
##  [67,]         0 0.0254684316 0.026214865
##  [68,]         0 0.0255582061 0.026176769
##  [69,]         0 0.0253530000 0.026158659
##  [70,]         0 0.0242158474 0.026122374
##  [71,]         0 0.0238515228 0.026094338
##  [72,]         0 0.0214073832 0.025590076
##  [73,]         0 0.0236679330 0.025582010
##  [74,]         0 0.0254679874 0.025525179
##  [75,]         0 0.0250932354 0.025387761
##  [76,]         0 0.0249516877 0.025279598
##  [77,]         0 0.0250600023 0.025231862
##  [78,]         0 0.0247725979 0.025188544
##  [79,]         0 0.0250361076 0.025159487
##  [80,]         0 0.0249420419 0.025071210
##  [81,]         0 0.0245745479 0.024799184
##  [82,]         0 0.0240803540 0.024263943
##  [83,]         0 0.0221253796 0.024260814
##  [84,]         0 0.0225396923 0.024189147
##  [85,]         0 0.0227369937 0.024043603
##  [86,]         0 0.0237348591 0.023949349
##  [87,]         0 0.0234473592 0.023903190
##  [88,]         0 0.0227421701 0.023725097
##  [89,]         0 0.0229555517 0.023623319
##  [90,]         0 0.0231654604 0.023565095
##  [91,]         0 0.0234587545 0.023466768
##  [92,]         0 0.0224231632 0.023386798
##  [93,]         0 0.0230308084 0.023356359
##  [94,]         0 0.0226406226 0.023244243
##  [95,]         0 0.0222716230 0.023144059
##  [96,]         0 0.0202023609 0.023099774
##  [97,]         0 0.0227101049 0.022996622
##  [98,]         0 0.0227197667 0.022732928
##  [99,]         0 0.0222236136 0.022559835
## [100,]         0 0.0222928829 0.022518957
## [101,]         0 0.0221888513 0.022501351
## [102,]         0 0.0223956399 0.022497604
## [103,]         0 0.0214694563 0.022269250
## [104,]         0 0.0216909823 0.022041750
## [105,]         0 0.0203967851 0.021935132
## [106,]         0 0.0200888072 0.021812129
## [107,]         0 0.0209753672 0.021732286
## [108,]         0 0.0214526939 0.021727252
## [109,]         0 0.0215194197 0.021574655
## [110,]         0 0.0212796087 0.021525410
## [111,]         0 0.0202672884 0.020942557
## [112,]         0 0.0207455970 0.020905981
## [113,]         0 0.0049692107 0.020854982
## [114,]         0 0.0204549827 0.020794295
## [115,]         0 0.0204454533 0.020738236
## [116,]         0 0.0197167915 0.020711857
## [117,]         0 0.0201184775 0.020676931
## [118,]         0 0.0203264946 0.020564629
## [119,]         0 0.0201701439 0.020347742
## [120,]         0 0.0201123397 0.020191090
## [121,]         0 0.0195072262 0.019624616
## [122,]         0 0.0190847631 0.019552450
## [123,]         0 0.0053061911 0.019339731
## [124,]         0 0.0046854952 0.019070994
## [125,]         0 0.0185441351 0.018847815
## [126,]         0 0.0179494440 0.018773672
## [127,]         0 0.0182798987 0.018757954
## [128,]         0 0.0185441351 0.018568254
## [129,]         0 0.0180752191 0.018390617
## [130,]         0 0.0177961550 0.017843390
## [131,]         0 0.0054841980 0.017829629
## [132,]         0 0.0172834661 0.017715301
## [133,]         0 0.0175595372 0.017659552
## [134,]         0 0.0171263155 0.017379771
## [135,]         0 0.0164713157 0.017034489
## [136,]         0 0.0150965744 0.016011648
## [137,]         0 0.0154816345 0.015694119
## [138,]         0 0.0144165889 0.015058343
## [139,]         0 0.0140897543 0.015052139
## [140,]         0 0.0146188304 0.014807208
## [141,]         0 0.0141767738 0.014600132
## [142,]         0 0.0142260505 0.014289833
## [143,]         0 0.0138833792 0.014068103
## [144,]         0 0.0135311447 0.013656789
## [145,]         0 0.0134138474 0.013461335
## [146,]         0 0.0126179502 0.012717959
## [147,]         0 0.0118578731 0.012164172
## [148,]         0 0.0087566959 0.008858871
## [149,]         0 0.0078389565 0.007866786
## [150,]         0 0.0076512606 0.007691682
## [151,]         0 0.0074850538 0.007577994
## [152,]         0 0.0073760482 0.007515239
## [153,]         0 0.0074136167 0.007472584
## [154,]         0 0.0060218499 0.006033124
## [155,]         0 0.0058909314 0.005928510
## [156,]         0 0.0057809795 0.005782820
## [157,]         0 0.0055201422 0.005557265
## [158,]         0 0.0052484927 0.005320394
## [159,]         0 0.0044912059 0.004510172
## [160,]         0 0.0041645377 0.004176135
## [161,]         0 0.0036108977 0.003666207
## [162,]         1 0.0288717198 0.029704961
## [163,]         1 0.0291587531 0.029390150
## [164,]         1 0.0291329443 0.029166838
## [165,]         1 0.0290295378 0.029051080
## [166,]         1 0.0276219858 0.029008262
## [167,]         1 0.0269815023 0.028519385
## [168,]         1 0.0278676592 0.028323659
## [169,]         1 0.0275449033 0.028263577
## [170,]         1 0.0278909274 0.027949072
## [171,]         1 0.0270832162 0.027708173
## [172,]         1 0.0274467344 0.027651147
## [173,]         1 0.0275721885 0.027596909
## [174,]         1 0.0264680370 0.026958027
## [175,]         1 0.0265585723 0.026682531
## [176,]         1 0.0261754973 0.026271480
## [177,]         1 0.0251745872 0.026253710
## [178,]         1 0.0259101732 0.026032680
## [179,]         1 0.0255620422 0.025944891
## [180,]         1 0.0251757681 0.025821848
## [181,]         1 0.0256386724 0.025756639
## [182,]         1 0.0255415340 0.025748102
## [183,]         1 0.0245041833 0.025704969
## [184,]         1 0.0251410869 0.025335281
## [185,]         1 0.0249126687 0.025166040
## [186,]         1 0.0243942926 0.025034173
## [187,]         1 0.0248817610 0.024883063
## [188,]         1 0.0238877870 0.024390346
## [189,]         1 0.0232995237 0.023595984
## [190,]         1 0.0232073674 0.023585509
## [191,]         1 0.0233391969 0.023468243
## [192,]         1 0.0216122139 0.023402437
## [193,]         1 0.0230885139 0.023375228
## [194,]         1 0.0223093225 0.022349213
## [195,]         1 0.0222855622 0.022348640
## [196,]         1 0.0221336510 0.022244189
## [197,]         1 0.0220340326 0.022094729
## [198,]         1 0.0205765385 0.021579073
## [199,]         1 0.0208437463 0.021554691
## [200,]         1 0.0211527320 0.021407383
## [201,]         1 0.0208297637 0.021252890
## [202,]         1 0.0205068225 0.021189826
## [203,]         1 0.0206422983 0.021095059
## [204,]         1 0.0206946361 0.020791372
## [205,]         1 0.0203216778 0.020768931
## [206,]         1 0.0197055715 0.020445453
## [207,]         1 0.0200101506 0.020435194
## [208,]         1 0.0202144792 0.020323856
## [209,]         1 0.0196501738 0.020202361
## [210,]         1 0.0195140877 0.020170144
## [211,]         1 0.0198122182 0.019914896
## [212,]         1 0.0193688624 0.019863561
## [213,]         1 0.0191716135 0.019596850
## [214,]         1 0.0192442518 0.019559678
## [215,]         1 0.0187714300 0.019451421
## [216,]         1 0.0059988593 0.019426027
## [217,]         1 0.0184815751 0.019197094
## [218,]         1 0.0190054332 0.019130537
## [219,]         1 0.0187538184 0.019126898
## [220,]         1 0.0184248388 0.018695320
## [221,]         1 0.0184062311 0.018523927
## [222,]         1 0.0170857219 0.018500546
## [223,]         1 0.0170715206 0.018435256
## [224,]         1 0.0179881846 0.018276182
## [225,]         1 0.0173491474 0.017968902
## [226,]         1 0.0174734240 0.017600516
## [227,]         1 0.0165009646 0.016519064
## [228,]         1 0.0162112720 0.016508057
## [229,]         1 0.0138222673 0.014000653
## [230,]         1 0.0134880994 0.013874583
## [231,]         1 0.0133416278 0.013439894
## [232,]         1 0.0129191455 0.013015410
## [233,]         1 0.0102928515 0.010703671
## [234,]         1 0.0096819122 0.009876977
## [235,]         1 0.0084425898 0.008633973
## [236,]         1 0.0074760402 0.007569166
## [237,]         1 0.0065812208 0.007552319
## [238,]         1 0.0067733649 0.007413617
## [239,]         1 0.0070406266 0.007354021
## [240,]         1 0.0072618622 0.007345895
## [241,]         1 0.0071041792 0.007305085
## [242,]         1 0.0070580603 0.007206200
## [243,]         1 0.0070513517 0.007192578
## [244,]         1 0.0070563799 0.007172496
## [245,]         1 0.0070550473 0.007159601
## [246,]         1 0.0071272333 0.007132805
## [247,]         1 0.0071103132 0.007127595
## [248,]         1 0.0069364950 0.007090538
## [249,]         1 0.0067751754 0.006865659
## [250,]         1 0.0065020578 0.006761544
## [251,]         1 0.0066914836 0.006744528
## [252,]         1 0.0064863117 0.006654225
## [253,]         1 0.0065508510 0.006634363
## [254,]         1 0.0065212511 0.006609653
## [255,]         1 0.0064262288 0.006565804
## [256,]         1 0.0063421995 0.006488291
## [257,]         1 0.0061316347 0.006259568
## [258,]         1 0.0051824796 0.005434557
## [259,]         1 0.0051641382 0.005276561
## [260,]         1 0.0051780523 0.005270376
## [261,]         1 0.0049625722 0.005183812
## [262,]         1 0.0050594975 0.005063246
## [263,]         1 0.0049149309 0.004942738
## [264,]         1 0.0047090000 0.004832514
## [265,]         1 0.0047815356 0.004799890
## [266,]         1 0.0046403385 0.004779708
## [267,]         1 0.0045766308 0.004678045
## [268,]         1 0.0045636069 0.004571487
## [269,]         1 0.0043667259 0.004370385
## [270,]         1 0.0041899293 0.004233219
## [271,]         1 0.0041168134 0.004136820
## [272,]         1 0.0040707787 0.004077082

In the function, the argument FUN requires a function whose inputs are:
1. an n by d matrix of coordinates X, 2. an m by d matrix of coordinates Grid, 3. an optional smoothing parameter, and returns a numeric vector of length m.

For example the distance function distFct(), the kernel density estimator kde(), and the distance to measure function dtm().

We now plot the data and the diagram:

plot(crater.knn.Diag[["diagram"]],
      main = "persistent homology of sublevelsets of the knnDE \n Crater Dataset")

An other example with the Gaussian Kernel Density Estimator (KDE):

crater.KDE.Diag <- gridDiag(X = crater, FUN = kde, h=1,
                          lim = cbind(Xlim, Ylim),by = by,
                          sublevel = FALSE,
                          library = "Dionysus", 
                          printProgress = TRUE)
## # Generated complex of size: 60401 
## # Persistence timer: Elapsed time [ 0.374804 ] seconds
plot(crater.KDE.Diag[["diagram"]],
      main = "persistent homology of sublevelsets of the KDE \n Crater Dataset")

Bottleneck Distance

The function bottleneck() computes the bottleneck distance between two persistence diagrams:

Diag1 <- ripsDiag(Circle1, maxdimension = 1, maxscale = 5)
Diag2 <- ripsDiag(Circle2, maxdimension = 1, maxscale = 5)

Now we compute the bottleneck distance between Diag1 and Diag2:

print(bottleneck(Diag1[["diagram"]], Diag2[["diagram"]]))
## [1] 1.337961

It is also possible to compute the distances using only one dimensional features:

print(bottleneck(Diag1[["diagram"]],Diag2[["diagram"]],
                 dimension = 1))
## [1] 1.337961

Exercice : compute the matrix of bottleneck distances between the fourteen configurations of protein binding. Example from Peter’s paper Using persistent homology and dynamical distances to analyze protein binding. In this paper persistent homology is used to analyze protein binding. The paper compares closed and open forms of the maltose-binding protein (MBP), a large biomolecule consisting of 370 amino acid residue. The anlysis is not based on geometric distances in \(\mathbb R^3\) but on a metric of dynamical distances definded by \[D_{ij} = 1 - |C_{ij}|\] where C is the correlation matrices between residues.

Correlation matrices between residues can be found at this link. Download the [ precompute dynamic matrices] (http://www.lsta.upmc.fr/michelb/Enseignements/TDA/Data/Distance_Dynamic.Rdata) for 7 closed and open forms of MBP and import the data into R:

load("./Data/Distance_Dynamic.Rdata")

You can use this code to compute in parallel the 14 Persistence Diagrams (Rips Filtration):

Rips.dynamic.ls <- list()
for (i in 1:14){
  Rips.temp <- ripsDiag(X=dist.dynamic.ls[[i]],maxscale= 0.6,
                                   maxdimension=1,
                                   dist="arbitrary",library="Dionysus",
                                   printProgress = TRUE)
  Rips.dynamic.ls[[i]] <- Rips.temp
}

or alternatively you can use the doParallel package:

library(doParallel)
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
nbcores <- 3 # number of cores of your laptop
cl <- makeCluster(nbcores)
registerDoParallel(cl)
Rips.dynamic.ls <- foreach(i=1:14, .packages = "TDA") %dopar% 
  (ripsDiag(X=dist.dynamic.ls[[i]],maxscale= 0.6,
                                   maxdimension=1,
                                   dist="arbitrary",library="Dionysus",
                                   printProgress = FALSE))

Plot the fourteen diagram:

par(mfrow = c(1,2))
for (i in 1:14)
plot(Rips.dynamic.ls[[i]]$diagram)

# all combinations of 2 elements in 1:14
par(mfrow = c(1,1))
library(caTools)
Couples = combs(1:14,2)

Parallel loop to compute all the distances:

Dist.ls <- foreach(a=1:nrow(Couples), .export = "bottleneck") %dopar% (bottleneck(Rips.dynamic.ls[[Couples[a,1]]]$diagram,Rips.dynamic.ls[[Couples[a,2]]]$diagram))

Stop the clusters

stopCluster(cl)

Reorganize the outputs:

Dist.bottle <- matrix(0,14,14)
for (a in 1:nrow(Couples)){
  Dist.bottle[Couples[a,1],Couples[a,2]] = Dist.ls[[a]]
}
Dist.bottle = (Dist.bottle + t(Dist.bottle))/2

We can use a multidimensional scaling (MDS) procedure to visualize the fourteen points:

fit <- cmdscale(Dist.bottle,eig=TRUE, k=2) # k is the number of dim
library(ggplot2)
qplot(fit$points[,1] ,  fit$points[,2], colour=  State, xlab="Coordinate 1", ylab="Coordinate 2")

Landscapes and Silhouette functions

The landscape and silhouette functions are evaluated over a one-dimensional grid of points (tseq in the code below). The option KK=1 specifies that we are interested in the 1st landscape function, and p=1 is the power of the weights in the definition of the silhouette function.

tseq <- seq(0, maxscale, length = 1000)   #domain
Land <- landscape(Diag[["diagram"]], dimension = 1, KK = 1, tseq)
Sil <- silhouette(Diag[["diagram"]], p = 1, dimension = 1, tseq)

The functions landscape and silhouette return real valued vectors, which can be simply plotted with :

plot(tseq, Land, type="l")

plot(tseq, Sil, type="l")

Confidence regions with bootstrap:

Implementation of the boostrap

We can deduce confidence regions for the persistence diagram for the sub (or super) level sets of a function f, from confidence regions on f together with the stability results. Confidence regions on f (for the sup norm) can be obtained with bootstrap procedures.

We now illustrate the method for the k-Nearest Neighbor density estimator.

Exercise Implementation of this method for the crater data set.
Define a grid:

Xlim <- c(0, 10);
Ylim <- c(0, 10);
by <- 0.1;
Xseq <- seq(Xlim[1], Xlim[2], by = by)
Yseq <- seq(Ylim[1], Ylim[2], by = by)
Grid <- expand.grid(Xseq, Yseq)
  1. Using the boot package, generate the bootstrapped 95% uniform confidence region on the grid for $ E f_k where \(\hat f_k\) is the KnnDE estimator.
  2. Draw the persistence diagram of the crater data and overlay the confidence band on the diagram.
  3. Implement the bottleneck bootstrap method

knn estimator:

k <- 100
crater.knn <- knnDE(X = crater, Grid = Grid, k = k) 

sup norm function:

supnor <- function(data,indices){
  data.extract <- data[indices,]
  hatf <- knnDE(X = data.extract, Grid = Grid, k = k)
  supnorm <- max(abs(hatf - crater.knn))
  return(supnorm)
}

Bootstrap procedure:

library(boot)
supnor.boot <- boot(data=crater,
                    statistic=supnor,
                    R=500,
                    parallel="multicore",
                    ncpus=3
                    )
q95 <- quantile(supnor.boot$t,probs = 0.95)
print(q95)
##        95% 
## 0.01677607
plot(crater.knn.Diag$diagram)
abline(q95,1, col = "black", lty = 2)

Bottleneck bootstrap:

supnor.bottle <- function(data,indices){
  data.extract <- data[indices,]
  crater.knn.Diag.boot <- gridDiag(X = data.extract, FUN = knnDE, k = 100,
                            lim = cbind(Xlim, Ylim),by = by,
                            sublevel = FALSE, library = "Dionysus",
                            printProgress = FALSE)
  
  bottle <- bottleneck(crater.knn.Diag.boot$diagram, crater.knn.Diag$diagram)
  return(bottle)
}

Bootstrap procedure for the bottleneck distance:

bottle.boot <- boot(data=crater,
                    statistic=supnor.bottle,
                    R=100,
                    parallel="multicore",
                    ncpus=3
                    )
q95.bottle <- quantile(bottle.boot$t,probs = 0.95)
print(q95.bottle)
##         95% 
## 0.002997133
plot(crater.knn.Diag$diagram)
abline(q95,1, col = "black", lty = 2)
abline(q95.bottle,1, col = "red", lty = 2)

Bootsrap with the TDA package

The following code computes a 80% confidence band for $ E f_k where \(\hat f_k\):

band <- bootstrapBand(X = crater, FUN = knnDE,k=100, Grid = Grid, B = 100,parallel = TRUE, alpha = 0.2)
plot(crater.knn.Diag$diagram, band = band[["width"]], main = "KnnDE Diagram")

You can use the bootstrapDiagram to obtain a confidence band with a bootstrap procedure for the bottleneck distance.

Robust TDA with the distance to measure

Download the [Cassini curve data] (http://www.lsta.upmc.fr/michelb/Enseignements/TDA/Data/cassini_noise.Rdata) f and import the data into R:

load("Data/cassini_noise.Rdata")
qplot(X[,1],X[,2], xlab="",ylab="", main="Cassini curve with Noise")

Construct a grid of points over which we evaluate the functions:

Xlim=c(-2.8,2.8)
Ylim=c(-1.4,1.4)
m = 100
by=(Xlim[2]-Xlim[1])/m
Xseq = seq(Xlim[1], Xlim[2], by=by)
Yseq = seq(Ylim[1], Ylim[2], by=by)
mx=length(Xseq)
my=length(Yseq)
Grid = expand.grid(Xseq,Yseq)

Compute the DTM on the grid:

m0=0.1
DTM = dtm(X, Grid, m0=m0)

Plot the persistence diagram of the sublevel sets of the DTM:

Diag.dtm=gridDiag(X, dtm, lim=cbind(Xlim, Ylim), by=by, sublevel=TRUE, printProgress=TRUE, m0=m0)
## # Generated complex of size: 30301 
## # Persistence timer: Elapsed time [ 0.098474 ] seconds
Diag.knDE=gridDiag(X, knnDE, lim=cbind(Xlim, Ylim), by=by, sublevel=FALSE, printProgress=TRUE,k=10)
## # Generated complex of size: 30301 
## # Persistence timer: Elapsed time [ 0.086010 ] seconds
Diag.dist = gridDiag(X, distFct, lim=cbind(Xlim, Ylim), by=by,sublevel=TRUE,printProgress=TRUE)
## # Generated complex of size: 30301 
## # Persistence timer: Elapsed time [ 0.068328 ] seconds
par(mfrow = c(1,2))
plot(Diag.dist$diagram,main = "distance function")
plot(Diag.dtm$diagram,main = "DTM")

Persistence for statistical learning

We illustrate how features derived form persistent homology constructions can be used for a classification problem. Accelerometer data from three peoples walking in a building have been recorded on a smartphone ( 3D point cloud in the Euclidean space). The aim is to recognize the walker from the accelerometer data.

Load the acceler_data.Rdata:

load("Data/acceler_data.Rdata")

We cut the data into small intervals:

mm=80 #length of the time series used to compute the Rips (=number of points)
nn=45  #number of landscape computed for each data set

We need to store diagrams for each interval

Diags1=list()
Diags2=list()
Diags3=list()

We compute the diagrams

DiagLim=1.5
for (i in 1:nn){
  Diags1[[i]]=ripsDiag(X1[((i-1)*mm+1):((i-1)*mm+mm),],maxdimension=1,DiagLim)
  Diags2[[i]]=ripsDiag(X2[((i-1)*mm+1):((i-1)*mm+mm),],maxdimension=1,DiagLim)
  Diags3[[i]]=ripsDiag(X3[((i-1)*mm+1):((i-1)*mm+mm),],maxdimension=1,DiagLim)
  print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
## [1] 26
## [1] 27
## [1] 28
## [1] 29
## [1] 30
## [1] 31
## [1] 32
## [1] 33
## [1] 34
## [1] 35
## [1] 36
## [1] 37
## [1] 38
## [1] 39
## [1] 40
## [1] 41
## [1] 42
## [1] 43
## [1] 44
## [1] 45

and the landscapes (for KK=2 and KK = 1, dimension =0):

dimension = 0
lseq=100
tseq=seq(0,1.8,length=lseq)
Lands1=matrix(0, nrow=nn, ncol=2 * lseq)
Lands2=matrix(0, nrow=nn, ncol=2 * lseq)
Lands3=matrix(0, nrow=nn, ncol=2 * lseq)
for (i in 1:nn){
  Lands1[i,]=c(landscape(Diags1[[i]]$d, dimension=dimension, KK=1, tseq),landscape(Diags1[[i]]$d, dimension=dimension, KK=2, tseq))
  Lands2[i,]=c(landscape(Diags2[[i]]$d, dimension=dimension, KK=1, tseq),landscape(Diags2[[i]]$d, dimension=dimension, KK=2, tseq))
  Lands3[i,]=c(landscape(Diags3[[i]]$d, dimension=dimension, KK=1, tseq),landscape(Diags3[[i]]$d, dimension=dimension, KK=2, tseq))
  print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
## [1] 26
## [1] 27
## [1] 28
## [1] 29
## [1] 30
## [1] 31
## [1] 32
## [1] 33
## [1] 34
## [1] 35
## [1] 36
## [1] 37
## [1] 38
## [1] 39
## [1] 40
## [1] 41
## [1] 42
## [1] 43
## [1] 44
## [1] 45

We reorganize the data into a (3 nn) x (2 lseq) matrix:

Landscape.sample <- rbind(Lands1,Lands2,Lands3)
walker <-  as.factor(c(rep("fred",nn),rep("fab",nn),rep("bert",nn)))
dim(Landscape.sample)
## [1] 135 200

Finally we fit a Random Forest to predict the walker using the landscapes:

library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
fit <- randomForest(x=Landscape.sample,y=walker)
print(fit) # view results
## 
## Call:
##  randomForest(x = Landscape.sample, y = walker) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 14
## 
##         OOB estimate of  error rate: 47.41%
## Confusion matrix:
##      bert fab fred class.error
## bert   27   7   11   0.4000000
## fab     5  24   16   0.4666667
## fred   10  15   20   0.5555556
image(1-fit$confusion[,1:3] / nn)