Package 'ISR' reference manual

Title:	The Iterated Score Regression-Based Estimation
Description:	We use the ISR to handle PCA-based missing data with high correlation, and the DISR to handle distributed PCA-based missing data. The philosophy of the package is described in Guo G. (2024) <doi:10.1080/03610918.2022.2091779>.
Authors:	Guangbao Guo [aut, cre], Haoyue Song [aut], Lixing Zhu [aut]
Maintainer:	Guangbao Guo <[email protected]>
License:	MIT + file LICENSE
Version:	2025.01.14
Built:	2025-03-28 05:51:04 UTC
Source:	https://github.com/cran/ISR

CKD

Description

chronic kidney disease

Usage

data("CKD")

Format

The format is: num [1:400, 1:18] 48 7 62 48 51 60 68 24 52 53 ... - attr(*, "dimnames")=List of 2 ..$ : NULL ..$ : chr [1:18] "age" "bp" "sg" "al" ...

Details

There are 1010 missing values in the data set, accounting for 14.03 percent.

Source

Dr. P. Soundarapandian, M.D., D.M. (Senior Consultant Nephrologist), Apollo Hospitals, Managiri, Madurai Main Road, Karaikudi, Tamil Nadu, India

References

Polat, H., Danaei-Mehr, H., and Cetin, A. (2017). Diagnosis of chronic kidney disease based on support vector machine by feature selection methods. Journal of Medical Systems, 41(4), 1-11.

Examples

data(CKD)
## maybe str(CKD) ; plot(CKD) ...
data(CKD)
## maybe str(CKD) ; plot(CKD) ...

Calculate the estimator with the DISR method

Description

Calculate the estimator with the DISR method

Usage

DISR(data, data0, real = TRUE, example = FALSE, D)
DISR(data, data0, real = TRUE, example = FALSE, D)

Arguments

`data`	is the original data set
`data0`	is the data set with missing values
`real`	indicates whether the data set is a real data set with missing values
`example`	indicates whether the data set is a simulation example
`D`	is the number of nodes

Value

`XDISR`	is the estimator on the DISR method
`MSEDISR`	is the MSE value of the DISR method
`MAEDISR`	is the MAE value of the DISR method
`REDISR`	is the RE value of the DISR method
`GCVDISR`	is the GCV value of the DISR method
`timeDISR`	is the time cost of the DISR method

Examples

 library(MASS)
 n=100;p=10;per=0.1
 X0=data=matrix(mvrnorm(n*p,0,1),n,p)
 m=round(per*n*p,digits=0)
 mr=sample(1:(n*p),m,replace=FALSE)
 X0[mr]=NA;data0=X0
 DISR(data=data,data0=data0,real=FALSE,example=FALSE,D=2)
library(MASS)
 n=100;p=10;per=0.1
 X0=data=matrix(mvrnorm(n*p,0,1),n,p)
 m=round(per*n*p,digits=0)
 mr=sample(1:(n*p),m,replace=FALSE)
 X0[mr]=NA;data0=X0
 DISR(data=data,data0=data0,real=FALSE,example=FALSE,D=2)

HCV

Description

Hepatitis C virus

Usage

data("HCV")

Format

The format is: num [1:615, 1:13] 1 1 1 1 1 1 1 1 1 1 ... - attr(*, "dimnames")=List of 2 ..$ : chr [1:615] "1" "2" "3" "4" ... ..$ : chr [1:13] "Category" "Age" "Sex" "ALB" ...

Details

There are 31 missing values in the data set, accounting for 0.39 percent.

Source

UCI repository

References

Lichtinghagen, R., Pietsch, D., Bantel, H., Manns, M., Brand, K., and Bahr, M. (2013). The Enhanced Liver Fibrosis (ELF) Score: Normal Values, Influence Factors and Proposed Cut-Off Values. Journal of Hepatology, 59, 236-242.

Examples

data(HCV)
## maybe str(HCV) ; plot(HCV) ...
data(HCV)
## maybe str(HCV) ; plot(HCV) ...

Calculate the estimator with the ISR method

Description

Calculate the estimator with the ISR method

Usage

ISR(data, data0, real = TRUE, example = FALSE)
ISR(data, data0, real = TRUE, example = FALSE)

Arguments

`data`	is the original data set
`data0`	is the data set with missing values
`real`	indicates whether the data set is a real data set with missing values
`example`	indicates whether the data set is a simulation example

Value

`XISR`	is the estimator on the ISR method
`MSEISR`	is the MSE value of the ISR method
`MAEISR`	is the MAE value of the ISR method
`REISR`	is the RE value of the ISR method
`GCVISR`	is the GCV value of the ISR method
`timeISR`	is the time cost of the ISR method

Examples

 library(MASS)
 n=100;p=10;per=0.1
 X0=data=matrix(mvrnorm(n*p,0,1),n,p)
 m=round(per*n*p,digits=0)
 mr=sample(1:(n*p),m,replace=FALSE)
 X0[mr]=NA;data0=X0
 ISR(data=data,data0=data0,real=FALSE,example=FALSE)
library(MASS)
 n=100;p=10;per=0.1
 X0=data=matrix(mvrnorm(n*p,0,1),n,p)
 m=round(per*n*p,digits=0)
 mr=sample(1:(n*p),m,replace=FALSE)
 X0[mr]=NA;data0=X0
 ISR(data=data,data0=data0,real=FALSE,example=FALSE)

Calculate the estimator on the Mean method

Description

Calculate the estimator on the Mean method

Usage

Mean(data, data0, real = TRUE, example = FALSE)
Mean(data, data0, real = TRUE, example = FALSE)

Arguments

`data`	is the original data set
`data0`	is the data set with missing values
`real`	indicates whether the data set is a real data set with missing values
`example`	indicates whether the data set is a simulation example

Value

`XMean`	is the estimator on the Mean method
`MSEMean`	is the MSE value of the Mean method
`MAEMean`	is the MAE value of the Mean method
`REMean`	is the RE value of the Mean method
`GCVMean`	is the GCV value of the Mean method
`timeMean`	is the time cost of the Mean method

Examples

 library(MASS)
 n=100;p=10;per=0.1
 X0=data=matrix(mvrnorm(n*p,0,1),n,p)
 m=round(per*n*p,digits=0)
 mr=sample(1:(n*p),m,replace=FALSE)
 X0[mr]=NA;data0=X0
 Mean(data=data,data0=data0,real=FALSE,example=FALSE)
library(MASS)
 n=100;p=10;per=0.1
 X0=data=matrix(mvrnorm(n*p,0,1),n,p)
 m=round(per*n*p,digits=0)
 mr=sample(1:(n*p),m,replace=FALSE)
 X0[mr]=NA;data0=X0
 Mean(data=data,data0=data0,real=FALSE,example=FALSE)

Calculate the estimator on the MMLPCA method

Description

Calculate the estimator on the MMLPCA method

Usage

MMLPCA(data, data0, real = TRUE, example = FALSE)
MMLPCA(data, data0, real = TRUE, example = FALSE)

Arguments

`data`	is the original data set
`data0`	is the data set with missing values
`real`	indicates whether the data set is a real data set with missing values
`example`	indicates whether the data set is a simulation example

Value

`XMMLPCA`	is the estimator on the MMLPCA method
`MSEMMLPCA`	is the MSE value of the MMLPCA method
`MAEMMLPCA`	is the MAE value of the MMLPCA method
`REMMLPCA`	is the RE value of the MMLPCA method
`GCVMMLPCA`	is the GCV value of the MMLPCA method
`timeMMLPCA`	is the time cost of the MMLPCA method

Examples

 library(MASS)
 n=100;p=10;per=0.1
 X0=data=matrix(mvrnorm(n*p,0,1),n,p)
 m=round(per*n*p,digits=0)
 mr=sample(1:(n*p),m,replace=FALSE)
 X0[mr]=NA;data0=X0
 MMLPCA(data=data,data0=data0,real=FALSE,example=FALSE)
library(MASS)
 n=100;p=10;per=0.1
 X0=data=matrix(mvrnorm(n*p,0,1),n,p)
 m=round(per*n*p,digits=0)
 mr=sample(1:(n*p),m,replace=FALSE)
 X0[mr]=NA;data0=X0
 MMLPCA(data=data,data0=data0,real=FALSE,example=FALSE)

Calculate the estimator on the MNIPALS method

Description

Calculate the estimator on the MNIPALS method

Usage

MNIPALS(data, data0, real = TRUE, example = FALSE)
MNIPALS(data, data0, real = TRUE, example = FALSE)

Arguments

`data`	is the original data set
`data0`	is the data set with missing values
`real`	indicates whether the data set is a real data set with missing values
`example`	indicates whether the data set is a simulation example

Value

`XMNIPALS`	is the estimator on the MNIPALS method
`MSEMNIPALS`	is the MSE value of the MNIPALS method
`MAEMNIPALS`	is the MAE value of the MNIPALS method
`REMNIPALS`	is the RE value of the MNIPALS method
`GCVMNIPALS`	is the GCV value of the MNIPALS method
`timeMNIPALS`	is the time cost of the MNIPALS method

Examples

 library(MASS)
 n=100;p=10;per=0.1
 X0=data=matrix(mvrnorm(n*p,0,1),n,p)
 m=round(per*n*p,digits=0)
 mr=sample(1:(n*p),m,replace=FALSE)
 X0[mr]=NA;data0=X0
 MNIPALS(data=data,data0=data0,real=FALSE,example=FALSE)
library(MASS)
 n=100;p=10;per=0.1
 X0=data=matrix(mvrnorm(n*p,0,1),n,p)
 m=round(per*n*p,digits=0)
 mr=sample(1:(n*p),m,replace=FALSE)
 X0[mr]=NA;data0=X0
 MNIPALS(data=data,data0=data0,real=FALSE,example=FALSE)

Calculate the estimator on the MRPCA method

Description

Calculate the estimator on the MRPCA method

Usage

MRPCA(data, data0, real = TRUE, example = FALSE)
MRPCA(data, data0, real = TRUE, example = FALSE)

Arguments

`data`	is the original data set
`data0`	is the data set with missing values
`real`	indicates whether the data set is a real data set with missing values
`example`	indicates whether the data set is a simulation example

Value

`XMRPCA`	is the estimator on the MRPCA method
`MSEMRPCA`	is the MSE value of the MRPCA method
`MAEMRPCA`	is the MAE value of the MRPCA method
`REMRPCA`	is the RE value of the MRPCA method
`GCVMRPCA`	is the GCV value of the MRPCA method
`timeMRPCA`	is the time cost of the MRPCA method

Examples

 library(MASS)
 library(MASS)
 n=100;p=10;per=0.1
 X0=data=matrix(mvrnorm(n*p,0,1),n,p)
 m=round(per*n*p,digits=0)
 mr=sample(1:(n*p),m,replace=FALSE)
 X0[mr]=NA;data0=X0
 MRPCA(data=data,data0=data0,real=FALSE,example=FALSE)
library(MASS)
 library(MASS)
 n=100;p=10;per=0.1
 X0=data=matrix(mvrnorm(n*p,0,1),n,p)
 m=round(per*n*p,digits=0)
 mr=sample(1:(n*p),m,replace=FALSE)
 X0[mr]=NA;data0=X0
 MRPCA(data=data,data0=data0,real=FALSE,example=FALSE)

orange

Description

orange

Usage

data("orange")

Format

The format is: num [1:12, 1:8] 4.79 4.58 4.71 6.58 NA ... - attr(*, "dimnames")=List of 2 ..$ : chr [1:12] "1" "2" "3" "4" ... ..$ : chr [1:8] "Color.intensity" "Odor.intensity" "Attack.intensity" "Sweet" ...

Details

There are 19 missing values in the data set, accounting for 19.79 percent.

Source

http://factominer.free.fr/missMDA/index.html

References

Josse J, Husson F (2016). missMDA: A Package for Handling Missing Values in Multivariate Data Analysis. Journal of Statistical Software, 70(1), 1–31.

Examples

data(orange)
## maybe str(orange) ; plot(orange) ...
data(orange)
## maybe str(orange) ; plot(orange) ...

ozone

Description

ozone

Usage

data("ozone")

Format

A data frame with 112 observations on the following 11 variables.

maxO3: a numeric vector
T9: a numeric vector
T12: a numeric vector
T15: a numeric vector
Ne9: a numeric vector
Ne12: a numeric vector
Ne15: a numeric vector
Vx9: a numeric vector
Vx12: a numeric vector
Vx15: a numeric vector
maxO3v: a numeric vector

Details

There are 115 missing values in the data set, accounting for 9.96 percent.

Source

http://factominer.free.fr/missMDA/index.html

References

Audigier, V., Husson, F., and Josse, J. (2014). A principal components method to impute missing values for mixed data. Advances in Data Analysis and Classification, 10(1), 5-26.

Examples

data(ozone)
## maybe str(ozone) ; plot(ozone) ...
data(ozone)
## maybe str(ozone) ; plot(ozone) ...

PM2.5

Description

Beijing PM2.5

Usage

data("PM2.5")

Format

The format is: num [1:43824, 1:12] 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 ... - attr(*, "dimnames")=List of 2 ..$ : chr [1:43824] "1" "2" "3" "4" ... ..$ : chr [1:12] "year" "month" "day" "hour" ...

Details

It records 43824 hourly measurements on 12 variables; there are 2067 missing values, spread over 2067 measurements, accounting for 0.393 percent of all entries (a proportion of 0.00393).

Source

UCI repository

References

X. Liang, T. Zou, B. Guo, S. Li, H. Zhang, S. Zhang, H. Huang, and S. Chen. Assessing Beijing's PM2.5 pollution: severity, weather impact, APEC and winter heating. Proceedings of the Royal Society A, 471(2182):1–20, 2015.

Examples

data(PM2.5)
## maybe str(PM2.5) ; plot(PM2.5) ...
data(PM2.5)
## maybe str(PM2.5) ; plot(PM2.5) ...

review

Description

Travel reviews

Usage

data("review")

Format

The format is: num [1:980, 1:10] 0.93 1.02 1.22 0.45 0.51 0.99 0.9 0.74 1.12 0.7 ... - attr(*, "dimnames")=List of 2 ..$ : chr [1:980] "User_1" "User_2" "User_3" "User_4" ... ..$ : chr [1:10] "Category_1" "Category_2" "Category_3" "Category_4" ...

Details

980 travelers' reviews of 10 different types of travel facilities in East Asia

Source

UCI repository

References

Renjith, S., Sreekumar, A., and Jathavedan, M. (2018). Evaluation of partitioning clustering algorithms for processing social media data in tourism domain. 2018 IEEE Recent Advances in Intelligent Computational Systems (RAICS), 127-131.

Examples

 
data(review) 
## maybe str(review) ; plot(review) ... 
data(review) 
## maybe str(review) ; plot(review) ...

Calculate the estimator on the SR method

Description

Calculate the estimator on the SR method

Usage

SR(data, data0, real = TRUE, example = FALSE)
SR(data, data0, real = TRUE, example = FALSE)

Arguments

`data`	is the original data set
`data0`	is the data set with missing values
`real`	indicates whether the data set is a real data set with missing values
`example`	indicates whether the data set is a simulation example

Value

`XSR`	is the estimator on the SR method
`MSESR`	is the MSE value of the SR method
`MAESR`	is the MAE value of the SR method
`RESR`	is the RE value of the SR method
`GCVSR`	is the GCV value of the SR method

Examples

 library(MASS)
 n=100;p=10;per=0.1
 X0=data=matrix(mvrnorm(n*p,0,1),n,p)
 m=round(per*n*p,digits=0)
 mr=sample(1:(n*p),m,replace=FALSE)
 X0[mr]=NA;data0=X0
 SR(data=data,data0=data0,real=FALSE,example=FALSE)
library(MASS)
 n=100;p=10;per=0.1
 X0=data=matrix(mvrnorm(n*p,0,1),n,p)
 m=round(per*n*p,digits=0)
 mr=sample(1:(n*p),m,replace=FALSE)
 X0[mr]=NA;data0=X0
 SR(data=data,data0=data0,real=FALSE,example=FALSE)

Package 'ISR'

Help Index

CKD

Description

Usage

Format

Details

Source

References

Examples

Calculate the estimator with the DISR method

Description

Usage

Arguments

Value

Examples

HCV

Description

Usage

Format

Details

Source

References

Examples

Calculate the estimator with the ISR method

Description

Usage

Arguments

Value

Examples

Calculate the estimator on the Mean method

Description

Usage

Arguments

Value

Examples

Calculate the estimator on the MMLPCA method

Description

Usage

Arguments

Value

Examples

Calculate the estimator on the MNIPALS method

Description

Usage

Arguments

Value

Examples

Calculate the estimator on the MRPCA method

Description

Usage

Arguments

Value

Examples

orange

Description

Usage

Format

Details

Source

References

Examples

ozone

Description

Usage

Format

Details

Source

References

Examples

PM2.5

Description

Usage

Format

Details

Source

References

Examples

review

Description