1. Basic R Programming
library(MASS)  # loaded up front; not used in this section — presumably needed later, verify
# Two integer vectors of length 5
e <- c(1:5)
d <- c(6:10)
# `*` is ELEMENTWISE multiplication, not a matrix product
e*d
## [1] 6 14 24 36 50
# t(e) is a 1x5 matrix; `*` still multiplies elementwise (with recycling),
# so the result is the same products, just laid out as a 1x5 matrix
t(e)*d
## [,1] [,2] [,3] [,4] [,5]
## [1,] 6 14 24 36 50
# Summing the elementwise products gives the inner (dot) product
sum(t(e)*d)
## [1] 130
# %*% is true matrix multiplication: (1x5) %*% (5x1) -> 1x1 inner product
t(e)%*%d
## [,1]
## [1,] 130
# (5x1) %*% (1x5) -> 5x5 outer product: entry (i,j) is d[i]*e[j]
d%*%t(e)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 6 12 18 24 30
## [2,] 7 14 21 28 35
## [3,] 8 16 24 32 40
## [4,] 9 18 27 36 45
## [5,] 10 20 30 40 50
# x1: 5x4 numeric matrix, filled column-wise (the default),
# so its first row is 1, 6, 11, 16
x1 <- matrix(1:20, nrow = 5)
# x2: 5x4 numeric matrix, filled row-wise,
# so its first row is 1, 2, 3, 4
# (TRUE rather than T: T is an ordinary variable and can be reassigned)
x2 <- matrix(1:20, nrow = 5, byrow = TRUE)
# x3: transpose of x2 (4x5)
x3 <- t(x2)
# b = t(x2) %*% x2: a symmetric 4x4 cross-product matrix
b <- x3 %*% x2
b
## [,1] [,2] [,3] [,4]
## [1,] 565 610 655 700
## [2,] 610 660 710 760
## [3,] 655 710 765 820
## [4,] 700 760 820 880
sum(b)
## [1] 11380
# x2 %*% t(x2): a symmetric 5x5 matrix.
# Named cmat rather than c so that base::c() is not masked.
cmat <- x2 %*% x3
cmat
## [,1] [,2] [,3] [,4] [,5]
## [1,] 30 70 110 150 190
## [2,] 70 174 278 382 486
## [3,] 110 278 446 614 782
## [4,] 150 382 614 846 1078
## [5,] 190 486 782 1078 1374
sum(cmat)
## [1] 11150
# 16 Uniform(0,1) draws reshaped into a 4x4 matrix.
# No seed is set, so the values shown below will not reproduce exactly.
m <- runif(16)
m2 <- matrix(m, nrow=4)
m2
## [,1] [,2] [,3] [,4]
## [1,] 0.10189894 0.90168646 0.32918666 0.5923876
## [2,] 0.39018669 0.88521994 0.94812635 0.8528523
## [3,] 0.90028130 0.49344937 0.05230949 0.0183888
## [4,] 0.03540736 0.07567063 0.44852255 0.6043442
# solve() with a single matrix argument returns the matrix inverse
solve(m2)
## [,1] [,2] [,3] [,4]
## [1,] -0.4549106 -0.3146329 1.2651309 0.8514263
## [2,] 0.9858854 0.3718322 -0.2143544 -1.4845886
## [3,] -1.9439557 2.5941484 -0.8362530 -1.7299290
## [4,] 1.3459425 -1.9534075 0.5733549 3.0745809
# Inverse times original: the identity matrix up to floating-point error
# (off-diagonal entries are on the order of 1e-16/1e-17, not exactly zero)
solve(m2) %*% m2
## [,1] [,2] [,3] [,4]
## [1,] 1.000000e+00 -1.942890e-16 -5.551115e-17 1.110223e-16
## [2,] 2.081668e-17 1.000000e+00 1.110223e-16 1.110223e-16
## [3,] -1.665335e-16 -8.326673e-17 1.000000e+00 -2.220446e-16
## [4,] -4.163336e-17 -1.387779e-16 -4.440892e-16 1.000000e+00
# Multiplying in the other order also gives the identity (up to rounding)
m2 %*% solve(m2)
## [,1] [,2] [,3] [,4]
## [1,] 1.000000e+00 -2.220446e-16 5.551115e-17 -2.220446e-16
## [2,] 2.220446e-16 1.000000e+00 -5.551115e-17 -4.440892e-16
## [3,] 3.122502e-17 -1.387779e-17 1.000000e+00 -1.387779e-17
## [4,] 0.000000e+00 0.000000e+00 5.551115e-17 1.000000e+00
# Revisit t(x2) %*% x2 from above
x3%*%x2
## [,1] [,2] [,3] [,4]
## [1,] 565 610 655 700
## [2,] 610 660 710 760
## [3,] 655 710 765 820
## [4,] 700 760 820 880
# Eigendecomposition. x2 has rank 2 (each row equals the previous row
# plus 4), so t(x2) %*% x2 also has rank 2: two eigenvalues below are
# essentially zero (~1e-13), i.e. zero up to floating-point error.
eigen(x3%*%x2)
## eigen() decomposition
## $values
## [1] 2.864414e+03 5.585784e+00 6.505686e-13 6.838974e-14
##
## $vectors
## [,1] [,2] [,3] [,4]
## [1,] -0.4430188 0.7097424 -0.5477226 0.0000000
## [2,] -0.4798725 0.2640499 0.7302967 0.4082483
## [3,] -0.5167262 -0.1816426 0.1825742 -0.8164966
## [4,] -0.5535799 -0.6273351 -0.3651484 0.4082483
# 3x3 matrix, column-wise fill: columns are (1,2,3), (4,5,6), (7,8,9)
b <- matrix(1:9, ncol = 3)
b
## [,1] [,2] [,3]
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
# var() applied to a matrix returns the sample covariance matrix of its
# columns. Each column here has sample variance 1, and the columns are
# perfectly linearly related, so every covariance is 1 as well.
var(b)
## [,1] [,2] [,3]
## [1,] 1 1 1
## [2,] 1 1 1
## [3,] 1 1 1
# The individual columns
b[,1]
## [1] 1 2 3
b[,2]
## [1] 4 5 6
b[,3]
## [1] 7 8 9
# var(x, y) with two vectors gives the sample covariance of the pair;
# these calls reproduce the entries of var(b) one at a time
var(b[,1], b[,1])
## [1] 1
var(b[,1], b[,2])
## [1] 1
var(b[,1], b[,3])
## [1] 1
var(b[,2], b[,3])
## [1] 1
# Draw a N(0,1) sample of size 25.
# No seed is set, so the test output below will differ between runs.
x <- rnorm(25)
# One-sample t-test of H0: true mean equals 0
t.test(x)
##
## One Sample t-test
##
## data: x
## t = -0.71721, df = 24, p-value = 0.4802
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.5815596 0.2816076
## sample estimates:
## mean of x
## -0.149976
# Store the returned "htest" object so its components can be
# accessed by name (out$p.value, out$conf.int, ...)
out <- t.test(x)
names(out)
## [1] "statistic" "parameter" "p.value" "conf.int" "estimate"
## [6] "null.value" "stderr" "alternative" "method" "data.name"
Bivariate Normal distributions
Just as Normal distributions are the most important univariate distributions, joint or multivariate Normal distributions are the most important joint distributions. We mostly focus on the case of two random variables.
Jointly continuous random variables \(X\) and \(Y\) have a Bivariate Normal distribution with parameters \(\mu_X\), \(\mu_Y\), \(\sigma_X>0\), \(\sigma_Y>0\), and \(-1<\rho<1\) if the joint pdf is \[\begin{align*} f_{X, Y}(x,y) & = \frac{1}{2\pi\sigma_X\sigma_Y\sqrt{1-\rho^2}}\exp\left(-\frac{1}{2(1-\rho^2)}\left[\left(\frac{x-\mu_X}{\sigma_X}\right)^2+\left(\frac{y-\mu_Y}{\sigma_Y}\right)^2-2\rho\left(\frac{x-\mu_X}{\sigma_X}\right)\left(\frac{y-\mu_Y}{\sigma_Y}\right)\right]\right), \\ \quad -\infty <x<\infty, -\infty<y<\infty \end{align*}\] It can be shown that if the pair \((X, Y)\) has a BivariateNormal(\(\mu_X\), \(\mu_Y\), \(\sigma_X\), \(\sigma_Y\), \(\rho\)) distribution \[\begin{align*} E(X) & =\mu_X\\ E(Y) & =\mu_Y\\ SD(X) & = \sigma_X\\ SD(Y) & = \sigma_Y\\ Corr(X, Y) & = \rho \end{align*}\] A Bivariate Normal pdf is a density on \((x, y)\) pairs. The density surface looks like a mound with a peak at the point of means \((\mu_X,\mu_Y)\).
A Bivariate Normal density has elliptical contours. For each height \(c>0\) the set \(\{(x,y): f_{X, Y}(x,y)=c\}\) is an ellipse. The density decreases as \((x, y)\) moves away from \((\mu_X, \mu_Y)\), most steeply along the minor axis of the ellipse, and least steeply along the major axis of the ellipse.
library(GA)  # provides persp3D()
# Parameters of the bivariate normal density
mu1 <- 0        # mean of X_1
mu2 <- 0        # mean of X_2
sigma11 <- 1    # variance of X_1
sigma22 <- 1    # variance of X_2
sigma12 <- 0.7  # covariance of X_1 and X_2
# 3D plot: evaluate the density on a grid of +/- 3 around each mean
x1 <- seq(mu1 - 3, mu1 + 3, length = 500)
x2 <- seq(mu2 - 3, mu2 + 3, length = 500)
# Bivariate normal pdf, written via the inverse covariance matrix:
# det(Sigma) = sigma11*sigma22 - sigma12^2, and the quadratic form uses
# the cofactors of Sigma. (The original assigned to a local `z` inside
# the function body, shadowing the function name to no effect; the value
# of the last expression is returned automatically.)
z <- function(x1, x2) {
  det_sigma <- sigma11 * sigma22 - sigma12^2
  quad <- sigma22 * (x1 - mu1)^2 + sigma11 * (x2 - mu2)^2 -
    2 * sigma12 * (x1 - mu1) * (x2 - mu2)
  exp(-quad / (2 * det_sigma)) / (2 * pi * sqrt(det_sigma))
}
# f[i, j] = z(x1[i], x2[j]); outer() vectorizes z over the grid
f <- outer(x1, x2, z)
persp3D(x1, x2, f, theta = 30, phi = 30, expand = 0.5)
# contour plot
library(plotly)
# Same bivariate normal pdf as defined for the surface plot above
# (redefined here so this chunk is self-contained)
z <- function(x1,x2){ z <- exp(-(sigma22*(x1-mu1)^2+sigma11*(x2-mu2)^2-2*sigma12*(x1-mu1)*(x2-mu2))/(2*(sigma11*sigma22-sigma12^2)))/(2*pi*sqrt(sigma11*sigma22-sigma12^2)) }
# Transpose is deliberate: outer(x1, x2, z) has rows indexed by x1,
# but plotly's contour trace expects z with rows indexed by y
f <- t(outer(x1,x2,z))
plot_ly(x=x1,y=x2,z=f,type = "contour")%>%layout(xaxis=list(title="x1"),yaxis=list(title="x2"))