
Algorithms in SICP

# Register a knitr engine so that Racket chunks are evaluated with the
# command-line racket binary
knitr::knit_engines$set(Racket = function(options) {
  code <- paste(options$code, collapse = '\n')
  out  <- system2(
    'racket', c('-e', shQuote(code)), stdout = TRUE
  )
  knitr::engine_output(options, code, out)
})

The book

1 Building Abstractions with Procedures

1.2 Procedures and the Processes They Generate

1.2.1 Linear Recursion and Iteration

# Recursive function to find factorial
recursive.factorial <- function(x) {
  if (x == 0)    return (1)
  else           return (x * recursive.factorial(x-1))
}
recursive.factorial(5)
## [1] 120
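
SICP's point in this section is the contrast between a linear recursive process and a linear iterative one. Here is a sketch of the book's fact-iter variant in R (note that R does not optimize tail calls, so a plain for loop is the truly iterative R idiom):

# Iterative process, SICP-style: the running state is carried in the arguments
fact_iter <- function(product, counter, max_count) {
  if (counter > max_count)    return(product)
  else                        return(fact_iter(product*counter, counter+1, max_count))
}
iterative.factorial <- function(x) fact_iter(1, 1, x)
iterative.factorial(5)
## [1] 120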

1.3 Formulating Abstractions with Higher-Order Procedures

1.3.1 Procedures as Arguments

# Recursive function for new_sum and sum_cubes
increase1 <- function(x) {
  return (x + 1)
}
cube <- function(x) {
  return (x*x*x)
}
new_sum <- function(term, a, nextstep, b) {
  if (a>b)    return (0)
  else           return (term(a)+
                         new_sum(term, nextstep(a), nextstep, b))
}

sum_cubes <- function(a,b) {
  new_sum(cube, a, increase1, b)
}
sum_cubes(1,10)
## [1] 3025
sum_cubes(10,10)
## [1] 1000
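
As a cross-check (not from SICP), R's vectorized arithmetic computes the same sums directly:

sum((1:10)^3)
## [1] 3025
sum((10:10)^3)
## [1] 1000
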
# Recursive function for sum_integers
increase1 <- function(x) {
  return (x + 1)
}
identity <- function(x) {
  return (x)
}
new_sum <- function(term, a, nextstep, b) {
  if (a>b)    return (0)
  else           return (term(a)+
                         new_sum(term, nextstep(a), nextstep, b))
}

sum_integers <- function(a,b) {
  new_sum(identity, a, increase1, b)
}
sum_integers(1,10)
## [1] 55
sum_integers(10,10)
## [1] 10

Since \[\frac{\pi}{8}=\frac{1}{1\cdot3}+\frac{1}{5\cdot7}+\frac{1}{9\cdot11}+\cdots\] we can use new_sum to approximate \(\pi\):

# Recursive function for pi-sum
pi_next <- function(x) {
  return (x + 4)
}
pi_term <- function(x) {
  return (1.0/(x*(x+2)))
}
new_sum <- function(term, a, nextstep, b) {
  if (a>b)    return (0)
  else           return (term(a)+
                         new_sum(term, nextstep(a), nextstep, b))
}
pi_sum <- function(a, b) {
  return (8*new_sum(pi_term, a, pi_next, b))
}

pi_sum(1,1000)
## [1] 3.139593
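
The series converges slowly; compare the result above with R's built-in constant:

pi
## [1] 3.141593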

Since the integral of a function \(f\) between the limits \(a\) and \(b\) can be approximated numerically, for small values of \(dx\), using the formula \[\int_a^b f=\Bigl[f\Bigl(a+\frac{dx}{2}\Bigr)+f\Bigl(a+dx+\frac{dx}{2}\Bigr)+f\Bigl(a+2dx+\frac{dx}{2}\Bigr)+\cdots\Bigr]dx\] we can express it directly as a procedure:

# Recursive function for integral

new_sum <- function(term, a, nextstep, b) {
  if (a>b)    return (0)
  else           return (term(a)+
                         new_sum(term, nextstep(a), nextstep, b))
}
integral <- function(f, a, b, dx) {
  add_dx <- function(x) {
    return(x + dx)
  }
  return(new_sum(f, (a + dx/2.0), add_dx, b)*dx)
}

integral(cube, 0, 1, 0.01)
## [1] 0.2499875
integral(cube, 0, 1, 0.001)
## [1] 0.2499999
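
As a sanity check (not in SICP), the exact value of this integral is \(1/4\), and base R's integrate() agrees:

# Cross-check with base R's adaptive quadrature
integrate(function(x) x^3, lower = 0, upper = 1)$value
## [1] 0.25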

1.3.2 Constructing Procedures Using lambda

Lambda (anonymous) functions in R have no name and are not bound in the global environment; they are defined and applied in place.

(function(x)  x + 4 )(1)
## [1] 5
(function(x) (1.0/(x*(x+2))))(1)
## [1] 0.3333333
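
Since R 4.1, a backslash followed by the argument list is shorthand for the function keyword, so the same lambdas can be written more compactly:

# R >= 4.1 lambda shorthand
(\(x) x + 4)(1)
## [1] 5
(\(x) 1.0/(x*(x+2)))(1)
## [1] 0.3333333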

Then the pi-sum procedure can be expressed without defining any auxiliary procedures as:

new_sum <- function(term, a, nextstep, b) {
  if (a>b)    return (0)
  else           return (term(a)+
                         new_sum(term, nextstep(a), nextstep, b))
}
pi_sum <- function(a, b) {
  return (8*new_sum((function(x) (1.0/(x*(x+2)))), a, (function(x)  x + 4), b))
}
pi_sum(1,1000)
## [1] 3.139593

We can also write the integral procedure without having to define the auxiliary procedure add_dx:

# Recursive function for integral
new_sum <- function(term, a, nextstep, b) {
  if (a>b)    return (0)
  else        return (term(a)+
                         new_sum(term, nextstep(a), nextstep, b))
}
integral <- function(f, a, b, dx) {
  return (new_sum(f, (a+dx/2.0), function(x)(x + dx), b)*dx)
}

integral(cube, 0, 1, 0.01)
## [1] 0.2499875
integral(cube, 0, 1, 0.001)
## [1] 0.2499999

1.3.3 Procedures as General Methods

The half-interval method is a simple but powerful technique for finding roots of an equation \(f(x) = 0\), where \(f\) is a continuous function. The idea is that, if we are given points \(a\) and \(b\) such that \(f(a) < 0 < f(b)\), then \(f\) must have at least one zero between \(a\) and \(b\). To locate a zero, let \(x\) be the average of \(a\) and \(b\), and compute \(f(x)\). If \(f(x) > 0\), then \(f\) must have a zero between \(a\) and \(x\). If \(f(x) < 0\), then \(f\) must have a zero between \(x\) and \(b\). Continuing in this way, we can identify smaller and smaller intervals on which \(f\) must have a zero. Since the interval of uncertainty is reduced by half at each step of the process, the number of steps required grows as \(\Theta(\log(L/T))\), where \(L\) is the length of the original interval and \(T\) is the error tolerance (that is, the size of the interval we will consider “small enough”).
The procedure checks to see which of the endpoints has a negative function value and which has a positive value, and calls the search procedure accordingly. If the function has the same sign at the two given points, the half-interval method cannot be used, in which case the procedure signals an error. Since \(\pi\) is the root of \(\sin(x)=0\) for \(0<x<2\pi\), we can use the method to approximate \(\pi\):

search <- function(f, neg_point, pos_point) {
  midpoint <- (neg_point + pos_point)/2.0
  if(abs(neg_point-pos_point) < 0.0001) return(midpoint)
  else if (f(midpoint) > 0)    return(search(f, neg_point, midpoint))
  else if (f(midpoint) < 0)   return(search(f, midpoint, pos_point))
  else return(midpoint)
}

half_interval_method <- function(f, a, b){
  a_value <- f(a)
  b_value <- f(b)
  if (a_value < 0 && b_value > 0)    return(search(f, a, b))
  else if (a_value > 0 && b_value < 0)    return(search(f, b, a))
  else return("error: Function values are not of opposite sign")
}

half_interval_method(sin, 2.0, 4.0)
## [1] 3.141571
half_interval_method(sin, 3.5, 4.0)
## [1] "error: Function values are not of opposite sign"

Using the half-interval method to search for a root of the equation \[x^3-2x-3=0, \quad 1<x<2\]

half_interval_method(function(x)(x^3-2*x-3), 1.0, 2.0)
## [1] 1.89328
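
As a cross-check (not part of SICP), base R's uniroot() implements a related bracketing search and agrees to within its tolerance:

# Base R root finding on the same bracketing intervals
uniroot(sin, c(2, 4))$root                        # approximately 3.141593
uniroot(function(x) x^3 - 2*x - 3, c(1, 2))$root  # approximately 1.8933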

A number \(x\) is called a fixed point of a function \(f\) if \(x\) satisfies the equation \(f(x) = x\). We can devise a procedure fixed-point that takes as inputs a function and an initial guess and produces an approximation to a fixed point of the function. We apply the function repeatedly until we find two successive values whose difference is less than some prescribed tolerance:

tolerance <- 0.00001
fixed_point <- function(f, first_guess) {
  close_enough <- function(v1, v2){
    return(abs(v1-v2) < tolerance)
  }
  tryguess <- function(guess) {
    nextvalue <- f(guess)
    if (close_enough(guess, nextvalue)) return(nextvalue)
    else tryguess(nextvalue)
  }
  tryguess(first_guess)
}
fixed_point(cos, 1.0)
## [1] 0.7390823

We can find a solution to the equation \[y=\sin y+\cos y\] by searching for a fixed point of \(y \mapsto \sin y+\cos y\):

fixed_point(function(y)(sin(y)+cos(y)), 1.0)
## [1] 1.258732

We can formulate the square-root computation as a fixed-point search. If \(y=\sqrt{x}\), then \(y^2=x\) and \(y=x/y\), so \(\sqrt{x}\) is a fixed point of \(y \mapsto x/y\). We can therefore try to compute square roots as:

square_root <- function(x){
  # Warning: do not run this version; the search oscillates and never converges
  fixed_point(function(y)(x/y), 1.0)
}

Unfortunately, this fixed-point search does not converge. Consider an initial guess \(y_1\). The next guess is \(y_2 = x/y_1\) and the next guess is \(y_3 =x/y_2 = x/(x/y_1) = y_1\). This results in an infinite loop in which the two guesses \(y_1\) and \(y_2\) repeat over and over, oscillating about the answer. Since the answer is always between our guess \(y\) and \(x/y\), we can make a new guess that is not as far from \(y\) as \(x/y\) by averaging \(y\) with \(x/y\), so that the next guess after \(y\) is \(\frac{1}{2} (y + x/y)\) instead of \(x/y\). This approach of averaging successive approximations to a solution, a technique that we call average damping, often aids the convergence of fixed-point searches.

square_root <- function(x){
  fixed_point(function(y)((y+x/y)/2.0), 1.0)
}
square_root(2)
## [1] 1.414214
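
For comparison, base R's sqrt agrees to the printed precision:

sqrt(2)
## [1] 1.414214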

1.3.4 Procedures as Returned Values

We can express the idea of average damping by means of the following procedure:

average_damp <- function(f){
  function(x)((x+f(x))/2.0)
}
square <- function(x){
  return(x^2)
}
(average_damp(square))(10)
## [1] 55

Average-damp is a procedure that takes a procedure \(f\) as its argument and returns as its value a procedure (produced by the lambda) that, when applied to a number \(x\), produces the average of \(x\) and \(f(x)\). Using average-damp, we can reformulate the square-root procedure as follows:

square_root <- function(x){
  fixed_point(average_damp(function(y)(x/y)), 1.0)
}
square_root(2)
## [1] 1.414214
square_root(3)
## [1] 1.732051

Notice that the cube root of \(x\) is a fixed point of the function \(y \mapsto x/y^2\), so we can immediately generalize our square-root procedure to one that extracts cube roots:

cube_root <- function(x){
  fixed_point(average_damp(function(y)(x/(y^2))), 1.0)
}
cube_root(2)
## [1] 1.259923
cube_root(3)
## [1] 1.442252

Newton’s method uses tangent lines of the graph of \(y = f(x)\) near the points where \(f(x)=0\) to estimate the solution. The goal of Newton’s method for estimating a solution of an equation \(f(x) = 0\) is to produce a sequence of approximations that approach the solution. The initial estimate, \(x_0\), may be found by just plain guessing. The method then uses the tangent to the curve \(y = f(x)\) at \((x_0, f(x_0))\) to approximate the curve, calling the point \(x_1\) where the tangent meets the \(x\)-axis. The number \(x_1\) is usually a better approximation to the solution than \(x_0\). The point \(x_2\) where the tangent to the curve at \((x_1, f(x_1))\) crosses the \(x\)-axis is the next approximation in the sequence. We continue on, using each approximation to generate the next, until we are close enough to the root to stop.
Given the approximation \(x_n\), the point-slope equation for the tangent to the curve at \((x_n,f(x_n))\) is \[y = f(x_n) + f'(x_n)(x - x_n)\] We can find where it crosses the \(x\)-axis by setting \(y=0\): \[0 = f(x_n) + f'(x_n)(x - x_n)\] \[x - x_n=-\frac{f(x_n)}{f'(x_n)}\] \[x=x_n-\frac{f(x_n)}{f'(x_n)}\] This value of \(x\) is the next approximation \(x_{n+1}\). A solution of the equation \(f(x)=0\) is therefore a fixed point of the function \[x \mapsto x-\frac{f(x)}{f'(x)}\] Newton’s method is the use of the fixed-point method to approximate a solution of the equation by finding a fixed point of this transformed function.
We can express the idea of the derivative (taking \(dx\) to be, say, 0.00001) as the procedure:

dx <- 0.00001
deriv <- function(f){
  function(x)((f(x+dx)-f(x))/dx)
}
cube <- function(x) {
  return (x*x*x)
}
deriv(cube)(5)
## [1] 75.00015
# The exact derivative of x^3 at x = 5 is 3*5^2 = 75
(3*5^2)
## [1] 75

Like average-damp, deriv is a procedure that takes a procedure as argument and returns a procedure as value. With the aid of deriv, we can express Newton’s method as a fixed-point process:

newton_transform <- function(f){
  function(x)(x-f(x)/deriv(f)(x))
}
newtons_method <- function(f, guess){
  fixed_point(newton_transform(f), guess)
}

The newtons-method procedure takes as arguments a procedure that computes the function for which we want to find a zero, together with an initial guess. For instance, to find the square root of \(x\), we can use Newton’s method to find a zero of the function \(y \mapsto y^2 - x\) starting with an initial guess of \(1\). This provides yet another form of the square-root procedure:

square_root2 <- function(x){
  newtons_method(function(y)(y^2-x), 1.0)
}
square_root2(2)
## [1] 1.414214

Since Newton’s method was itself expressed as a fixed-point process, we actually saw two ways to compute square roots as fixed points. Each method begins with a function and finds a fixed point of some transformation of the function. We can express this general idea itself as a procedure:

fixed_point_of_transform <- function(f, transform, guess){
  fixed_point(transform(f), guess)
}

This very general procedure takes as its arguments a procedure \(f\) that computes some function, a procedure that transforms \(f\), and an initial guess. The returned result is a fixed point of the transformed function.
The first square-root computation finds a fixed point of the average-damped version of \(y \mapsto x/y\):

square_root3 <- function(x){
  fixed_point_of_transform(function(y)(x/y), average_damp, 1.0)
}
square_root3(2)
## [1] 1.414214

The second square-root computation finds a fixed point of the Newton transform of \(y \mapsto y^2-x\):

square_root4 <- function(x){
  fixed_point_of_transform(function(y)(y^2-x), newton_transform, 1.0)
}
square_root4(2)
## [1] 1.414214
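
As an extra illustration (not in the original), the same general procedure extracts cube roots via the Newton transform of \(y \mapsto y^3-x\):

cube_root2 <- function(x){
  fixed_point_of_transform(function(y)(y^3-x), newton_transform, 1.0)
}
cube_root2(2)   # approximately 1.259921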

2 Building Abstractions with Data

2.1 Introduction to Data Abstraction

2.1.1 Example: Arithmetic Operations for Rational Numbers

The name cons stands for “construct.” The names “car” and “cdr” derive from the original implementation of Lisp on the IBM 704. That machine had an addressing scheme that allowed one to reference the “address” and “decrement” parts of a memory location. “car” stands for “Contents of Address part of Register” and “cdr” (pronounced “could-er”) stands for “Contents of Decrement part of Register.”

(define x (cons 1 2))
(car x)
(cdr x)
## 1
## 2
(define x (cons 1 2))
(define y (cons 3 4))
(define z (cons x y))
z
(car z)
(car (car z))
(car (cdr z))
## '((1 . 2) 3 . 4)
## '(1 . 2)
## 1
## 3

Using R:

# Pairs in R: cons builds a two-element list; car and cdr select its parts
cons <- function(a, b) list(a, b)
car  <- function(x) x[[1]]
cdr  <- function(x) x[[2]]
a <- cons(1, 2)
b <- cons(3, 4)
x <- cons(a, b)
length(x)
## [1] 2
car(car(x))
## [1] 1
cdr(car(x))
## [1] 2
car(cdr(x))
## [1] 3
cdr(cdr(x))
## [1] 4

Representing rational numbers

make_rat <- function(a, b) cons(a,b)
numer <- function(x) car(x)
denom <- function(x) cdr(x)
print_rat <- function(x){
  paste0(numer(x),"/",denom(x))
}
x <- make_rat(1,2)
print_rat(x)
## [1] "1/2"

Since \[\frac{n_1}{d_1}+\frac{n_2}{d_2}=\frac{n_1d_2+n_2d_1}{d_1d_2}\] we can define addition of rational numbers as:

one_half <- make_rat(1,2)
one_third <- make_rat(1,3)
add_rat <- function(x, y){
  make_rat(numer(x)*denom(y)+numer(y)*denom(x), denom(x)*denom(y))
}
print_rat(add_rat(one_half, one_third))
## [1] "5/6"

We can also represent pairs using nothing but procedures:

(define (cons x y)
  (define (dispatch m)
    (cond ((= m 0) x)
          ((= m 1) y)
          (else (error "Argument not 0 or 1: CONS" m))))
  dispatch)
(define (car z) (z 0))
(define (cdr z) (z 1))
((cons 8 9) 0)
((cons 8 9) 1)
(car(cons 8 9))
(cdr(cons 8 9))
## 8
## 9
## 8
## 9
cons2 <- function(x, y){
  dispatch <- function(m){
    if (m == 0) x
    else if (m == 1) y
    else "error: Argument not 0 or 1"
  }
  dispatch
}
car2 <- function(z) z(0) 
cdr2 <- function(z) z(1)
cons2(4,5)(1)
## [1] 5
cons2(4,5)(2)
## [1] "error: Argument not 0 or 1"
car2(cons2(4,5))
## [1] 4
cdr2(cons2(4,5))
## [1] 5

2.2 Hierarchical Data and the Closure Property

In Racket:

(cons (cons 1 2) (cons 3 4))
## '((1 . 2) 3 . 4)

In R:

z <- cons(cons(1, 2), cons(3, 4))
car(z)
## [[1]]
## [1] 1
## 
## [[2]]
## [1] 2
car(car(z))
## [1] 1
y <- cons(cons(1, cons(2, 3)), 4)
car(y)
## [[1]]
## [1] 1
## 
## [[2]]
## [[2]][[1]]
## [1] 2
## 
## [[2]][[2]]
## [1] 3

2.2.1 Representing Sequences: list

A Racket list is equivalent to a sequence of nested cons pairs (which is different from a list in R):

(cons 1 (cons 2 (cons 3 (cons 4 null))))
(list 1 2 3 4)
(define one-through-four (list 1 2 3 4))
one-through-four
(car one-through-four)
(cdr one-through-four)
(cons 10 one-through-four)
## '(1 2 3 4)
## '(1 2 3 4)
## '(1 2 3 4)
## 1
## '(2 3 4)
## '(10 1 2 3 4)
cons <- function(x, y) list(x, y)
z <- cons(1, cons(2, cons(3, 4)))
cdr(z)
## [[1]]
## [1] 2
## 
## [[2]]
## [[2]][[1]]
## [1] 3
## 
## [[2]][[2]]
## [1] 4
cdr(cdr(z))
## [[1]]
## [1] 3
## 
## [[2]]
## [1] 4
cdr(cdr(cdr(z)))
## [1] 4
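
A closer R analogue of Racket's (list 1 2 3 4) terminates the nested pairs with NULL, mirroring Racket's null (a sketch using the cons, car, and cdr defined above):

# Nested pairs terminated with NULL, like Racket's '(1 2 3 4)
one_through_four <- cons(1, cons(2, cons(3, cons(4, NULL))))
car(one_through_four)
## [1] 1
car(cdr(one_through_four))
## [1] 2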

BIBLIOGRAPHY

1. Abelson, Harold; Sussman, Gerald Jay. Structure and Interpretation of Computer Programs. 2nd ed. The MIT Press; 1996. ISBN 9780262510875.