Max R. P. Grossmann

Max R. P. Grossmann

Rectified Gaussian distribution in R

Posted: 2021-10-14 · Last updated: 2022-03-04

See here for further details and here for an application. Not to be confused with the truncated normal distribution.

a is the lower bound, b the upper bound; mean and sd are expectation and standard deviation of the underlying unrestricted (i.e. a = -Inf and b = +Inf) normal distribution.

drectnorm <- function(x, a = 0, b = 1, mean = 0, sd = 1) {
    if (!all(a <= b)) stop("Please set b such that it is less than or equal to a.")

    ifelse(x < a | x > b,
        0,
        ifelse(x == a,
            pnorm(a, mean, sd),
            ifelse(x == b,
                1 - pnorm(b, mean, sd),
                dnorm(x, mean, sd))))
}

prectnorm <- function(q, a = 0, b = 1, mean = 0, sd = 1) {
    if (!all(a <= b)) stop("Please set b such that it is less than or equal to a.")

    ifelse(q < a,
        0,
        ifelse(q >= b,
            1,
            pnorm(q, mean, sd)))
}

qrectnorm <- function(p, a = 0, b = 1, mean = 0, sd = 1) {
    if (!all(a <= b)) stop("Please set b such that it is less than or equal to a.")

    ifelse(p < 0 | p > 1,
        NaN,
        ifelse(p <= pnorm(a, mean, sd),
            a,
            ifelse(p >= pnorm(b, mean, sd),
                b,
                qnorm(p, mean, sd))))
}

rrectnorm <- function(n, a = 0, b = 1, mean = 0, sd = 1) {
    if (!all(a <= b)) stop("Please set b such that it is less than or equal to a.")

    pmax(a,
        pmin(b,
            rnorm(n, mean, sd)))
}

Application: Rediscovering the Tobit model

set.seed(12345)

# generate some data
x <- rnorm(1000)
y <- rrectnorm(1000, a = 0, b = 5, mean = -1 + 5*x, sd = 3)

# OLS would not be the correct model:
summary(lm(y ~ x))

# let's build our own:
library(bbmle)

# to avoid infinite values in the likelihood function:
punish.inf <- function (x) ifelse(is.infinite(x), sign(x)*1000, x)

negll <- function (b0, b1, sigma) {
    -sum(punish.inf(log(drectnorm(y, 0, 5, b0 + b1*x, sigma))))
    # note that we model mean and sd of the latent (uncensored) variable
}

m1 <- mle2(negll,
    start = list(b0 = 1, b1 = 1, sigma = 1),
    method = "L-BFGS-B",
    lower = list(b0 = -10, b1 = -10, sigma = 0.01),
    upper = list(b0 = 10, b1 = 10, sigma = 100))

# looks very good:
summary(m1)
# the true values are, see above,
# b0 = -1, b1 = 5, sigma = 3

# let's now compare this to a Tobit model:
library(VGAM)

m2 <- vglm(y ~ x, tobit(Lower = 0, Upper = 5))

summary(m2)

# looks quite familiar!!!!! such a coincidence!