元理系が勉強: 12月 2015

2015年12月27日日曜日

[Rの]Andrew先生の機械学習2-2[練習]

2回目後半は線形分離できない2次元データのロジスティック回帰。
黄色○と+を識別したいが、どう見ても直線では識別できなさそう。

n次の空間に射影し、(n + 2)!/(n! * 2!)次元空間で線形分離する。
講義の問題と同様n = 6としてみる

R
ex2_2 <- function()
{
    # ----- load data -----
    data = read.csv('ex2data2.txt', header = F)
    X <- data[, 1:2]
    y <- data[, 3]

    # ----- plot -----
    plot(0, 0, type = "n", xlim = c(0.9 * min(X[, 1]), 1.1 * max(X[, 1])), ylim = c(0.9 * min(X[, 2]), 1.1 * max(X[, 2])),xlab = "x1", ylab = "x2")
    points(X, col = ifelse(y == 1, "black", "yellow"), pch = ifelse(y == 1, 3, 16))
    legend("topleft", legend = c("y = 1", "y = 0"), pch = c(3, 16), col = c("black", "yellow"))
    par(new = T)

    # ----- variables -----
    mappedX = mapFeature(X[, 1], X[, 2])
    m = length(mappedX[,1])
    n = length(mappedX[1,])

    # ----- mapFeatureでバイアス項が足されている
    initial_theta = rep(0, n)

    # ----- optimize -----
    print("start optimization")
    lambda = 1
    result <- optim(par = initial_theta, fn = costFunctionReg, gr = costGr, method = "CG", X = mappedX, y = y, lambda = lambda)
    theta = result$par
    plotDB2(theta, X, y)

    p = round(sigmoid(mappedX %*% theta))
    cat("Train Accuracy : ", mean(p == y) * 100, "\n")
    return(result)
}

mapFeature <- function(X1, X2)
{
    degree = 6
    out = matrix(1, length(X1), 1)
    for (i in 1 : degree) {
        for (j in 0 : i) {
            out = cbind(out, X1 ^ (i-j) * X2 ^ j)
        }
    }
    return(out)
}

costFunctionReg <- function(theta, X, y, lambda)
{
    m = length(y)
    predicts = X %*% theta

    probs = sigmoid(predicts)
    costs = -y * log(probs) - (1 - y) * log(1 - probs)
    J = sum(costs) / m

    # ----- regularize -----
    J = J + lambda / (2 * m) * sum(theta[-1] ^ 2) # theta[-1]は1番目以外の要素
    return(J)
}
costGr <- function(theta, X, y, lambda)
{
    m = length(y)
    predicts = X %*% theta

    probs = sigmoid(predicts)
    grad = (t(X) %*% (probs - y)) / m + lambda / m * theta
    grad[1] = (t(X)[1, ] %*% (probs - y)) / m
    return(grad)
}

plotDB2 <- function(theta, X, y)
{
    num_points = 50
    u <- seq(0.9 * min(X[, 1]), 1.1 * max(X[, 1]), length = num_points)
    v <- seq(0.9 * min(X[, 2]), 1.1 * max(X[, 2]), length = num_points)
    z <- matrix(0, nrow = length(u), ncol = length(v))
    for (i in 1 : length(u)) {
        for (j in 1 : length(v)) {
            z[i, j] = mapFeature(u[i], v[j]) %*% theta
        }
    }
    contour(u, v, z, xlim = c(0.9 * min(X[, 1]), 1.1 * max(X[, 1])), ylim = c(0.9 * min(X[, 2]), 1.1 * max(X[, 2])))

}
optim関数にmethod = "CG"を指定しているのは、デフォルトだと識別があまりよくならなかったため。

結果

Train Accuracy : 83.05085

n = 2でやってみた

　　Train Accuracy : 81.35593
こっちでもいいんでないかという気もするレベル。
nを決めるには横軸n、縦軸TrainAccuracyのグラフを描いてみればいいんだったか。
あるいは縦軸は汎化能力か

コスト関数の値と勾配を別の関数で計算しているせいで、無駄な計算をしている。
matlabの最適化関数では、関数の返り値を多次元にして勾配を返してやるとそれを使って最適化してくれていたのだけど、Rのoptimは同じことはできないらしい。

fn
A function to be minimized (or maximized), with first argument the vector of parameters over which minimization is to take place. It should return a scalar result.

2015年12月23日水曜日

[Rの]Andrew先生の機械学習2-1[練習]

2回目のお題はロジスティック回帰
簡単な関数いくつか作って、後はＲの最適化関数を使うだけ

ex2 <- function()
{
    # ----- load data -----
    data = read.csv('ex2data1.txt', header = F)
    X <- data[, 1:2]
    y <- data[, 3]

    # ----- plot -----
    plot(0, 0, type = "n", xlim = c(0.9 * min(X[, 1]), 1.1 * max(X[, 1])), ylim = c(0.9 * min(X[, 2]), 1.1 * max(X[, 2])),xlab = "x1", ylab = "x2")
    points(X, col = ifelse(y == 1, "black", "yellow"), pch = ifelse(y == 1, 3, 16))

    # ----- variables -----
    X_mat <- cbind(X[, 1],X[, 2])
    m <- length(X[,1])
    n <- length(X[1,])

    # ----- add bias -----
    initial_theta <- rep(0, n + 1)
    X_mat <- cbind(rep(1, m), X_mat)

    # ----- optimize -----
    result <- optim(par = initial_theta, costFunction, X = X_mat, y = y)
    theta <- result$par
    plotDecisionBoundary(theta, X, y)
}

sigmoid <- function(x)
{
    return(1 / (1 + exp(-x)))
}

costFunction <- function(theta, X, y)
{
    m <- length(y)
    predicts <- X %*% theta

    probs <- sigmoid(predicts)
    costs <- -y * log(probs) - (1 - y) * log(1 - probs)
    J <- sum(costs) / m

    return(J)
}

plotDecisionBoundary <- function(theta, X, y)
{
    # ----- plot -----
    plot(0, 0, type = "n", xlim = c(0.9 * min(X[, 1]), 1.1 * max(X[, 1])), ylim = c(0.9 * min(X[, 2]), 1.1 * max(X[, 2])),xlab = "x1", ylab = "x2")
    points(X, col = ifelse(y == 1, "black", "yellow"), pch = ifelse(y == 1, 3, 16))
    par(new=T)

    x1 <- c(min(X[, 1]), max(X, 1))
    x2 <- -1 / theta[3] *( theta[2] * x1 +theta[1])
    plot(x1, x2, ,type = 'l', xlim = c(0.9 * min(X[, 1]), 1.1 * max(X[, 1])), ylim = c(0.9 * min(X[, 2]), 1.1 * max(X[, 2])))
}

----- 追記 -----
ちなみにglm関数を使うと
ex2_glm <- function()
{
    # ----- load data -----
    data = read.csv('ex2data1.txt', header = F)
    X <- data[, 1:2]
    y <- data[, 3]

    # ----- GLM -----
    fit <- glm(formula = y ~ V1 + V2 + 1, family = binomial, data = X)
    theta <- fit$coef
    plotDecisionBoundary(theta, X, y)
    print(summary(fit))
    return(fit)
}

2015年12月21日月曜日

[Rの]Andrew先生の機械学習1[練習]

折角Ａｎｄｒｅｗ先生の講義を一通り終えたので、復習とRの練習のため、練習問題をRに移植。
matlabに慣れていたのもあって、講義の問題はだいたい2時間以内で終わっていたけれども、Ｒではもっと時間がかかりそう。最後まで続けられるか微妙。

最初のお題は線形回帰
Rの組み込み関数lmを使った。
plot関数を重ねて使うとあまりよくないらしい。

ex1 <- function()
{
    data = read.csv('ex1data1.txt', header = F)
    X <- data[, 1]
    y <- data[, 2]
    plot(X, y, xlim = c(0.9 * min(X), 1.1 * max(X)), ylim = c(0.9 * min(y), 1.1 * max(y)))
    par(new=T)

    m = length(y) # number of training examples
    fit <- lm(y ~ X)
    theta <- c(coef(fit))
    print(theta)

    xx <- c(min(X), max(X))
    yy <- theta[2] * xx + theta[1]
    # if it were not for xlim & ylim the plotted axis is worngly overlapped
    plot(xx, yy, type = 'l', xlim = c(0.9 * min(X), 1.1 * max(X)), ylim = c(0.9 * min(y), 1.1 * max(y)), xlab = "", ylab = "")
}

bloggerでは手軽にコードを記載する方法が見つからなかった。
はてなではとても簡単にコードを色分けして表示できそうでかっこよかった。