/*
 * Decompiled with CFR 0.152.
 */
package org.apache.sysml.scripts.algorithms;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import org.apache.sysml.api.mlcontext.MLResults;
import org.apache.sysml.api.mlcontext.Matrix;
import org.apache.sysml.api.mlcontext.Script;
import org.apache.sysml.scripts.algorithms.stepglm.Binomial_probability_two_column_output;
import org.apache.sysml.scripts.algorithms.stepglm.Get_CG_Steihaug_point_output;
import org.apache.sysml.scripts.algorithms.stepglm.Get_trust_boundary_point_output;
import org.apache.sysml.scripts.algorithms.stepglm.Glm_dist_output;
import org.apache.sysml.scripts.algorithms.stepglm.Glm_initialize_output;
import org.apache.sysml.scripts.algorithms.stepglm.Glm_log_likelihood_part_output;

public class StepGLM
extends Script {
    public StepGLM() {
        String string = "scripts/algorithms/StepGLM.dml";
        InputStream inputStream = Script.class.getResourceAsStream(new StringBuffer().append("/").append(string).toString());
        InputStreamReader inputStreamReader = new InputStreamReader(inputStream);
        char[] cArray = new char[1024];
        StringBuilder stringBuilder = new StringBuilder();
        try {
            int n;
            while ((n = inputStreamReader.read(cArray)) > 0) {
                stringBuilder.append(cArray, 0, n);
            }
        }
        catch (IOException iOException) {
            iOException.printStackTrace();
        }
        this.setScriptString(stringBuilder.toString());
    }

    public Get_trust_boundary_point_output get_trust_boundary_point(Object object, Object object2, Object object3, Object object4, Object object5, Object object6, Object object7, Object object8) {
        String string = "source('scripts/algorithms/StepGLM.dml') as mlcontextns;[new_z, f_change] = mlcontextns::get_trust_boundary_point(g, z, p, q, r, pp, pq, trust_delta_sq);";
        Script script = new Script(string);
        script.in("g", object).in("z", object2).in("p", object3).in("q", object4).in("r", object5).in("pp", object6).in("pq", object7).in("trust_delta_sq", object8).out("new_z").out("f_change");
        MLResults mLResults = script.execute();
        Matrix matrix = mLResults.getMatrix("new_z");
        double d = mLResults.getDouble("f_change");
        Get_trust_boundary_point_output get_trust_boundary_point_output = new Get_trust_boundary_point_output(matrix, d);
        return get_trust_boundary_point_output;
    }

    public String get_trust_boundary_point__docs() {
        String string = "get_trust_boundary_point = \n  function (Matrix[double] g, Matrix[double] z, Matrix[double] p, \n            Matrix[double] q, Matrix[double] r, double pp, double pq, \n            double trust_delta_sq)\n    return (Matrix[double] new_z, double f_change)\n{\n      zz = sum (z * z);  pz = sum (p * z);\n      sq_root_d = sqrt (pz * pz - pp * (zz - trust_delta_sq));\n      tau_1 = (- pz + sq_root_d) / pp;\n      tau_2 = (- pz - sq_root_d) / pp;\n      zq = sum (z * q);  gp = sum (g * p);\n      f_extra = 0.5 * sum (z * (r + g));\n      f_change_1 = f_extra + (0.5 * tau_1 * pq + zq + gp) * tau_1;\n      f_change_2 = f_extra + (0.5 * tau_2 * pq + zq + gp) * tau_2;\n      ind1 = as.integer(f_change_1 < f_change_2);\n      ind2 = as.integer(f_change_1 >= f_change_2);\n      new_z = z + ((ind1 * tau_1 + ind2 * tau_2) * p);\n      f_change = ind1 * f_change_1 + ind2 * f_change_2;\n}\n";
        return string;
    }

    public String get_trust_boundary_point__source() {
        String string = "get_trust_boundary_point = \n  function (Matrix[double] g, Matrix[double] z, Matrix[double] p, \n            Matrix[double] q, Matrix[double] r, double pp, double pq, \n            double trust_delta_sq)\n    return (Matrix[double] new_z, double f_change)\n{\n      zz = sum (z * z);  pz = sum (p * z);\n      sq_root_d = sqrt (pz * pz - pp * (zz - trust_delta_sq));\n      tau_1 = (- pz + sq_root_d) / pp;\n      tau_2 = (- pz - sq_root_d) / pp;\n      zq = sum (z * q);  gp = sum (g * p);\n      f_extra = 0.5 * sum (z * (r + g));\n      f_change_1 = f_extra + (0.5 * tau_1 * pq + zq + gp) * tau_1;\n      f_change_2 = f_extra + (0.5 * tau_2 * pq + zq + gp) * tau_2;\n      ind1 = as.integer(f_change_1 < f_change_2);\n      ind2 = as.integer(f_change_1 >= f_change_2);\n      new_z = z + ((ind1 * tau_1 + ind2 * tau_2) * p);\n      f_change = ind1 * f_change_1 + ind2 * f_change_2;\n}\n";
        return string;
    }

    public Get_CG_Steihaug_point_output get_CG_Steihaug_point(Object object, Object object2, Object object3, Object object4, Object object5, Object object6, Object object7, Object object8, Object object9) {
        String string = "source('scripts/algorithms/StepGLM.dml') as mlcontextns;[z, neg_log_l_change, i_CG, reached_trust_boundary] = mlcontextns::get_CG_Steihaug_point(X, scale_X, shift_X, w, g, beta, lambda, trust_delta, max_iter_CG);";
        Script script = new Script(string);
        script.in("X", object).in("scale_X", object2).in("shift_X", object3).in("w", object4).in("g", object5).in("beta", object6).in("lambda", object7).in("trust_delta", object8).in("max_iter_CG", object9).out("z").out("neg_log_l_change").out("i_CG").out("reached_trust_boundary");
        MLResults mLResults = script.execute();
        Matrix matrix = mLResults.getMatrix("z");
        double d = mLResults.getDouble("neg_log_l_change");
        long l = mLResults.getLong("i_CG");
        long l2 = mLResults.getLong("reached_trust_boundary");
        Get_CG_Steihaug_point_output get_CG_Steihaug_point_output = new Get_CG_Steihaug_point_output(matrix, d, l, l2);
        return get_CG_Steihaug_point_output;
    }

    public String get_CG_Steihaug_point__docs() {
        String string = "get_CG_Steihaug_point =\n  function (Matrix[double] X, Matrix[double] scale_X, Matrix[double] shift_X, Matrix[double] w,\n            Matrix[double] g, Matrix[double] beta, Matrix[double] lambda, double trust_delta, int max_iter_CG)\n    return (Matrix[double] z, double neg_log_l_change, int i_CG, int reached_trust_boundary)\n{\n      trust_delta_sq = trust_delta ^ 2;\n      size_CG = nrow (g);\n      z = matrix (0.0, rows = size_CG, cols = 1);\n      neg_log_l_change = 0.0;\n      reached_trust_boundary = 0;\n      g_reg = g + lambda * beta;\n      r_CG = g_reg;\n      p_CG = -r_CG;\n      rr_CG = sum(r_CG * r_CG);\n      eps_CG = rr_CG * min (0.25, sqrt (rr_CG));\n      converged_CG = 0;\n      if (rr_CG < eps_CG) {\n        converged_CG = 1;\n      }\n      \n      max_iteration_CG = max_iter_CG;\n      if (max_iteration_CG <= 0) {\n        max_iteration_CG = size_CG;\n      }\n      i_CG = 0;\n      while (converged_CG == 0)\n      {\n        i_CG = i_CG + 1;\n        ssX_p_CG = diag (scale_X) %*% p_CG;\n        ssX_p_CG [size_CG, ] = ssX_p_CG [size_CG, ] + t(shift_X) %*% p_CG;\n        temp_CG = t(X) %*% (w * (X %*% ssX_p_CG));\n        q_CG = (lambda * p_CG) + diag (scale_X) %*% temp_CG + shift_X %*% temp_CG [size_CG, ];\n        pq_CG = sum (p_CG * q_CG);\n        if (pq_CG <= 0) {\n          pp_CG = sum (p_CG * p_CG);  \n          if (pp_CG > 0) {\n            [z, neg_log_l_change] = \n              get_trust_boundary_point (g_reg, z, p_CG, q_CG, r_CG, pp_CG, pq_CG, trust_delta_sq);\n            reached_trust_boundary = 1;\n          } else {\n            neg_log_l_change = 0.5 * sum (z * (r_CG + g_reg));\n          }\n          converged_CG = 1;\n        }\n        if (converged_CG == 0) {\n          alpha_CG = rr_CG / pq_CG;\n          new_z = z + alpha_CG * p_CG;\n          if (sum(new_z * new_z) >= trust_delta_sq) {\n            pp_CG = sum (p_CG * p_CG);  \n            [z, neg_log_l_change] = \n              get_trust_boundary_point (g_reg, z, p_CG, q_CG, r_CG, pp_CG, pq_CG, trust_delta_sq);\n            reached_trust_boundary = 1;\n            converged_CG = 1;\n          }\n          if (converged_CG == 0) {\n            z = new_z;\n            old_rr_CG = rr_CG;\n            r_CG = r_CG + alpha_CG * q_CG;\n            rr_CG = sum(r_CG * r_CG);\n            if (i_CG == max_iteration_CG | rr_CG < eps_CG) {\n              neg_log_l_change = 0.5 * sum (z * (r_CG + g_reg));\n              reached_trust_boundary = 0;\n              converged_CG = 1;\n            }\n            if (converged_CG == 0) {\n              p_CG = -r_CG + (rr_CG / old_rr_CG) * p_CG;\n            }   }   }   }   }\n";
        return string;
    }

    public String get_CG_Steihaug_point__source() {
        String string = "get_CG_Steihaug_point =\n  function (Matrix[double] X, Matrix[double] scale_X, Matrix[double] shift_X, Matrix[double] w,\n            Matrix[double] g, Matrix[double] beta, Matrix[double] lambda, double trust_delta, int max_iter_CG)\n    return (Matrix[double] z, double neg_log_l_change, int i_CG, int reached_trust_boundary)\n{\n      trust_delta_sq = trust_delta ^ 2;\n      size_CG = nrow (g);\n      z = matrix (0.0, rows = size_CG, cols = 1);\n      neg_log_l_change = 0.0;\n      reached_trust_boundary = 0;\n      g_reg = g + lambda * beta;\n      r_CG = g_reg;\n      p_CG = -r_CG;\n      rr_CG = sum(r_CG * r_CG);\n      eps_CG = rr_CG * min (0.25, sqrt (rr_CG));\n      converged_CG = 0;\n      if (rr_CG < eps_CG) {\n        converged_CG = 1;\n      }\n      \n      max_iteration_CG = max_iter_CG;\n      if (max_iteration_CG <= 0) {\n        max_iteration_CG = size_CG;\n      }\n      i_CG = 0;\n      while (converged_CG == 0)\n      {\n        i_CG = i_CG + 1;\n        ssX_p_CG = diag (scale_X) %*% p_CG;\n        ssX_p_CG [size_CG, ] = ssX_p_CG [size_CG, ] + t(shift_X) %*% p_CG;\n        temp_CG = t(X) %*% (w * (X %*% ssX_p_CG));\n        q_CG = (lambda * p_CG) + diag (scale_X) %*% temp_CG + shift_X %*% temp_CG [size_CG, ];\n        pq_CG = sum (p_CG * q_CG);\n        if (pq_CG <= 0) {\n          pp_CG = sum (p_CG * p_CG);  \n          if (pp_CG > 0) {\n            [z, neg_log_l_change] = \n              get_trust_boundary_point (g_reg, z, p_CG, q_CG, r_CG, pp_CG, pq_CG, trust_delta_sq);\n            reached_trust_boundary = 1;\n          } else {\n            neg_log_l_change = 0.5 * sum (z * (r_CG + g_reg));\n          }\n          converged_CG = 1;\n        }\n        if (converged_CG == 0) {\n          alpha_CG = rr_CG / pq_CG;\n          new_z = z + alpha_CG * p_CG;\n          if (sum(new_z * new_z) >= trust_delta_sq) {\n            pp_CG = sum (p_CG * p_CG);  \n            [z, neg_log_l_change] = \n              get_trust_boundary_point (g_reg, z, p_CG, q_CG, r_CG, pp_CG, pq_CG, trust_delta_sq);\n            reached_trust_boundary = 1;\n            converged_CG = 1;\n          }\n          if (converged_CG == 0) {\n            z = new_z;\n            old_rr_CG = rr_CG;\n            r_CG = r_CG + alpha_CG * q_CG;\n            rr_CG = sum(r_CG * r_CG);\n            if (i_CG == max_iteration_CG | rr_CG < eps_CG) {\n              neg_log_l_change = 0.5 * sum (z * (r_CG + g_reg));\n              reached_trust_boundary = 0;\n              converged_CG = 1;\n            }\n            if (converged_CG == 0) {\n              p_CG = -r_CG + (rr_CG / old_rr_CG) * p_CG;\n            }   }   }   }   }\n";
        return string;
    }

    public Matrix straightenX(Object object, Object object2, Object object3) {
        String string = "source('scripts/algorithms/StepGLM.dml') as mlcontextns;w = mlcontextns::straightenX(X, eps, max_iter_CG);";
        Script script = new Script(string);
        script.in("X", object).in("eps", object2).in("max_iter_CG", object3).out("w");
        MLResults mLResults = script.execute();
        Matrix matrix = mLResults.getMatrix("w");
        return matrix;
    }

    public String straightenX__docs() {
        String string = "straightenX =\n  function (Matrix[double] X, double eps, int max_iter_CG)\n    return   (Matrix[double] w)\n{\n      w_X = t(colSums(X));\n      lambda_LS = 0.000001 * sum(X ^ 2) / ncol(X);\n      eps_LS = eps * nrow(X);\n      \n      # BEGIN LEAST SQUARES\n      \n      r_LS = - w_X;\n      z_LS = matrix (0.0, rows = ncol(X), cols = 1);\n      p_LS = - r_LS;\n      norm_r2_LS = sum (r_LS ^ 2);\n      i_LS = 0;\n      while (i_LS < max_iter_CG & i_LS < ncol(X) & norm_r2_LS >= eps_LS)\n      {\n        q_LS = t(X) %*% X %*% p_LS;\n        q_LS = q_LS + lambda_LS * p_LS;\n        alpha_LS = norm_r2_LS / sum (p_LS * q_LS);\n        z_LS = z_LS + alpha_LS * p_LS;\n        old_norm_r2_LS = norm_r2_LS;\n        r_LS = r_LS + alpha_LS * q_LS;\n        norm_r2_LS = sum (r_LS ^ 2);\n        p_LS = -r_LS + (norm_r2_LS / old_norm_r2_LS) * p_LS;\n        i_LS = i_LS + 1;\n      }\n      \n      # END LEAST SQUARES\n      \n      w = (nrow(X) / sum (w_X * z_LS)) * z_LS;\n    }\n";
        return string;
    }

    public String straightenX__source() {
        String string = "straightenX =\n  function (Matrix[double] X, double eps, int max_iter_CG)\n    return   (Matrix[double] w)\n{\n      w_X = t(colSums(X));\n      lambda_LS = 0.000001 * sum(X ^ 2) / ncol(X);\n      eps_LS = eps * nrow(X);\n      \n      # BEGIN LEAST SQUARES\n      \n      r_LS = - w_X;\n      z_LS = matrix (0.0, rows = ncol(X), cols = 1);\n      p_LS = - r_LS;\n      norm_r2_LS = sum (r_LS ^ 2);\n      i_LS = 0;\n      while (i_LS < max_iter_CG & i_LS < ncol(X) & norm_r2_LS >= eps_LS)\n      {\n        q_LS = t(X) %*% X %*% p_LS;\n        q_LS = q_LS + lambda_LS * p_LS;\n        alpha_LS = norm_r2_LS / sum (p_LS * q_LS);\n        z_LS = z_LS + alpha_LS * p_LS;\n        old_norm_r2_LS = norm_r2_LS;\n        r_LS = r_LS + alpha_LS * q_LS;\n        norm_r2_LS = sum (r_LS ^ 2);\n        p_LS = -r_LS + (norm_r2_LS / old_norm_r2_LS) * p_LS;\n        i_LS = i_LS + 1;\n      }\n      \n      # END LEAST SQUARES\n      \n      w = (nrow(X) / sum (w_X * z_LS)) * z_LS;\n    }\n";
        return string;
    }

    public Binomial_probability_two_column_output binomial_probability_two_column(Object object, Object object2, Object object3) {
        String string = "source('scripts/algorithms/StepGLM.dml') as mlcontextns;[Y_prob, isNaN] = mlcontextns::binomial_probability_two_column(linear_terms, link_type, link_power);";
        Script script = new Script(string);
        script.in("linear_terms", object).in("link_type", object2).in("link_power", object3).out("Y_prob").out("isNaN");
        MLResults mLResults = script.execute();
        Matrix matrix = mLResults.getMatrix("Y_prob");
        long l = mLResults.getLong("isNaN");
        Binomial_probability_two_column_output binomial_probability_two_column_output = new Binomial_probability_two_column_output(matrix, l);
        return binomial_probability_two_column_output;
    }

    public String binomial_probability_two_column__docs() {
        String string = "binomial_probability_two_column =\n  function (Matrix[double] linear_terms, int link_type, double link_power)\n    return   (Matrix[double] Y_prob, int isNaN)\n{\n      isNaN = 0;\n      num_records = nrow (linear_terms);\n      \n      # Define some auxiliary matrices\n      \n      ones_2 = matrix (1.0, rows = 1, cols = 2);\n      p_one_m_one = ones_2;\n      p_one_m_one [1, 2] = -1.0;\n      m_one_p_one = ones_2;\n      m_one_p_one [1, 1] = -1.0;\n      zero_one = ones_2;\n      zero_one [1, 1] = 0.0;\n      one_zero = ones_2;\n      one_zero [1, 2] = 0.0;\n      \n      zeros_r = matrix (0.0, rows = num_records, cols = 1);\n      ones_r = 1.0 + zeros_r;\n      \n      # Begin the function body\n      \n      Y_prob = zeros_r %*% ones_2;\n      if (link_type == 1) { # Binomial.power\n        if          (link_power == 0) { # Binomial.log\n          Y_prob = exp (linear_terms) %*% p_one_m_one + ones_r %*% zero_one;    \n        } else { if (link_power == 0.5) { # Binomial.sqrt\n          Y_prob = (linear_terms ^ 2) %*% p_one_m_one + ones_r %*% zero_one;    \n        } else {                          # Binomial.power_nonlog\n          if (sum (linear_terms < 0) == 0) {\n            Y_prob = (linear_terms ^ (1.0 / link_power)) %*% p_one_m_one + ones_r %*% zero_one;    \n          } else {isNaN = 1;}\n        }}\n      } else {              # Binomial.non_power\n        is_LT_pos_infinite = (linear_terms == Inf);\n        is_LT_neg_infinite = (linear_terms == -Inf);\n        is_LT_infinite = is_LT_pos_infinite %*% one_zero + is_LT_neg_infinite %*% zero_one;\n        finite_linear_terms = replace (target =        linear_terms, pattern =  Inf, replacement = 0);\n        finite_linear_terms = replace (target = finite_linear_terms, pattern = -Inf, replacement = 0);\n        if (link_type == 2)             { # Binomial.logit\n          Y_prob = exp (finite_linear_terms) %*% one_zero + ones_r %*% zero_one;\n          Y_prob = Y_prob / (rowSums (Y_prob) %*% ones_2);\n        } else { if (link_type == 3)    { # Binomial.probit\n          lt_pos_neg = (finite_linear_terms >= 0) %*% p_one_m_one + ones_r %*% zero_one;\n          t_gp = 1.0 / (1.0 + abs (finite_linear_terms) * 0.231641888);  # 0.231641888 = 0.3275911 / sqrt (2.0)\n          pt_gp = t_gp * ( 0.254829592 \n                           + t_gp * (-0.284496736 # \"Handbook of Mathematical Functions\", ed. by M. Abramowitz and I.A. Stegun,\n                                     + t_gp * ( 1.421413741 # U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299\n                                                + t_gp * (-1.453152027 \n                                                          + t_gp *   1.061405429))));\n          the_gauss_exp = exp (- (finite_linear_terms ^ 2) / 2.0);\n          Y_prob = lt_pos_neg + ((the_gauss_exp * pt_gp) %*% ones_2) * (0.5 - lt_pos_neg);\n        } else { if (link_type == 4)    { # Binomial.cloglog\n          the_exp = exp (finite_linear_terms);\n          the_exp_exp = exp (- the_exp);\n          is_too_small = ((10000000 + the_exp) == 10000000);\n          Y_prob [, 1] = (1 - is_too_small) * (1 - the_exp_exp) + is_too_small * the_exp * (1 - the_exp / 2);\n          Y_prob [, 2] = the_exp_exp;\n        } else { if (link_type == 5)    { # Binomial.cauchit\n          Y_prob = 0.5 + (atan (finite_linear_terms) %*% p_one_m_one) / pi;\n        } else {\n          isNaN = 1;\n        }}}}\n        Y_prob = Y_prob * ((1.0 - rowSums (is_LT_infinite)) %*% ones_2) + is_LT_infinite;\n      }   }            \n";
        return string;
    }

    public String binomial_probability_two_column__source() {
        String string = "binomial_probability_two_column =\n  function (Matrix[double] linear_terms, int link_type, double link_power)\n    return   (Matrix[double] Y_prob, int isNaN)\n{\n      isNaN = 0;\n      num_records = nrow (linear_terms);\n      \n      # Define some auxiliary matrices\n      \n      ones_2 = matrix (1.0, rows = 1, cols = 2);\n      p_one_m_one = ones_2;\n      p_one_m_one [1, 2] = -1.0;\n      m_one_p_one = ones_2;\n      m_one_p_one [1, 1] = -1.0;\n      zero_one = ones_2;\n      zero_one [1, 1] = 0.0;\n      one_zero = ones_2;\n      one_zero [1, 2] = 0.0;\n      \n      zeros_r = matrix (0.0, rows = num_records, cols = 1);\n      ones_r = 1.0 + zeros_r;\n      \n      # Begin the function body\n      \n      Y_prob = zeros_r %*% ones_2;\n      if (link_type == 1) { # Binomial.power\n        if          (link_power == 0) { # Binomial.log\n          Y_prob = exp (linear_terms) %*% p_one_m_one + ones_r %*% zero_one;    \n        } else { if (link_power == 0.5) { # Binomial.sqrt\n          Y_prob = (linear_terms ^ 2) %*% p_one_m_one + ones_r %*% zero_one;    \n        } else {                          # Binomial.power_nonlog\n          if (sum (linear_terms < 0) == 0) {\n            Y_prob = (linear_terms ^ (1.0 / link_power)) %*% p_one_m_one + ones_r %*% zero_one;    \n          } else {isNaN = 1;}\n        }}\n      } else {              # Binomial.non_power\n        is_LT_pos_infinite = (linear_terms == Inf);\n        is_LT_neg_infinite = (linear_terms == -Inf);\n        is_LT_infinite = is_LT_pos_infinite %*% one_zero + is_LT_neg_infinite %*% zero_one;\n        finite_linear_terms = replace (target =        linear_terms, pattern =  Inf, replacement = 0);\n        finite_linear_terms = replace (target = finite_linear_terms, pattern = -Inf, replacement = 0);\n        if (link_type == 2)             { # Binomial.logit\n          Y_prob = exp (finite_linear_terms) %*% one_zero + ones_r %*% zero_one;\n          Y_prob = Y_prob / (rowSums (Y_prob) %*% ones_2);\n        } else { if (link_type == 3)    { # Binomial.probit\n          lt_pos_neg = (finite_linear_terms >= 0) %*% p_one_m_one + ones_r %*% zero_one;\n          t_gp = 1.0 / (1.0 + abs (finite_linear_terms) * 0.231641888);  # 0.231641888 = 0.3275911 / sqrt (2.0)\n          pt_gp = t_gp * ( 0.254829592 \n                           + t_gp * (-0.284496736 # \"Handbook of Mathematical Functions\", ed. by M. Abramowitz and I.A. Stegun,\n                                     + t_gp * ( 1.421413741 # U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299\n                                                + t_gp * (-1.453152027 \n                                                          + t_gp *   1.061405429))));\n          the_gauss_exp = exp (- (finite_linear_terms ^ 2) / 2.0);\n          Y_prob = lt_pos_neg + ((the_gauss_exp * pt_gp) %*% ones_2) * (0.5 - lt_pos_neg);\n        } else { if (link_type == 4)    { # Binomial.cloglog\n          the_exp = exp (finite_linear_terms);\n          the_exp_exp = exp (- the_exp);\n          is_too_small = ((10000000 + the_exp) == 10000000);\n          Y_prob [, 1] = (1 - is_too_small) * (1 - the_exp_exp) + is_too_small * the_exp * (1 - the_exp / 2);\n          Y_prob [, 2] = the_exp_exp;\n        } else { if (link_type == 5)    { # Binomial.cauchit\n          Y_prob = 0.5 + (atan (finite_linear_terms) %*% p_one_m_one) / pi;\n        } else {\n          isNaN = 1;\n        }}}}\n        Y_prob = Y_prob * ((1.0 - rowSums (is_LT_infinite)) %*% ones_2) + is_LT_infinite;\n      }   }            \n";
        return string;
    }

    public Glm_initialize_output glm_initialize(Object object, Object object2, Object object3, Object object4, Object object5, Object object6, Object object7, Object object8) {
        String string = "source('scripts/algorithms/StepGLM.dml') as mlcontextns;[beta, saturated_log_l, isNaN] = mlcontextns::glm_initialize(X, Y, dist_type, var_power, link_type, link_power, icept_status, max_iter_CG);";
        Script script = new Script(string);
        script.in("X", object).in("Y", object2).in("dist_type", object3).in("var_power", object4).in("link_type", object5).in("link_power", object6).in("icept_status", object7).in("max_iter_CG", object8).out("beta").out("saturated_log_l").out("isNaN");
        MLResults mLResults = script.execute();
        Matrix matrix = mLResults.getMatrix("beta");
        double d = mLResults.getDouble("saturated_log_l");
        long l = mLResults.getLong("isNaN");
        Glm_initialize_output glm_initialize_output = new Glm_initialize_output(matrix, d, l);
        return glm_initialize_output;
    }

    public String glm_initialize__docs() {
        String string = "glm_initialize = function (Matrix[double] X, Matrix[double] Y, int dist_type, double var_power, int link_type, double link_power, int icept_status, int max_iter_CG)\n  return (Matrix[double] beta, double saturated_log_l, int isNaN)\n{\n    saturated_log_l = 0.0;\n    isNaN = 0;\n    y_corr = Y [, 1];\n    if (dist_type == 2) {\n      n_corr = rowSums (Y);\n      is_n_zero = (n_corr == 0);\n      y_corr = Y [, 1] / (n_corr + is_n_zero) + (0.5 - Y [, 1]) * is_n_zero;    \n    }\n    linear_terms = y_corr;\n    if (dist_type == 1 & link_type == 1) { # POWER DISTRIBUTION\n      if          (link_power ==  0) {\n        if (sum (y_corr < 0) == 0) {\n          is_zero_y_corr = (y_corr == 0);\n          linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);\n        } else { isNaN = 1; }\n      } else { if (link_power ==  1.0) {\n        linear_terms = y_corr;\n      } else { if (link_power == -1.0) {\n        linear_terms = 1.0 / y_corr;\n      } else { if (link_power ==  0.5) {\n        if (sum (y_corr < 0) == 0) {\n          linear_terms = sqrt (y_corr);\n        } else { isNaN = 1; }\n      } else { if (link_power >   0) {\n        if (sum (y_corr < 0) == 0) {\n          is_zero_y_corr = (y_corr == 0);\n          linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;\n        } else { isNaN = 1; }\n      } else {\n        if (sum (y_corr <= 0) == 0) {\n          linear_terms = y_corr ^ link_power;\n        } else { isNaN = 1; }\n      }}}}}\n    }\n    if (dist_type == 2 & link_type >= 1 & link_type <= 5)\n    { # BINOMIAL/BERNOULLI DISTRIBUTION\n      if          (link_type == 1 & link_power == 0)  { # Binomial.log\n        if (sum (y_corr < 0) == 0) {\n          is_zero_y_corr = (y_corr == 0);\n          linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);\n        } else { isNaN = 1; }\n      } else { if (link_type == 1 & link_power >  0)  { # Binomial.power_nonlog pos\n        if (sum (y_corr < 0) == 0) {\n          is_zero_y_corr = (y_corr == 0);\n          linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;\n        } else { isNaN = 1; }\n      } else { if (link_type == 1)                      { # Binomial.power_nonlog neg\n        if (sum (y_corr <= 0) == 0) {\n          linear_terms = y_corr ^ link_power;\n        } else { isNaN = 1; }\n      } else { \n        is_zero_y_corr = (y_corr <= 0);\n        is_one_y_corr  = (y_corr >= 1.0);\n        y_corr = y_corr * (1.0 - is_zero_y_corr) * (1.0 - is_one_y_corr) + 0.5 * (is_zero_y_corr + is_one_y_corr);\n        if (link_type == 2)                           { # Binomial.logit\n          linear_terms = log (y_corr / (1.0 - y_corr)) \n          + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);\n        } else { if (link_type == 3)                  { # Binomial.probit\n          y_below_half = y_corr + (1.0 - 2.0 * y_corr) * (y_corr > 0.5);\n          t = sqrt (- 2.0 * log (y_below_half));\n          approx_inv_Gauss_CDF = - t + (2.515517 + t * (0.802853 + t * 0.010328)) / (1.0 + t * (1.432788 + t * (0.189269 + t * 0.001308)));\n          linear_terms = approx_inv_Gauss_CDF * (1.0 - 2.0 * (y_corr > 0.5))\n          + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);\n        } else { if (link_type == 4)                  { # Binomial.cloglog\n          linear_terms = log (- log (1.0 - y_corr))\n          - log (- log (0.5)) * (is_zero_y_corr + is_one_y_corr)\n          + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);\n        } else { if (link_type == 5)                  { # Binomial.cauchit\n          linear_terms = tan ((y_corr - 0.5) * pi)\n          + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);\n        }}  }}}}}\n    }\n    \n    if (isNaN == 0) {\n      [saturated_log_l, isNaN] = \n        glm_log_likelihood_part (linear_terms, Y, dist_type, var_power, link_type, link_power);\n    }\n    \n    if ((dist_type == 1 & link_type == 1 & link_power == 0) |\n          (dist_type == 2 & link_type >= 2))\n    {    \n      desired_eta = 0.0;\n    } else { if (link_type == 1 & link_power == 0) {\n      desired_eta = log (0.5);\n    } else { if (link_type == 1) {\n      desired_eta = 0.5 ^ link_power;\n    } else {\n      desired_eta = 0.5;\n    }}}\n    \n    beta = matrix (0.0, rows = ncol(X), cols = 1);\n    \n    if (desired_eta != 0) {\n      if (icept_status == 1 | icept_status == 2) {\n        beta [nrow(beta), 1] = desired_eta;\n      } else {\n        # We want: avg (X %*% ssX_transform %*% beta) = desired_eta\n        # Note that \"ssX_transform\" is trivial here, hence ignored\n        \n        beta = straightenX (X, 0.000001, max_iter_CG);  \n        beta = beta * desired_eta;\n      }   }   }\n";
        return string;
    }

    public String glm_initialize__source() {
        String string = "glm_initialize = function (Matrix[double] X, Matrix[double] Y, int dist_type, double var_power, int link_type, double link_power, int icept_status, int max_iter_CG)\n  return (Matrix[double] beta, double saturated_log_l, int isNaN)\n{\n    saturated_log_l = 0.0;\n    isNaN = 0;\n    y_corr = Y [, 1];\n    if (dist_type == 2) {\n      n_corr = rowSums (Y);\n      is_n_zero = (n_corr == 0);\n      y_corr = Y [, 1] / (n_corr + is_n_zero) + (0.5 - Y [, 1]) * is_n_zero;    \n    }\n    linear_terms = y_corr;\n    if (dist_type == 1 & link_type == 1) { # POWER DISTRIBUTION\n      if          (link_power ==  0) {\n        if (sum (y_corr < 0) == 0) {\n          is_zero_y_corr = (y_corr == 0);\n          linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);\n        } else { isNaN = 1; }\n      } else { if (link_power ==  1.0) {\n        linear_terms = y_corr;\n      } else { if (link_power == -1.0) {\n        linear_terms = 1.0 / y_corr;\n      } else { if (link_power ==  0.5) {\n        if (sum (y_corr < 0) == 0) {\n          linear_terms = sqrt (y_corr);\n        } else { isNaN = 1; }\n      } else { if (link_power >   0) {\n        if (sum (y_corr < 0) == 0) {\n          is_zero_y_corr = (y_corr == 0);\n          linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;\n        } else { isNaN = 1; }\n      } else {\n        if (sum (y_corr <= 0) == 0) {\n          linear_terms = y_corr ^ link_power;\n        } else { isNaN = 1; }\n      }}}}}\n    }\n    if (dist_type == 2 & link_type >= 1 & link_type <= 5)\n    { # BINOMIAL/BERNOULLI DISTRIBUTION\n      if          (link_type == 1 & link_power == 0)  { # Binomial.log\n        if (sum (y_corr < 0) == 0) {\n          is_zero_y_corr = (y_corr == 0);\n          linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);\n        } else { isNaN = 1; }\n      } else { if (link_type == 1 & link_power >  0)  { # Binomial.power_nonlog pos\n        if (sum (y_corr < 0) == 0) {\n          is_zero_y_corr = (y_corr == 0);\n          linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;\n        } else { isNaN = 1; }\n      } else { if (link_type == 1)                      { # Binomial.power_nonlog neg\n        if (sum (y_corr <= 0) == 0) {\n          linear_terms = y_corr ^ link_power;\n        } else { isNaN = 1; }\n      } else { \n        is_zero_y_corr = (y_corr <= 0);\n        is_one_y_corr  = (y_corr >= 1.0);\n        y_corr = y_corr * (1.0 - is_zero_y_corr) * (1.0 - is_one_y_corr) + 0.5 * (is_zero_y_corr + is_one_y_corr);\n        if (link_type == 2)                           { # Binomial.logit\n          linear_terms = log (y_corr / (1.0 - y_corr)) \n          + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);\n        } else { if (link_type == 3)                  { # Binomial.probit\n          y_below_half = y_corr + (1.0 - 2.0 * y_corr) * (y_corr > 0.5);\n          t = sqrt (- 2.0 * log (y_below_half));\n          approx_inv_Gauss_CDF = - t + (2.515517 + t * (0.802853 + t * 0.010328)) / (1.0 + t * (1.432788 + t * (0.189269 + t * 0.001308)));\n          linear_terms = approx_inv_Gauss_CDF * (1.0 - 2.0 * (y_corr > 0.5))\n          + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);\n        } else { if (link_type == 4)                  { # Binomial.cloglog\n          linear_terms = log (- log (1.0 - y_corr))\n          - log (- log (0.5)) * (is_zero_y_corr + is_one_y_corr)\n          + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);\n        } else { if (link_type == 5)                  { # Binomial.cauchit\n          linear_terms = tan ((y_corr - 0.5) * pi)\n          + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);\n        }}  }}}}}\n    }\n    \n    if (isNaN == 0) {\n      [saturated_log_l, isNaN] = \n        glm_log_likelihood_part (linear_terms, Y, dist_type, var_power, link_type, link_power);\n    }\n    \n    if ((dist_type == 1 & link_type == 1 & link_power == 0) |\n          (dist_type == 2 & link_type >= 2))\n    {    \n      desired_eta = 0.0;\n    } else { if (link_type == 1 & link_power == 0) {\n      desired_eta = log (0.5);\n    } else { if (link_type == 1) {\n      desired_eta = 0.5 ^ link_power;\n    } else {\n      desired_eta = 0.5;\n    }}}\n    \n    beta = matrix (0.0, rows = ncol(X), cols = 1);\n    \n    if (desired_eta != 0) {\n      if (icept_status == 1 | icept_status == 2) {\n        beta [nrow(beta), 1] = desired_eta;\n      } else {\n        # We want: avg (X %*% ssX_transform %*% beta) = desired_eta\n        # Note that \"ssX_transform\" is trivial here, hence ignored\n        \n        beta = straightenX (X, 0.000001, max_iter_CG);  \n        beta = beta * desired_eta;\n      }   }   }\n";
        return string;
    }

    public double glm(Object object, Object object2, Object object3, Object object4, Object object5, Object object6) {
        String string = "source('scripts/algorithms/StepGLM.dml') as mlcontextns;AIC = mlcontextns::glm(X, Y, intercept_status, num_features_orig, Selected, fileB);";
        Script script = new Script(string);
        script.in("X", object).in("Y", object2).in("intercept_status", object3).in("num_features_orig", object4).in("Selected", object5).in("fileB", object6).out("AIC");
        MLResults mLResults = script.execute();
        double d = mLResults.getDouble("AIC");
        return d;
    }

    public String glm__docs() {
        String string = "glm = function (Matrix[Double] X, Matrix[Double] Y, Int intercept_status, Double num_features_orig, Matrix[Double] Selected, String fileB) return (Double AIC) {\n\t\t\n\t# distribution family code: 1 = Power, 2 = Bernoulli/Binomial; currently only Bernouli distribution family is supported!\t\t\n\tdistribution_type = 2;                \t\t# $dfam = 2;\n\tvariance_as_power_of_the_mean = 0.0;  \t\t# $vpow = 0.0;\n\t# link function code: 0 = canonical (depends on distribution), 1 = Power, 2 = Logit, 3 = Probit, 4 = Cloglog, 5 = Cauchit;\n\t# currently only log (link = 1), logit (link = 2), probit (link = 3), and cloglog (link = 4) are supported!\n\tlink_type = ifdef ($link, 2);         \t\t# $link = 2;\n\tlink_as_power_of_the_mean = 0.0;      \t\t# $lpow = 0.0;\n\n\tdispersion = ifdef ($disp, 0.0);            # $disp = 0.0;\n\teps = ifdef ($tol, 0.000001);               # $tol  = 0.000001;\n\tmax_iteration_IRLS = ifdef ($moi, 200);     # $moi  = 200;\n\tmax_iteration_CG = ifdef ($mii, 0);         # $mii  = 0;\n\n\tvariance_as_power_of_the_mean = as.double (variance_as_power_of_the_mean);\n\tlink_as_power_of_the_mean = as.double (link_as_power_of_the_mean);\n\n\tdispersion = as.double (dispersion);\n\teps = as.double (eps);              \n\n\t# Default values for output statistics:\n\tregularization = 0.0;\n\ttermination_code     = 0.0;\n\tmin_beta             = NaN;\n\ti_min_beta           = NaN;\n\tmax_beta             = NaN;\n\ti_max_beta           = NaN;\n\tintercept_value      = NaN;\n\tdispersion           = NaN;\n\testimated_dispersion = NaN;\n\tdeviance_nodisp      = NaN;\n\tdeviance             = NaN;\n                  \n\t#####   INITIALIZE THE PARAMETERS   #####\n                  \n    num_records  = nrow (X);\n    num_features = ncol (X);\n    zeros_r = matrix (0, rows = num_records, cols = 1);\n    ones_r = 1 + zeros_r;\n                  \n    # Introduce the intercept, shift and rescale the columns of X if needed\n                  \n    if (intercept_status == 1 | intercept_status == 2) { # add the intercept column\n\t\tX = cbind (X, ones_r);\n        num_features = ncol (X);\n\t}\n                  \n    scale_lambda = matrix (1, rows = num_features, cols = 1);\n    if (intercept_status == 1 | intercept_status == 2) {\n\t\tscale_lambda [num_features, 1] = 0;\n    }\n                  \n    if (intercept_status == 2) {  # scale-&-shift X columns to mean 0, variance 1\n\t\t# Important assumption: X [, num_features] = ones_r\n        avg_X_cols = t(colSums(X)) / num_records;\n        var_X_cols = (t(colSums (X ^ 2)) - num_records * (avg_X_cols ^ 2)) / (num_records - 1);\n        is_unsafe = (var_X_cols <= 0);\n        scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);\n        scale_X [num_features, 1] = 1;\n        shift_X = - avg_X_cols * scale_X;\n        shift_X [num_features, 1] = 0;\n        rowSums_X_sq = (X ^ 2) %*% (scale_X ^ 2) + X %*% (2 * scale_X * shift_X) + sum (shift_X ^ 2);\n\t} else {\n        scale_X = matrix (1, rows = num_features, cols = 1);\n        shift_X = matrix (0, rows = num_features, cols = 1);\n        rowSums_X_sq = rowSums (X ^ 2);\n    }\t\n                  \n    # Henceforth we replace \"X\" with \"X %*% (SHIFT/SCALE TRANSFORM)\" and rowSums(X ^ 2)\n    # with \"rowSums_X_sq\" in order to preserve the sparsity of X under shift and scale.\n    # The transform is then associatively applied to the other side of the expression,\n    # and is rewritten via \"scale_X\" and \"shift_X\" as follows:\n    #\n    # ssX_A  = (SHIFT/SCALE TRANSFORM) %*% A    --- is rewritten as:\n    # ssX_A  = diag (scale_X) %*% A;\n    # ssX_A [num_features, ] = ssX_A [num_features, ] + t(shift_X) %*% A;\n    #\n    # tssX_A = t(SHIFT/SCALE TRANSFORM) %*% A   --- is rewritten as:\n    # tssX_A = diag (scale_X) %*% A + shift_X %*% A [num_features, ];\n                  \n    # Initialize other input-dependent parameters\n                  \n    lambda = scale_lambda * regularization;\n    if (max_iteration_CG == 0) {\n\t\tmax_iteration_CG = num_features;\n    }\n                  \n    # Set up the canonical link, if requested [Then we have: Var(mu) * (d link / d mu) = const]\n                  \n    if (link_type == 0) {\n\t\tif (distribution_type == 1) {\n\t\t\tlink_type = 1;\n            link_as_power_of_the_mean = 1.0 - variance_as_power_of_the_mean;\n\t\t} else { \n\t\t\tif (distribution_type == 2) {\n\t\t\t\tlink_type = 2;\n\t\t\t}   \n\t\t}   \n\t}\n                                \n    # For power distributions and/or links, we use two constants,\n    # \"variance as power of the mean\" and \"link_as_power_of_the_mean\",\n    # to specify the variance and the link as arbitrary powers of the\n    # mean.  However, the variance-powers of 1.0 (Poisson family) and\n    # 2.0 (Gamma family) have to be treated as special cases, because\n    # these values integrate into logarithms.  The link-power of 0.0\n    # is also special as it represents the logarithm link.\n                  \n    num_response_columns = ncol (Y);\n    is_supported = 0;              \n\tif (num_response_columns == 2 & distribution_type == 2 & link_type >= 1 & link_type <= 4) { # BERNOULLI DISTRIBUTION\n\t\tis_supported = 1;\t\t\t  \n\t}\n\tif (num_response_columns == 1 & distribution_type == 2) {\n\t\tprint (\"Error: Bernoulli response matrix has not been converted into two-column format.\");\n    }\n\n\tif (is_supported == 1) {\n                    \n\t\t#####   INITIALIZE THE BETAS   #####\n                    \n        [beta, saturated_log_l, isNaN] = \n\t\t\tglm_initialize (X, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean, intercept_status, max_iteration_CG);\n\t\t\t\t\t  \n\t\t# print(\" --- saturated logLik \" + saturated_log_l);\n\t\t\t\t\t  \n        if (isNaN == 0) {\n                      \n\t\t\t#####  START OF THE MAIN PART  #####\n                      \n            sum_X_sq = sum (rowSums_X_sq);\n            trust_delta = 0.5 * sqrt (num_features) / max (sqrt (rowSums_X_sq));\n            ###  max_trust_delta = trust_delta * 10000.0;\n            log_l = 0.0;\n            deviance_nodisp = 0.0;\n            new_deviance_nodisp = 0.0;\n            isNaN_log_l = 2;\n            newbeta = beta;\n            g = matrix (0.0, rows = num_features, cols = 1);\n            g_norm = sqrt (sum ((g + lambda * beta) ^ 2));\n            accept_new_beta = 1;\n            reached_trust_boundary = 0;\n            neg_log_l_change_predicted = 0.0;\n            i_IRLS = 0;\n                      \n            # print (\"BEGIN IRLS ITERATIONS...\");\n                      \n            ssX_newbeta = diag (scale_X) %*% newbeta;\n            ssX_newbeta [num_features, ] = ssX_newbeta [num_features, ] + t(shift_X) %*% newbeta;\n            all_linear_terms = X %*% ssX_newbeta;\n                      \n            [new_log_l, isNaN_new_log_l] = glm_log_likelihood_part\n\t\t\t\t(all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);\n\t\t\t\t\t  \n            if (isNaN_new_log_l == 0) {\n\t\t\t\tnew_deviance_nodisp = 2.0 * (saturated_log_l - new_log_l);\n                new_log_l = new_log_l - 0.5 * sum (lambda * newbeta ^ 2);\n            }\n                      \n            while (termination_code == 0) {\n\t\t\t\taccept_new_beta = 1;\n                        \n                if (i_IRLS > 0) {\n\t\t\t\t\tif (isNaN_log_l == 0) {\n\t\t\t\t\t\taccept_new_beta = 0;\n                    }\n                          \n                    # Decide whether to accept a new iteration point and update the trust region\n                    # See Alg. 4.1 on p. 69 of \"Numerical Optimization\" 2nd ed. by Nocedal and Wright\n                          \n                    rho = (- new_log_l + log_l) / neg_log_l_change_predicted;\n\t\t\t\t\tif (rho < 0.25 | isNaN_new_log_l == 1) {\n\t\t\t\t\t\ttrust_delta = 0.25 * trust_delta;\n\t\t\t\t\t}\n\t\t\t\t\tif (rho > 0.75 & isNaN_new_log_l == 0 & reached_trust_boundary == 1) {\n\t\t\t\t\t\ttrust_delta = 2 * trust_delta;\n                            \n\t\t\t\t\t\t### if (trust_delta > max_trust_delta) {\n\t\t\t\t\t\t###     trust_delta = max_trust_delta;\n\t\t\t\t\t\t### }\n\t\t\t\t\t}\n\t\t\t\t\tif (rho > 0.1 & isNaN_new_log_l == 0) {\n\t\t\t\t\t\taccept_new_beta = 1;\n\t\t\t\t\t}\n\t\t\t\t}\n                        \n                if (accept_new_beta == 1) {\n\t\t\t\t\tbeta = newbeta;  log_l = new_log_l;  deviance_nodisp = new_deviance_nodisp;  isNaN_log_l = isNaN_new_log_l;\n                          \n\t\t\t\t\t[g_Y, w] = glm_dist (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);\n                          \n\t\t\t\t\t# We introduced these variables to avoid roundoff errors:\n\t\t\t\t\t#     g_Y = y_residual / (y_var * link_grad);\n\t\t\t\t\t#     w   = 1.0 / (y_var * link_grad * link_grad);\n                          \n\t\t\t\t\tgXY = - t(X) %*% g_Y;\n\t\t\t\t\tg = diag (scale_X) %*% gXY + shift_X %*% gXY [num_features, ];\n\t\t\t\t\tg_norm = sqrt (sum ((g + lambda * beta) ^ 2));\n\t\t\t\t}\n                        \n                [z, neg_log_l_change_predicted, num_CG_iters, reached_trust_boundary] = \n\t\t\t\t\tget_CG_Steihaug_point (X, scale_X, shift_X, w, g, beta, lambda, trust_delta, max_iteration_CG);\n                        \n\t\t\t\tnewbeta = beta + z;\n                        \n\t\t\t\tssX_newbeta = diag (scale_X) %*% newbeta;\n\t\t\t\tssX_newbeta [num_features, ] = ssX_newbeta [num_features, ] + t(shift_X) %*% newbeta;\n\t\t\t\tall_linear_terms = X %*% ssX_newbeta;\n                        \n\t\t\t\t[new_log_l, isNaN_new_log_l] = glm_log_likelihood_part\n\t\t\t\t\t(all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);\n                        \n\t\t\t\tif (isNaN_new_log_l == 0) {\n\t\t\t\t\tnew_deviance_nodisp = 2.0 * (saturated_log_l - new_log_l);\n\t\t\t\t\tnew_log_l = new_log_l - 0.5 * sum (lambda * newbeta ^ 2);\n\t\t\t\t}\n                        \n\t\t\t\tlog_l_change = new_log_l - log_l;               # R's criterion for termination: |dev - devold|/(|dev| + 0.1) < eps\n                       \n\t\t\t\tif (reached_trust_boundary == 0 & isNaN_new_log_l == 0 & \n\t\t\t\t   (2.0 * abs (log_l_change) < eps * (deviance_nodisp + 0.1) | abs (log_l_change) < (abs (log_l) + abs (new_log_l)) * 0.00000000000001) ) {\n\t\t\t\t\ttermination_code = 1;\n\t\t\t\t}\n\t\t\t\trho = - log_l_change / neg_log_l_change_predicted;\n\t\t\t\tz_norm = sqrt (sum (z * z));\n                                              \n\t\t\t\ti_IRLS = i_IRLS + 1;                        \n                        \n\t\t\t\tif (i_IRLS == max_iteration_IRLS) {\n\t\t\t\t\ttermination_code = 2;\n\t\t\t\t}\n\t\t\t}\n                      \n            beta = newbeta;\n            log_l = new_log_l;\n            deviance_nodisp = new_deviance_nodisp;\n                     \n            #---------------------------- last part\n\n\t\t\tif (termination_code != 1) {\n\t\t\t\tprint (\"One of the runs of GLM did not converged in \" + i_IRLS + \" steps!\");\n\t\t\t}\n                      \n            ##### COMPUTE AIC ##### \n            \n\t\t\tif (distribution_type == 2 & link_type >= 1 & link_type <= 4) {\t\t\t\n\t\t\t\tAIC = -2 * log_l;\n\t\t\t\tif (sum (X) != 0) {\n\t\t\t\t\tAIC = AIC + 2 * num_features;\t\n\t\t\t\t}\n\t\t\t} else {\n\t\t\t\tstop (\"Currently only the Bernoulli distribution family the following link functions are supported: log, logit, probit, and cloglog!\");\n\t\t\t}\n\t\t\t\t\t           \n            if (fileB != \" \") {\n\t\t\t\tfileO = ifdef ($O, \" \");\n\t\t\t\tfileS = $S;\n\t\t\t\tfmt  = ifdef ($fmt, \"text\");\t\n\t\t\t\n\t\t\t\t# Output which features give the best AIC and are being used for linear regression \n\t\t\t\twrite (Selected, fileS, format=fmt);\n\t\t\n\t\t\t\tssX_beta = diag (scale_X) %*% beta;\n                ssX_beta [num_features, ] = ssX_beta [num_features, ] + t(shift_X) %*% beta;\n\t\t\t\tif (intercept_status == 2) {\n\t\t\t\t\tbeta_out = cbind (ssX_beta, beta);\n                } else {\n\t\t\t\t\tbeta_out = ssX_beta;\n                }\n                \n                if (intercept_status == 0 & num_features == 1) {\n\t\t\t\t\tp = sum (X == 1);\n\t\t\t\t\tif (p == num_records) {\n\t\t\t\t\t\tbeta_out = beta_out[1,];\n\t\t\t\t\t}\t\t\t\t\t\n                } \n\n\t\t\t\t\t\t\t\t\n                if (intercept_status == 1 | intercept_status == 2) {\n\t\t\t\t\tintercept_value = as.scalar (beta_out [num_features, 1]);\n                    beta_noicept = beta_out [1 : (num_features - 1), 1];\n                } else {\n\t\t\t\t\tbeta_noicept = beta_out [1 : num_features, 1];\n                }\n                min_beta = min (beta_noicept);\n                max_beta = max (beta_noicept);\n                tmp_i_min_beta = rowIndexMin (t(beta_noicept))\n                i_min_beta = as.scalar (tmp_i_min_beta [1, 1]);\n                tmp_i_max_beta = rowIndexMax (t(beta_noicept))\n                i_max_beta = as.scalar (tmp_i_max_beta [1, 1]);\n                        \n                #####  OVER-DISPERSION PART  #####\n                      \n                all_linear_terms = X %*% ssX_beta;\n\t\t\t\t[g_Y, w] = glm_dist (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);\n                        \n                pearson_residual_sq = g_Y ^ 2 / w;\n                pearson_residual_sq = replace (target = pearson_residual_sq, pattern = NaN, replacement = 0);\n                # pearson_residual_sq = (y_residual ^ 2) / y_var;\n                        \n                if (num_records > num_features) {\n\t\t\t\t\testimated_dispersion = sum (pearson_residual_sq) / (num_records - num_features);\n                }\n                if (dispersion <= 0) {\n\t\t\t\t\tdispersion = estimated_dispersion;\n                }\n                deviance = deviance_nodisp / dispersion;\n                        \n                #####  END OF THE MAIN PART  #####\n                        \n                str = \"BETA_MIN,\" + min_beta;\n                str = append (str, \"BETA_MIN_INDEX,\" + i_min_beta);\n                str = append (str, \"BETA_MAX,\" + max_beta);\n                str = append (str, \"BETA_MAX_INDEX,\" + i_max_beta);\n                str = append (str, \"INTERCEPT,\" + intercept_value);\n                str = append (str, \"DISPERSION,\" + dispersion);\n                str = append (str, \"DISPERSION_EST,\" + estimated_dispersion);\n                str = append (str, \"DEVIANCE_UNSCALED,\" + deviance_nodisp);\n                str = append (str, \"DEVIANCE_SCALED,\" + deviance);\n                        \n                if (fileO != \" \") {\n\t\t\t\t\twrite (str, fileO);\n                } \n\t\t\t\telse {\n\t\t\t\t\tprint (str);\n                }\n\t\t\t\n\t\t\t\t# Prepare the output matrix\n\t\t\t\tprint (\"Writing the output matrix...\");\n                if (intercept_status == 0 & num_features == 1) { \n\t\t\t\t\tif (p == num_records) {\n\t\t\t\t\t\tbeta_out_tmp = matrix (0, rows = num_features_orig + 1, cols = 1); \n\t\t\t\t\t\tbeta_out_tmp[num_features_orig + 1,] = beta_out;\n\t\t\t\t\t\tbeta_out = beta_out_tmp;\n\t\t\t\t\t\twrite (beta_out, fileB, format=fmt);\n\t\t\t\t\t\tstop (\"\");\n\t\t\t\t\t} else if (sum (X) == 0){\n\t\t\t\t\t\tbeta_out = matrix (0, rows = num_features_orig, cols = 1);\n\t\t\t\t\t\twrite (beta_out, fileB, format=fmt);\n\t\t\t\t\t\tstop (\"\");\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\tno_selected = ncol (Selected);\n\t\t\t\tmax_selected = max (Selected);\n\t\t\t\tlast = max_selected + 1;\t\n\t\t\n\t\t\t\tif (intercept_status != 0) {\n\t\t\n\t\t\t\t\tSelected_ext = cbind (Selected, as.matrix (last));\t\t\t\n\t\t\t\t\tP1 = table (seq (1, ncol (Selected_ext)), t(Selected_ext)); \n\n\t\t\t\t\tif (intercept_status == 2) {\n\t\t\t\n\t\t\t\t\t\tP1_ssX_beta = P1 * ssX_beta;\n\t\t\t\t\t\tP2_ssX_beta = colSums (P1_ssX_beta);\n\t\t\t\t\t\tP1_beta = P1 * beta;\n\t\t\t\t\t\tP2_beta = colSums (P1_beta);\n\t\t\t\t\n\t\t\t\t\t\tif (max_selected < num_features_orig) {\n\t\t\t\t\t\t\n\t\t\t\t\t\t\tP2_ssX_beta = cbind (P2_ssX_beta, matrix (0, rows=1, cols=(num_features_orig - max_selected)));\n\t\t\t\t\t\t\tP2_beta = cbind (P2_beta, matrix (0, rows=1, cols=(num_features_orig - max_selected)));\n\t\t\t\t\t\t\t\n\t\t\t\t\t\t\tP2_ssX_beta[1, num_features_orig+1] = P2_ssX_beta[1, max_selected + 1]; \n\t\t\t\t\t\t\tP2_ssX_beta[1, max_selected + 1] = 0;\n\t\t\t\t\t\t\t\n\t\t\t\t\t\t\tP2_beta[1, num_features_orig+1] = P2_beta[1, max_selected + 1]; \n\t\t\t\t\t\t\tP2_beta[1, max_selected + 1] = 0;\n\n\t\t\t\t\t\t}\n\t\t\t\t\t\tbeta_out = cbind (t(P2_ssX_beta), t(P2_beta));\n\t\t\t\t\n\t\t\t\t\t} else {\n\t\t\t\n\t\t\t\t\t\tP1_beta = P1 * beta;\n\t\t\t\t\t\tP2_beta = colSums (P1_beta);\n\t\t\t\t\n\t\t\t\t\t\tif (max_selected < num_features_orig) {\n\t\t\t\t\t\t\tP2_beta = cbind (P2_beta, matrix (0, rows=1, cols=(num_features_orig - max_selected)));\n\t\t\t\t\t\t\tP2_beta[1, num_features_orig+1] = P2_beta[1, max_selected + 1] ; \n\t\t\t\t\t\t\tP2_beta[1, max_selected + 1] = 0;\n\t\t\t\t\t\t}\n\t\t\t\t\t\tbeta_out = t(P2_beta);\n\t\t\t\t\n\t\t\t\t\t}\n\t\t\t\t} else {\n\t\t\n\t\t\t\t\tP1 = table (seq (1, no_selected), t(Selected)); \n\t\t\t\t\tP1_beta = P1 * beta;\n\t\t\t\t\tP2_beta = colSums (P1_beta);\t\n\n\t\t\t\t\tif (max_selected < num_features_orig) {\n\t\t\t\t\t\tP2_beta = cbind (P2_beta, matrix (0, rows=1, cols=(num_features_orig - max_selected)));\n\t\t\t\t\t}\t\t\n\n\t\t\t\t\tbeta_out = t(P2_beta);\t\n\t\t\t\t}\n\t\n\t\t\t\twrite ( beta_out, fileB, format=fmt );\n\t\t\t\n\t\t\t}\n                      \n\t\t} else { \n\t\t\tstop (\"Input matrices X and/or Y are out of range!\"); \n        }\n\t} else { \n\t\tstop (\"Response matrix with \" + num_response_columns + \" columns, distribution family (\" + distribution_type + \", \" + variance_as_power_of_the_mean\n               + \") and link family (\" + link_type + \", \" + link_as_power_of_the_mean + \") are NOT supported together.\");\n    }\n}\n";
        return string;
    }

    public String glm__source() {
        String string = "glm = function (Matrix[Double] X, Matrix[Double] Y, Int intercept_status, Double num_features_orig, Matrix[Double] Selected, String fileB) return (Double AIC) {\n\t\t\n\t# distribution family code: 1 = Power, 2 = Bernoulli/Binomial; currently only Bernouli distribution family is supported!\t\t\n\tdistribution_type = 2;                \t\t# $dfam = 2;\n\tvariance_as_power_of_the_mean = 0.0;  \t\t# $vpow = 0.0;\n\t# link function code: 0 = canonical (depends on distribution), 1 = Power, 2 = Logit, 3 = Probit, 4 = Cloglog, 5 = Cauchit;\n\t# currently only log (link = 1), logit (link = 2), probit (link = 3), and cloglog (link = 4) are supported!\n\tlink_type = ifdef ($link, 2);         \t\t# $link = 2;\n\tlink_as_power_of_the_mean = 0.0;      \t\t# $lpow = 0.0;\n\n\tdispersion = ifdef ($disp, 0.0);            # $disp = 0.0;\n\teps = ifdef ($tol, 0.000001);               # $tol  = 0.000001;\n\tmax_iteration_IRLS = ifdef ($moi, 200);     # $moi  = 200;\n\tmax_iteration_CG = ifdef ($mii, 0);         # $mii  = 0;\n\n\tvariance_as_power_of_the_mean = as.double (variance_as_power_of_the_mean);\n\tlink_as_power_of_the_mean = as.double (link_as_power_of_the_mean);\n\n\tdispersion = as.double (dispersion);\n\teps = as.double (eps);              \n\n\t# Default values for output statistics:\n\tregularization = 0.0;\n\ttermination_code     = 0.0;\n\tmin_beta             = NaN;\n\ti_min_beta           = NaN;\n\tmax_beta             = NaN;\n\ti_max_beta           = NaN;\n\tintercept_value      = NaN;\n\tdispersion           = NaN;\n\testimated_dispersion = NaN;\n\tdeviance_nodisp      = NaN;\n\tdeviance             = NaN;\n                  \n\t#####   INITIALIZE THE PARAMETERS   #####\n                  \n    num_records  = nrow (X);\n    num_features = ncol (X);\n    zeros_r = matrix (0, rows = num_records, cols = 1);\n    ones_r = 1 + zeros_r;\n                  \n    # Introduce the intercept, shift and rescale the columns of X if needed\n                  \n    if (intercept_status == 1 | intercept_status == 2) { # add the intercept column\n\t\tX = cbind (X, ones_r);\n        num_features = ncol (X);\n\t}\n                  \n    scale_lambda = matrix (1, rows = num_features, cols = 1);\n    if (intercept_status == 1 | intercept_status == 2) {\n\t\tscale_lambda [num_features, 1] = 0;\n    }\n                  \n    if (intercept_status == 2) {  # scale-&-shift X columns to mean 0, variance 1\n\t\t# Important assumption: X [, num_features] = ones_r\n        avg_X_cols = t(colSums(X)) / num_records;\n        var_X_cols = (t(colSums (X ^ 2)) - num_records * (avg_X_cols ^ 2)) / (num_records - 1);\n        is_unsafe = (var_X_cols <= 0);\n        scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);\n        scale_X [num_features, 1] = 1;\n        shift_X = - avg_X_cols * scale_X;\n        shift_X [num_features, 1] = 0;\n        rowSums_X_sq = (X ^ 2) %*% (scale_X ^ 2) + X %*% (2 * scale_X * shift_X) + sum (shift_X ^ 2);\n\t} else {\n        scale_X = matrix (1, rows = num_features, cols = 1);\n        shift_X = matrix (0, rows = num_features, cols = 1);\n        rowSums_X_sq = rowSums (X ^ 2);\n    }\t\n                  \n    # Henceforth we replace \"X\" with \"X %*% (SHIFT/SCALE TRANSFORM)\" and rowSums(X ^ 2)\n    # with \"rowSums_X_sq\" in order to preserve the sparsity of X under shift and scale.\n    # The transform is then associatively applied to the other side of the expression,\n    # and is rewritten via \"scale_X\" and \"shift_X\" as follows:\n    #\n    # ssX_A  = (SHIFT/SCALE TRANSFORM) %*% A    --- is rewritten as:\n    # ssX_A  = diag (scale_X) %*% A;\n    # ssX_A [num_features, ] = ssX_A [num_features, ] + t(shift_X) %*% A;\n    #\n    # tssX_A = t(SHIFT/SCALE TRANSFORM) %*% A   --- is rewritten as:\n    # tssX_A = diag (scale_X) %*% A + shift_X %*% A [num_features, ];\n                  \n    # Initialize other input-dependent parameters\n                  \n    lambda = scale_lambda * regularization;\n    if (max_iteration_CG == 0) {\n\t\tmax_iteration_CG = num_features;\n    }\n                  \n    # Set up the canonical link, if requested [Then we have: Var(mu) * (d link / d mu) = const]\n                  \n    if (link_type == 0) {\n\t\tif (distribution_type == 1) {\n\t\t\tlink_type = 1;\n            link_as_power_of_the_mean = 1.0 - variance_as_power_of_the_mean;\n\t\t} else { \n\t\t\tif (distribution_type == 2) {\n\t\t\t\tlink_type = 2;\n\t\t\t}   \n\t\t}   \n\t}\n                                \n    # For power distributions and/or links, we use two constants,\n    # \"variance as power of the mean\" and \"link_as_power_of_the_mean\",\n    # to specify the variance and the link as arbitrary powers of the\n    # mean.  However, the variance-powers of 1.0 (Poisson family) and\n    # 2.0 (Gamma family) have to be treated as special cases, because\n    # these values integrate into logarithms.  The link-power of 0.0\n    # is also special as it represents the logarithm link.\n                  \n    num_response_columns = ncol (Y);\n    is_supported = 0;              \n\tif (num_response_columns == 2 & distribution_type == 2 & link_type >= 1 & link_type <= 4) { # BERNOULLI DISTRIBUTION\n\t\tis_supported = 1;\t\t\t  \n\t}\n\tif (num_response_columns == 1 & distribution_type == 2) {\n\t\tprint (\"Error: Bernoulli response matrix has not been converted into two-column format.\");\n    }\n\n\tif (is_supported == 1) {\n                    \n\t\t#####   INITIALIZE THE BETAS   #####\n                    \n        [beta, saturated_log_l, isNaN] = \n\t\t\tglm_initialize (X, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean, intercept_status, max_iteration_CG);\n\t\t\t\t\t  \n\t\t# print(\" --- saturated logLik \" + saturated_log_l);\n\t\t\t\t\t  \n        if (isNaN == 0) {\n                      \n\t\t\t#####  START OF THE MAIN PART  #####\n                      \n            sum_X_sq = sum (rowSums_X_sq);\n            trust_delta = 0.5 * sqrt (num_features) / max (sqrt (rowSums_X_sq));\n            ###  max_trust_delta = trust_delta * 10000.0;\n            log_l = 0.0;\n            deviance_nodisp = 0.0;\n            new_deviance_nodisp = 0.0;\n            isNaN_log_l = 2;\n            newbeta = beta;\n            g = matrix (0.0, rows = num_features, cols = 1);\n            g_norm = sqrt (sum ((g + lambda * beta) ^ 2));\n            accept_new_beta = 1;\n            reached_trust_boundary = 0;\n            neg_log_l_change_predicted = 0.0;\n            i_IRLS = 0;\n                      \n            # print (\"BEGIN IRLS ITERATIONS...\");\n                      \n            ssX_newbeta = diag (scale_X) %*% newbeta;\n            ssX_newbeta [num_features, ] = ssX_newbeta [num_features, ] + t(shift_X) %*% newbeta;\n            all_linear_terms = X %*% ssX_newbeta;\n                      \n            [new_log_l, isNaN_new_log_l] = glm_log_likelihood_part\n\t\t\t\t(all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);\n\t\t\t\t\t  \n            if (isNaN_new_log_l == 0) {\n\t\t\t\tnew_deviance_nodisp = 2.0 * (saturated_log_l - new_log_l);\n                new_log_l = new_log_l - 0.5 * sum (lambda * newbeta ^ 2);\n            }\n                      \n            while (termination_code == 0) {\n\t\t\t\taccept_new_beta = 1;\n                        \n                if (i_IRLS > 0) {\n\t\t\t\t\tif (isNaN_log_l == 0) {\n\t\t\t\t\t\taccept_new_beta = 0;\n                    }\n                          \n                    # Decide whether to accept a new iteration point and update the trust region\n                    # See Alg. 4.1 on p. 69 of \"Numerical Optimization\" 2nd ed. by Nocedal and Wright\n                          \n                    rho = (- new_log_l + log_l) / neg_log_l_change_predicted;\n\t\t\t\t\tif (rho < 0.25 | isNaN_new_log_l == 1) {\n\t\t\t\t\t\ttrust_delta = 0.25 * trust_delta;\n\t\t\t\t\t}\n\t\t\t\t\tif (rho > 0.75 & isNaN_new_log_l == 0 & reached_trust_boundary == 1) {\n\t\t\t\t\t\ttrust_delta = 2 * trust_delta;\n                            \n\t\t\t\t\t\t### if (trust_delta > max_trust_delta) {\n\t\t\t\t\t\t###     trust_delta = max_trust_delta;\n\t\t\t\t\t\t### }\n\t\t\t\t\t}\n\t\t\t\t\tif (rho > 0.1 & isNaN_new_log_l == 0) {\n\t\t\t\t\t\taccept_new_beta = 1;\n\t\t\t\t\t}\n\t\t\t\t}\n                        \n                if (accept_new_beta == 1) {\n\t\t\t\t\tbeta = newbeta;  log_l = new_log_l;  deviance_nodisp = new_deviance_nodisp;  isNaN_log_l = isNaN_new_log_l;\n                          \n\t\t\t\t\t[g_Y, w] = glm_dist (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);\n                          \n\t\t\t\t\t# We introduced these variables to avoid roundoff errors:\n\t\t\t\t\t#     g_Y = y_residual / (y_var * link_grad);\n\t\t\t\t\t#     w   = 1.0 / (y_var * link_grad * link_grad);\n                          \n\t\t\t\t\tgXY = - t(X) %*% g_Y;\n\t\t\t\t\tg = diag (scale_X) %*% gXY + shift_X %*% gXY [num_features, ];\n\t\t\t\t\tg_norm = sqrt (sum ((g + lambda * beta) ^ 2));\n\t\t\t\t}\n                        \n                [z, neg_log_l_change_predicted, num_CG_iters, reached_trust_boundary] = \n\t\t\t\t\tget_CG_Steihaug_point (X, scale_X, shift_X, w, g, beta, lambda, trust_delta, max_iteration_CG);\n                        \n\t\t\t\tnewbeta = beta + z;\n                        \n\t\t\t\tssX_newbeta = diag (scale_X) %*% newbeta;\n\t\t\t\tssX_newbeta [num_features, ] = ssX_newbeta [num_features, ] + t(shift_X) %*% newbeta;\n\t\t\t\tall_linear_terms = X %*% ssX_newbeta;\n                        \n\t\t\t\t[new_log_l, isNaN_new_log_l] = glm_log_likelihood_part\n\t\t\t\t\t(all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);\n                        \n\t\t\t\tif (isNaN_new_log_l == 0) {\n\t\t\t\t\tnew_deviance_nodisp = 2.0 * (saturated_log_l - new_log_l);\n\t\t\t\t\tnew_log_l = new_log_l - 0.5 * sum (lambda * newbeta ^ 2);\n\t\t\t\t}\n                        \n\t\t\t\tlog_l_change = new_log_l - log_l;               # R's criterion for termination: |dev - devold|/(|dev| + 0.1) < eps\n                       \n\t\t\t\tif (reached_trust_boundary == 0 & isNaN_new_log_l == 0 & \n\t\t\t\t   (2.0 * abs (log_l_change) < eps * (deviance_nodisp + 0.1) | abs (log_l_change) < (abs (log_l) + abs (new_log_l)) * 0.00000000000001) ) {\n\t\t\t\t\ttermination_code = 1;\n\t\t\t\t}\n\t\t\t\trho = - log_l_change / neg_log_l_change_predicted;\n\t\t\t\tz_norm = sqrt (sum (z * z));\n                                              \n\t\t\t\ti_IRLS = i_IRLS + 1;                        \n                        \n\t\t\t\tif (i_IRLS == max_iteration_IRLS) {\n\t\t\t\t\ttermination_code = 2;\n\t\t\t\t}\n\t\t\t}\n                      \n            beta = newbeta;\n            log_l = new_log_l;\n            deviance_nodisp = new_deviance_nodisp;\n                     \n            #---------------------------- last part\n\n\t\t\tif (termination_code != 1) {\n\t\t\t\tprint (\"One of the runs of GLM did not converged in \" + i_IRLS + \" steps!\");\n\t\t\t}\n                      \n            ##### COMPUTE AIC ##### \n            \n\t\t\tif (distribution_type == 2 & link_type >= 1 & link_type <= 4) {\t\t\t\n\t\t\t\tAIC = -2 * log_l;\n\t\t\t\tif (sum (X) != 0) {\n\t\t\t\t\tAIC = AIC + 2 * num_features;\t\n\t\t\t\t}\n\t\t\t} else {\n\t\t\t\tstop (\"Currently only the Bernoulli distribution family the following link functions are supported: log, logit, probit, and cloglog!\");\n\t\t\t}\n\t\t\t\t\t           \n            if (fileB != \" \") {\n\t\t\t\tfileO = ifdef ($O, \" \");\n\t\t\t\tfileS = $S;\n\t\t\t\tfmt  = ifdef ($fmt, \"text\");\t\n\t\t\t\n\t\t\t\t# Output which features give the best AIC and are being used for linear regression \n\t\t\t\twrite (Selected, fileS, format=fmt);\n\t\t\n\t\t\t\tssX_beta = diag (scale_X) %*% beta;\n                ssX_beta [num_features, ] = ssX_beta [num_features, ] + t(shift_X) %*% beta;\n\t\t\t\tif (intercept_status == 2) {\n\t\t\t\t\tbeta_out = cbind (ssX_beta, beta);\n                } else {\n\t\t\t\t\tbeta_out = ssX_beta;\n                }\n                \n                if (intercept_status == 0 & num_features == 1) {\n\t\t\t\t\tp = sum (X == 1);\n\t\t\t\t\tif (p == num_records) {\n\t\t\t\t\t\tbeta_out = beta_out[1,];\n\t\t\t\t\t}\t\t\t\t\t\n                } \n\n\t\t\t\t\t\t\t\t\n                if (intercept_status == 1 | intercept_status == 2) {\n\t\t\t\t\tintercept_value = as.scalar (beta_out [num_features, 1]);\n                    beta_noicept = beta_out [1 : (num_features - 1), 1];\n                } else {\n\t\t\t\t\tbeta_noicept = beta_out [1 : num_features, 1];\n                }\n                min_beta = min (beta_noicept);\n                max_beta = max (beta_noicept);\n                tmp_i_min_beta = rowIndexMin (t(beta_noicept))\n                i_min_beta = as.scalar (tmp_i_min_beta [1, 1]);\n                tmp_i_max_beta = rowIndexMax (t(beta_noicept))\n                i_max_beta = as.scalar (tmp_i_max_beta [1, 1]);\n                        \n                #####  OVER-DISPERSION PART  #####\n                      \n                all_linear_terms = X %*% ssX_beta;\n\t\t\t\t[g_Y, w] = glm_dist (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);\n                        \n                pearson_residual_sq = g_Y ^ 2 / w;\n                pearson_residual_sq = replace (target = pearson_residual_sq, pattern = NaN, replacement = 0);\n                # pearson_residual_sq = (y_residual ^ 2) / y_var;\n                        \n                if (num_records > num_features) {\n\t\t\t\t\testimated_dispersion = sum (pearson_residual_sq) / (num_records - num_features);\n                }\n                if (dispersion <= 0) {\n\t\t\t\t\tdispersion = estimated_dispersion;\n                }\n                deviance = deviance_nodisp / dispersion;\n                        \n                #####  END OF THE MAIN PART  #####\n                        \n                str = \"BETA_MIN,\" + min_beta;\n                str = append (str, \"BETA_MIN_INDEX,\" + i_min_beta);\n                str = append (str, \"BETA_MAX,\" + max_beta);\n                str = append (str, \"BETA_MAX_INDEX,\" + i_max_beta);\n                str = append (str, \"INTERCEPT,\" + intercept_value);\n                str = append (str, \"DISPERSION,\" + dispersion);\n                str = append (str, \"DISPERSION_EST,\" + estimated_dispersion);\n                str = append (str, \"DEVIANCE_UNSCALED,\" + deviance_nodisp);\n                str = append (str, \"DEVIANCE_SCALED,\" + deviance);\n                        \n                if (fileO != \" \") {\n\t\t\t\t\twrite (str, fileO);\n                } \n\t\t\t\telse {\n\t\t\t\t\tprint (str);\n                }\n\t\t\t\n\t\t\t\t# Prepare the output matrix\n\t\t\t\tprint (\"Writing the output matrix...\");\n                if (intercept_status == 0 & num_features == 1) { \n\t\t\t\t\tif (p == num_records) {\n\t\t\t\t\t\tbeta_out_tmp = matrix (0, rows = num_features_orig + 1, cols = 1); \n\t\t\t\t\t\tbeta_out_tmp[num_features_orig + 1,] = beta_out;\n\t\t\t\t\t\tbeta_out = beta_out_tmp;\n\t\t\t\t\t\twrite (beta_out, fileB, format=fmt);\n\t\t\t\t\t\tstop (\"\");\n\t\t\t\t\t} else if (sum (X) == 0){\n\t\t\t\t\t\tbeta_out = matrix (0, rows = num_features_orig, cols = 1);\n\t\t\t\t\t\twrite (beta_out, fileB, format=fmt);\n\t\t\t\t\t\tstop (\"\");\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\tno_selected = ncol (Selected);\n\t\t\t\tmax_selected = max (Selected);\n\t\t\t\tlast = max_selected + 1;\t\n\t\t\n\t\t\t\tif (intercept_status != 0) {\n\t\t\n\t\t\t\t\tSelected_ext = cbind (Selected, as.matrix (last));\t\t\t\n\t\t\t\t\tP1 = table (seq (1, ncol (Selected_ext)), t(Selected_ext)); \n\n\t\t\t\t\tif (intercept_status == 2) {\n\t\t\t\n\t\t\t\t\t\tP1_ssX_beta = P1 * ssX_beta;\n\t\t\t\t\t\tP2_ssX_beta = colSums (P1_ssX_beta);\n\t\t\t\t\t\tP1_beta = P1 * beta;\n\t\t\t\t\t\tP2_beta = colSums (P1_beta);\n\t\t\t\t\n\t\t\t\t\t\tif (max_selected < num_features_orig) {\n\t\t\t\t\t\t\n\t\t\t\t\t\t\tP2_ssX_beta = cbind (P2_ssX_beta, matrix (0, rows=1, cols=(num_features_orig - max_selected)));\n\t\t\t\t\t\t\tP2_beta = cbind (P2_beta, matrix (0, rows=1, cols=(num_features_orig - max_selected)));\n\t\t\t\t\t\t\t\n\t\t\t\t\t\t\tP2_ssX_beta[1, num_features_orig+1] = P2_ssX_beta[1, max_selected + 1]; \n\t\t\t\t\t\t\tP2_ssX_beta[1, max_selected + 1] = 0;\n\t\t\t\t\t\t\t\n\t\t\t\t\t\t\tP2_beta[1, num_features_orig+1] = P2_beta[1, max_selected + 1]; \n\t\t\t\t\t\t\tP2_beta[1, max_selected + 1] = 0;\n\n\t\t\t\t\t\t}\n\t\t\t\t\t\tbeta_out = cbind (t(P2_ssX_beta), t(P2_beta));\n\t\t\t\t\n\t\t\t\t\t} else {\n\t\t\t\n\t\t\t\t\t\tP1_beta = P1 * beta;\n\t\t\t\t\t\tP2_beta = colSums (P1_beta);\n\t\t\t\t\n\t\t\t\t\t\tif (max_selected < num_features_orig) {\n\t\t\t\t\t\t\tP2_beta = cbind (P2_beta, matrix (0, rows=1, cols=(num_features_orig - max_selected)));\n\t\t\t\t\t\t\tP2_beta[1, num_features_orig+1] = P2_beta[1, max_selected + 1] ; \n\t\t\t\t\t\t\tP2_beta[1, max_selected + 1] = 0;\n\t\t\t\t\t\t}\n\t\t\t\t\t\tbeta_out = t(P2_beta);\n\t\t\t\t\n\t\t\t\t\t}\n\t\t\t\t} else {\n\t\t\n\t\t\t\t\tP1 = table (seq (1, no_selected), t(Selected)); \n\t\t\t\t\tP1_beta = P1 * beta;\n\t\t\t\t\tP2_beta = colSums (P1_beta);\t\n\n\t\t\t\t\tif (max_selected < num_features_orig) {\n\t\t\t\t\t\tP2_beta = cbind (P2_beta, matrix (0, rows=1, cols=(num_features_orig - max_selected)));\n\t\t\t\t\t}\t\t\n\n\t\t\t\t\tbeta_out = t(P2_beta);\t\n\t\t\t\t}\n\t\n\t\t\t\twrite ( beta_out, fileB, format=fmt );\n\t\t\t\n\t\t\t}\n                      \n\t\t} else { \n\t\t\tstop (\"Input matrices X and/or Y are out of range!\"); \n        }\n\t} else { \n\t\tstop (\"Response matrix with \" + num_response_columns + \" columns, distribution family (\" + distribution_type + \", \" + variance_as_power_of_the_mean\n               + \") and link family (\" + link_type + \", \" + link_as_power_of_the_mean + \") are NOT supported together.\");\n    }\n}\n";
        return string;
    }

    public Glm_dist_output glm_dist(Object object, Object object2, Object object3, Object object4, Object object5, Object object6) {
        String string = "source('scripts/algorithms/StepGLM.dml') as mlcontextns;[g_Y, w] = mlcontextns::glm_dist(linear_terms, Y, dist_type, var_power, link_type, link_power);";
        Script script = new Script(string);
        script.in("linear_terms", object).in("Y", object2).in("dist_type", object3).in("var_power", object4).in("link_type", object5).in("link_power", object6).out("g_Y").out("w");
        MLResults mLResults = script.execute();
        Matrix matrix = mLResults.getMatrix("g_Y");
        Matrix matrix2 = mLResults.getMatrix("w");
        Glm_dist_output glm_dist_output = new Glm_dist_output(matrix, matrix2);
        return glm_dist_output;
    }

    public String glm_dist__docs() {
        String string = "glm_dist = function (Matrix[double] linear_terms, Matrix[double] Y,\n                     int dist_type, double var_power, int link_type, double link_power)\n  return (Matrix[double] g_Y, Matrix[double] w)\n# ORIGINALLY we returned more meaningful vectors, namely:\n# Matrix[double] y_residual    : y - y_mean, i.e. y observed - y predicted\n# Matrix[double] link_gradient : derivative of the link function\n# Matrix[double] var_function  : variance without dispersion, i.e. the V(mu) function\n# BUT, this caused roundoff errors, so we had to compute \"directly useful\" vectors\n# and skip over the \"meaningful intermediaries\".  Now we output these two variables:\n#     g_Y = y_residual / (var_function * link_gradient);\n#     w   = 1.0 / (var_function * link_gradient ^ 2);\n{\n    num_records = nrow (linear_terms);\n    zeros_r = matrix (0.0, rows = num_records, cols = 1);\n    ones_r = 1 + zeros_r;\n    g_Y  = zeros_r;\n    w  = zeros_r;\n    \n    # Some constants\n    \n    one_over_sqrt_two_pi = 0.39894228040143267793994605993438;\n    ones_2 = matrix (1.0, rows = 1, cols = 2);\n    p_one_m_one = ones_2;\n    p_one_m_one [1, 2] = -1.0;\n    m_one_p_one = ones_2;\n    m_one_p_one [1, 1] = -1.0;\n    zero_one = ones_2;\n    zero_one [1, 1] = 0.0;\n    one_zero = ones_2;\n    one_zero [1, 2] = 0.0;\n    flip_pos = matrix (0, rows = 2, cols = 2);\n    flip_neg = flip_pos;\n    flip_pos [1, 2] = 1;\n    flip_pos [2, 1] = 1;\n    flip_neg [1, 2] = -1;\n    flip_neg [2, 1] = 1;\n    \n    if (dist_type == 1 & link_type == 1) { # POWER DISTRIBUTION\n      y_mean = zeros_r;\n      if          (link_power ==  0) {\n        y_mean = exp (linear_terms);\n        y_mean_pow = y_mean ^ (1 - var_power);\n        w   = y_mean_pow * y_mean;\n        g_Y = y_mean_pow * (Y - y_mean);\n      } else { if (link_power ==  1.0) {\n        y_mean = linear_terms;\n        w   = y_mean ^ (- var_power);\n        g_Y = w * (Y - y_mean);\n      } else {\n        y_mean = linear_terms ^ (1.0 / link_power);\n        c1  = (1 - var_power) / link_power - 1;\n        c2  = (2 - var_power) / link_power - 2;\n        g_Y = (linear_terms ^ c1) * (Y - y_mean) / link_power;\n        w   = (linear_terms ^ c2) / (link_power ^ 2);\n      }   }}\n    if (dist_type == 2 & link_type >= 1 & link_type <= 5)\n    { # BINOMIAL/BERNOULLI DISTRIBUTION\n      if (link_type == 1) { # BINOMIAL.POWER LINKS\n        if (link_power == 0)  { # Binomial.log\n          vec1 = 1 / (exp (- linear_terms) - 1);\n          g_Y = Y [, 1] - Y [, 2] * vec1;\n          w   = rowSums (Y) * vec1;\n        } else {                  # Binomial.nonlog\n          vec1 = zeros_r;\n          if (link_power == 0.5)  {\n            vec1 = 1 / (1 - linear_terms ^ 2);\n          } else { if (sum (linear_terms < 0) == 0) {\n            vec1 = linear_terms ^ (- 2 + 1 / link_power) / (1 - linear_terms ^ (1 / link_power));\n          } else {isNaN = 1;}}\n          # We want a \"zero-protected\" version of\n          #     vec2 = Y [, 1] / linear_terms;\n          is_y_0 = (Y [, 1] == 0);\n          vec2 = (Y [, 1] + is_y_0) / (linear_terms * (1 - is_y_0) + is_y_0) - is_y_0;\n          g_Y =  (vec2 - Y [, 2] * vec1 * linear_terms) / link_power;\n          w   =  rowSums (Y) * vec1 / link_power ^ 2;\n        }\n      } else {\n        is_LT_pos_infinite = (linear_terms == Inf);\n        is_LT_neg_infinite = (linear_terms == -Inf);\n        is_LT_infinite = is_LT_pos_infinite %*% one_zero + is_LT_neg_infinite %*% zero_one;\n        finite_linear_terms = replace (target =        linear_terms, pattern =  Inf, replacement = 0);\n        finite_linear_terms = replace (target = finite_linear_terms, pattern = -Inf, replacement = 0);\n        if (link_type == 2)                           { # Binomial.logit\n          Y_prob = exp (finite_linear_terms) %*% one_zero + ones_r %*% zero_one;\n          Y_prob = Y_prob / (rowSums (Y_prob) %*% ones_2);\n          Y_prob = Y_prob * ((1.0 - rowSums (is_LT_infinite)) %*% ones_2) + is_LT_infinite;\n          g_Y = rowSums (Y * (Y_prob %*% flip_neg));           ### = y_residual;\n          w   = rowSums (Y * (Y_prob %*% flip_pos) * Y_prob);  ### = y_variance;\n        } else { if (link_type == 3)                  { # Binomial.probit\n          is_lt_pos = (linear_terms >= 0);\n          t_gp = 1.0 / (1.0 + abs (finite_linear_terms) * 0.231641888);  # 0.231641888 = 0.3275911 / sqrt (2.0)\n          pt_gp = t_gp * ( 0.254829592 \n                           + t_gp * (-0.284496736 # \"Handbook of Mathematical Functions\", ed. by M. Abramowitz and I.A. Stegun,\n                                     + t_gp * ( 1.421413741 # U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299\n                                                + t_gp * (-1.453152027 \n                                                          + t_gp *   1.061405429))));\n          the_gauss_exp = exp (- (linear_terms ^ 2) / 2.0);\n          vec1 = 0.25 * pt_gp * (2 - the_gauss_exp * pt_gp);\n          vec2 = Y [, 1] - rowSums (Y) * is_lt_pos + the_gauss_exp * pt_gp * rowSums (Y) * (is_lt_pos - 0.5);\n          w   = the_gauss_exp * (one_over_sqrt_two_pi ^ 2) * rowSums (Y) / vec1;\n          g_Y = one_over_sqrt_two_pi * vec2 / vec1;\n        } else { if (link_type == 4)                  { # Binomial.cloglog\n          the_exp = exp (linear_terms)\n          the_exp_exp = exp (- the_exp);\n          is_too_small = ((10000000 + the_exp) == 10000000);\n          the_exp_ratio = (1 - is_too_small) * (1 - the_exp_exp) / (the_exp + is_too_small) + is_too_small * (1 - the_exp / 2);\n          g_Y =  (rowSums (Y) * the_exp_exp - Y [, 2]) / the_exp_ratio;\n          w   =  the_exp_exp * the_exp * rowSums (Y) / the_exp_ratio;\n        } else { if (link_type == 5)                  { # Binomial.cauchit\n          Y_prob = 0.5 + (atan (finite_linear_terms) %*% p_one_m_one) / pi;\n          Y_prob = Y_prob * ((1.0 - rowSums (is_LT_infinite)) %*% ones_2) + is_LT_infinite;\n          y_residual = Y [, 1] * Y_prob [, 2] - Y [, 2] * Y_prob [, 1];\n          var_function = rowSums (Y) * Y_prob [, 1] * Y_prob [, 2];\n          link_gradient_normalized = (1 + linear_terms ^ 2) * pi;\n          g_Y =  rowSums (Y) * y_residual / (var_function * link_gradient_normalized);\n          w   = (rowSums (Y) ^ 2) / (var_function * link_gradient_normalized ^ 2);\n        }}}}   \n      }\n    }\n  }\n";
        return string;
    }

    public String glm_dist__source() {
        String string = "glm_dist = function (Matrix[double] linear_terms, Matrix[double] Y,\n                     int dist_type, double var_power, int link_type, double link_power)\n  return (Matrix[double] g_Y, Matrix[double] w)\n# ORIGINALLY we returned more meaningful vectors, namely:\n# Matrix[double] y_residual    : y - y_mean, i.e. y observed - y predicted\n# Matrix[double] link_gradient : derivative of the link function\n# Matrix[double] var_function  : variance without dispersion, i.e. the V(mu) function\n# BUT, this caused roundoff errors, so we had to compute \"directly useful\" vectors\n# and skip over the \"meaningful intermediaries\".  Now we output these two variables:\n#     g_Y = y_residual / (var_function * link_gradient);\n#     w   = 1.0 / (var_function * link_gradient ^ 2);\n{\n    num_records = nrow (linear_terms);\n    zeros_r = matrix (0.0, rows = num_records, cols = 1);\n    ones_r = 1 + zeros_r;\n    g_Y  = zeros_r;\n    w  = zeros_r;\n    \n    # Some constants\n    \n    one_over_sqrt_two_pi = 0.39894228040143267793994605993438;\n    ones_2 = matrix (1.0, rows = 1, cols = 2);\n    p_one_m_one = ones_2;\n    p_one_m_one [1, 2] = -1.0;\n    m_one_p_one = ones_2;\n    m_one_p_one [1, 1] = -1.0;\n    zero_one = ones_2;\n    zero_one [1, 1] = 0.0;\n    one_zero = ones_2;\n    one_zero [1, 2] = 0.0;\n    flip_pos = matrix (0, rows = 2, cols = 2);\n    flip_neg = flip_pos;\n    flip_pos [1, 2] = 1;\n    flip_pos [2, 1] = 1;\n    flip_neg [1, 2] = -1;\n    flip_neg [2, 1] = 1;\n    \n    if (dist_type == 1 & link_type == 1) { # POWER DISTRIBUTION\n      y_mean = zeros_r;\n      if          (link_power ==  0) {\n        y_mean = exp (linear_terms);\n        y_mean_pow = y_mean ^ (1 - var_power);\n        w   = y_mean_pow * y_mean;\n        g_Y = y_mean_pow * (Y - y_mean);\n      } else { if (link_power ==  1.0) {\n        y_mean = linear_terms;\n        w   = y_mean ^ (- var_power);\n        g_Y = w * (Y - y_mean);\n      } else {\n        y_mean = linear_terms ^ (1.0 / link_power);\n        c1  = (1 - var_power) / link_power - 1;\n        c2  = (2 - var_power) / link_power - 2;\n        g_Y = (linear_terms ^ c1) * (Y - y_mean) / link_power;\n        w   = (linear_terms ^ c2) / (link_power ^ 2);\n      }   }}\n    if (dist_type == 2 & link_type >= 1 & link_type <= 5)\n    { # BINOMIAL/BERNOULLI DISTRIBUTION\n      if (link_type == 1) { # BINOMIAL.POWER LINKS\n        if (link_power == 0)  { # Binomial.log\n          vec1 = 1 / (exp (- linear_terms) - 1);\n          g_Y = Y [, 1] - Y [, 2] * vec1;\n          w   = rowSums (Y) * vec1;\n        } else {                  # Binomial.nonlog\n          vec1 = zeros_r;\n          if (link_power == 0.5)  {\n            vec1 = 1 / (1 - linear_terms ^ 2);\n          } else { if (sum (linear_terms < 0) == 0) {\n            vec1 = linear_terms ^ (- 2 + 1 / link_power) / (1 - linear_terms ^ (1 / link_power));\n          } else {isNaN = 1;}}\n          # We want a \"zero-protected\" version of\n          #     vec2 = Y [, 1] / linear_terms;\n          is_y_0 = (Y [, 1] == 0);\n          vec2 = (Y [, 1] + is_y_0) / (linear_terms * (1 - is_y_0) + is_y_0) - is_y_0;\n          g_Y =  (vec2 - Y [, 2] * vec1 * linear_terms) / link_power;\n          w   =  rowSums (Y) * vec1 / link_power ^ 2;\n        }\n      } else {\n        is_LT_pos_infinite = (linear_terms == Inf);\n        is_LT_neg_infinite = (linear_terms == -Inf);\n        is_LT_infinite = is_LT_pos_infinite %*% one_zero + is_LT_neg_infinite %*% zero_one;\n        finite_linear_terms = replace (target =        linear_terms, pattern =  Inf, replacement = 0);\n        finite_linear_terms = replace (target = finite_linear_terms, pattern = -Inf, replacement = 0);\n        if (link_type == 2)                           { # Binomial.logit\n          Y_prob = exp (finite_linear_terms) %*% one_zero + ones_r %*% zero_one;\n          Y_prob = Y_prob / (rowSums (Y_prob) %*% ones_2);\n          Y_prob = Y_prob * ((1.0 - rowSums (is_LT_infinite)) %*% ones_2) + is_LT_infinite;\n          g_Y = rowSums (Y * (Y_prob %*% flip_neg));           ### = y_residual;\n          w   = rowSums (Y * (Y_prob %*% flip_pos) * Y_prob);  ### = y_variance;\n        } else { if (link_type == 3)                  { # Binomial.probit\n          is_lt_pos = (linear_terms >= 0);\n          t_gp = 1.0 / (1.0 + abs (finite_linear_terms) * 0.231641888);  # 0.231641888 = 0.3275911 / sqrt (2.0)\n          pt_gp = t_gp * ( 0.254829592 \n                           + t_gp * (-0.284496736 # \"Handbook of Mathematical Functions\", ed. by M. Abramowitz and I.A. Stegun,\n                                     + t_gp * ( 1.421413741 # U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299\n                                                + t_gp * (-1.453152027 \n                                                          + t_gp *   1.061405429))));\n          the_gauss_exp = exp (- (linear_terms ^ 2) / 2.0);\n          vec1 = 0.25 * pt_gp * (2 - the_gauss_exp * pt_gp);\n          vec2 = Y [, 1] - rowSums (Y) * is_lt_pos + the_gauss_exp * pt_gp * rowSums (Y) * (is_lt_pos - 0.5);\n          w   = the_gauss_exp * (one_over_sqrt_two_pi ^ 2) * rowSums (Y) / vec1;\n          g_Y = one_over_sqrt_two_pi * vec2 / vec1;\n        } else { if (link_type == 4)                  { # Binomial.cloglog\n          the_exp = exp (linear_terms)\n          the_exp_exp = exp (- the_exp);\n          is_too_small = ((10000000 + the_exp) == 10000000);\n          the_exp_ratio = (1 - is_too_small) * (1 - the_exp_exp) / (the_exp + is_too_small) + is_too_small * (1 - the_exp / 2);\n          g_Y =  (rowSums (Y) * the_exp_exp - Y [, 2]) / the_exp_ratio;\n          w   =  the_exp_exp * the_exp * rowSums (Y) / the_exp_ratio;\n        } else { if (link_type == 5)                  { # Binomial.cauchit\n          Y_prob = 0.5 + (atan (finite_linear_terms) %*% p_one_m_one) / pi;\n          Y_prob = Y_prob * ((1.0 - rowSums (is_LT_infinite)) %*% ones_2) + is_LT_infinite;\n          y_residual = Y [, 1] * Y_prob [, 2] - Y [, 2] * Y_prob [, 1];\n          var_function = rowSums (Y) * Y_prob [, 1] * Y_prob [, 2];\n          link_gradient_normalized = (1 + linear_terms ^ 2) * pi;\n          g_Y =  rowSums (Y) * y_residual / (var_function * link_gradient_normalized);\n          w   = (rowSums (Y) ^ 2) / (var_function * link_gradient_normalized ^ 2);\n        }}}}   \n      }\n    }\n  }\n";
        return string;
    }

    public Glm_log_likelihood_part_output glm_log_likelihood_part(Object object, Object object2, Object object3, Object object4, Object object5, Object object6) {
        String string = "source('scripts/algorithms/StepGLM.dml') as mlcontextns;[log_l, isNaN] = mlcontextns::glm_log_likelihood_part(linear_terms, Y, dist_type, var_power, link_type, link_power);";
        Script script = new Script(string);
        script.in("linear_terms", object).in("Y", object2).in("dist_type", object3).in("var_power", object4).in("link_type", object5).in("link_power", object6).out("log_l").out("isNaN");
        MLResults mLResults = script.execute();
        double d = mLResults.getDouble("log_l");
        long l = mLResults.getLong("isNaN");
        Glm_log_likelihood_part_output glm_log_likelihood_part_output = new Glm_log_likelihood_part_output(d, l);
        return glm_log_likelihood_part_output;
    }

    public String glm_log_likelihood_part__docs() {
        String string = "glm_log_likelihood_part = function (Matrix[double] linear_terms, Matrix[double] Y,\n                                    int dist_type, double var_power, int link_type, double link_power)\n  return (double log_l, int isNaN)\n{\n    isNaN = 0;\n    log_l = 0.0;\n    num_records = nrow (Y);\n    zeros_r = matrix (0.0, rows = num_records, cols = 1);\n    \n    if (dist_type == 1 & link_type == 1)\n    { # POWER DISTRIBUTION\n      b_cumulant = zeros_r;\n      natural_parameters = zeros_r;\n      is_natural_parameter_log_zero = zeros_r;\n      if          (var_power == 1.0 & link_power == 0)  { # Poisson.log\n        b_cumulant = exp (linear_terms);\n        is_natural_parameter_log_zero = (linear_terms == -Inf);\n        natural_parameters = replace (target = linear_terms, pattern = -Inf, replacement = 0);\n      } else { if (var_power == 1.0 & link_power == 1.0)  { # Poisson.id\n        if (sum (linear_terms < 0) == 0)  {\n          b_cumulant = linear_terms;\n          is_natural_parameter_log_zero = (linear_terms == 0);\n          natural_parameters = log (linear_terms + is_natural_parameter_log_zero);\n        } else {isNaN = 1;}\n      } else { if (var_power == 1.0 & link_power == 0.5)  { # Poisson.sqrt\n        if (sum (linear_terms < 0) == 0)  {\n          b_cumulant = linear_terms ^ 2;\n          is_natural_parameter_log_zero = (linear_terms == 0);\n          natural_parameters = 2.0 * log (linear_terms + is_natural_parameter_log_zero);\n        } else {isNaN = 1;}\n      } else { if (var_power == 1.0 & link_power  > 0)  { # Poisson.power_nonlog, pos\n        if (sum (linear_terms < 0) == 0)  {\n          is_natural_parameter_log_zero = (linear_terms == 0);\n          b_cumulant = (linear_terms + is_natural_parameter_log_zero) ^ (1.0 / link_power) - is_natural_parameter_log_zero;\n          natural_parameters = log (linear_terms + is_natural_parameter_log_zero) / link_power;\n        } else {isNaN = 1;}\n      } else { if (var_power == 1.0)                      { # Poisson.power_nonlog, neg\n        if (sum (linear_terms <= 0) == 0) {\n          b_cumulant = linear_terms ^ (1.0 / link_power);\n          natural_parameters = log (linear_terms) / link_power;\n        } else {isNaN = 1;}\n      } else { if (var_power == 2.0 & link_power == -1.0) { # Gamma.inverse\n        if (sum (linear_terms <= 0) == 0) {\n          b_cumulant = - log (linear_terms);\n          natural_parameters = - linear_terms;\n        } else {isNaN = 1;}\n      } else { if (var_power == 2.0 & link_power ==  1.0) { # Gamma.id\n        if (sum (linear_terms <= 0) == 0) {\n          b_cumulant = log (linear_terms);\n          natural_parameters = - 1.0 / linear_terms;\n        } else {isNaN = 1;}\n      } else { if (var_power == 2.0 & link_power ==  0) { # Gamma.log\n        b_cumulant = linear_terms;\n        natural_parameters = - exp (- linear_terms);\n      } else { if (var_power == 2.0)                      { # Gamma.power_nonlog\n        if (sum (linear_terms <= 0) == 0) {\n          b_cumulant = log (linear_terms) / link_power;\n          natural_parameters = - linear_terms ^ (- 1.0 / link_power);\n        } else {isNaN = 1;}\n      } else { if                    (link_power ==  0) { # PowerDist.log\n        natural_parameters = exp (linear_terms * (1.0 - var_power)) / (1.0 - var_power);\n        b_cumulant = exp (linear_terms * (2.0 - var_power)) / (2.0 - var_power);\n      } else {                                              # PowerDist.power_nonlog\n        if          (-2 * link_power == 1.0 - var_power) {\n          natural_parameters = 1.0 / (linear_terms ^ 2) / (1.0 - var_power);\n        } else { if (-1 * link_power == 1.0 - var_power) {\n          natural_parameters = 1.0 / linear_terms / (1.0 - var_power);\n        } else { if (     link_power == 1.0 - var_power) {\n          natural_parameters = linear_terms / (1.0 - var_power);\n        } else { if ( 2 * link_power == 1.0 - var_power) {\n          natural_parameters = linear_terms ^ 2 / (1.0 - var_power);\n        } else {\n          if (sum (linear_terms <= 0) == 0) {\n            power = (1.0 - var_power) / link_power;\n            natural_parameters = (linear_terms ^ power) / (1.0 - var_power);\n          } else {isNaN = 1;}\n        }}}}\n        if          (-2 * link_power == 2.0 - var_power) {\n          b_cumulant = 1.0 / (linear_terms ^ 2) / (2.0 - var_power);\n        } else { if (-1 * link_power == 2.0 - var_power) {\n          b_cumulant = 1.0 / linear_terms / (2.0 - var_power);\n        } else { if (     link_power == 2.0 - var_power) {\n          b_cumulant = linear_terms / (2.0 - var_power);\n        } else { if ( 2 * link_power == 2.0 - var_power) {\n          b_cumulant = linear_terms ^ 2 / (2.0 - var_power);\n        } else {\n          if (sum (linear_terms <= 0) == 0) {\n            power = (2.0 - var_power) / link_power;\n            b_cumulant = (linear_terms ^ power) / (2.0 - var_power);\n          } else {isNaN = 1;}\n        }}}}\n      }}}}} }}}}}\n      if (sum (is_natural_parameter_log_zero * abs (Y)) > 0) {\n        log_l = -Inf;\n        isNaN = 1;\n      }\n      if (isNaN == 0)\n      {\n        log_l = sum (Y * natural_parameters - b_cumulant);\n        if (log_l != log_l | (log_l == log_l + 1.0 & log_l == log_l * 2.0)) {\n          log_l = -Inf;\n          isNaN = 1;\n        }   }   }\n    \n    if (dist_type == 2 & link_type >= 1 & link_type <= 5)\n    { # BINOMIAL/BERNOULLI DISTRIBUTION\n      \n      [Y_prob, isNaN] = binomial_probability_two_column (linear_terms, link_type, link_power);\n      \n      if (isNaN == 0) {            \n        does_prob_contradict = (Y_prob <= 0);\n        if (sum (does_prob_contradict * abs (Y)) == 0) {\n          log_l = sum (Y * log (Y_prob * (1 - does_prob_contradict) + does_prob_contradict));\n          if (log_l != log_l | (log_l == log_l + 1.0 & log_l == log_l * 2.0)) {\n            isNaN = 1;\n          }\n        } else {\n          log_l = -Inf;\n          isNaN = 1;\n        }   }   }\n    \n    if (isNaN == 1) {\n      log_l = - Inf; \n    }\n  }\n";
        return string;
    }

    public String glm_log_likelihood_part__source() {
        String string = "glm_log_likelihood_part = function (Matrix[double] linear_terms, Matrix[double] Y,\n                                    int dist_type, double var_power, int link_type, double link_power)\n  return (double log_l, int isNaN)\n{\n    isNaN = 0;\n    log_l = 0.0;\n    num_records = nrow (Y);\n    zeros_r = matrix (0.0, rows = num_records, cols = 1);\n    \n    if (dist_type == 1 & link_type == 1)\n    { # POWER DISTRIBUTION\n      b_cumulant = zeros_r;\n      natural_parameters = zeros_r;\n      is_natural_parameter_log_zero = zeros_r;\n      if          (var_power == 1.0 & link_power == 0)  { # Poisson.log\n        b_cumulant = exp (linear_terms);\n        is_natural_parameter_log_zero = (linear_terms == -Inf);\n        natural_parameters = replace (target = linear_terms, pattern = -Inf, replacement = 0);\n      } else { if (var_power == 1.0 & link_power == 1.0)  { # Poisson.id\n        if (sum (linear_terms < 0) == 0)  {\n          b_cumulant = linear_terms;\n          is_natural_parameter_log_zero = (linear_terms == 0);\n          natural_parameters = log (linear_terms + is_natural_parameter_log_zero);\n        } else {isNaN = 1;}\n      } else { if (var_power == 1.0 & link_power == 0.5)  { # Poisson.sqrt\n        if (sum (linear_terms < 0) == 0)  {\n          b_cumulant = linear_terms ^ 2;\n          is_natural_parameter_log_zero = (linear_terms == 0);\n          natural_parameters = 2.0 * log (linear_terms + is_natural_parameter_log_zero);\n        } else {isNaN = 1;}\n      } else { if (var_power == 1.0 & link_power  > 0)  { # Poisson.power_nonlog, pos\n        if (sum (linear_terms < 0) == 0)  {\n          is_natural_parameter_log_zero = (linear_terms == 0);\n          b_cumulant = (linear_terms + is_natural_parameter_log_zero) ^ (1.0 / link_power) - is_natural_parameter_log_zero;\n          natural_parameters = log (linear_terms + is_natural_parameter_log_zero) / link_power;\n        } else {isNaN = 1;}\n      } else { if (var_power == 1.0)                      { # Poisson.power_nonlog, neg\n        if (sum (linear_terms <= 0) == 0) {\n          b_cumulant = linear_terms ^ (1.0 / link_power);\n          natural_parameters = log (linear_terms) / link_power;\n        } else {isNaN = 1;}\n      } else { if (var_power == 2.0 & link_power == -1.0) { # Gamma.inverse\n        if (sum (linear_terms <= 0) == 0) {\n          b_cumulant = - log (linear_terms);\n          natural_parameters = - linear_terms;\n        } else {isNaN = 1;}\n      } else { if (var_power == 2.0 & link_power ==  1.0) { # Gamma.id\n        if (sum (linear_terms <= 0) == 0) {\n          b_cumulant = log (linear_terms);\n          natural_parameters = - 1.0 / linear_terms;\n        } else {isNaN = 1;}\n      } else { if (var_power == 2.0 & link_power ==  0) { # Gamma.log\n        b_cumulant = linear_terms;\n        natural_parameters = - exp (- linear_terms);\n      } else { if (var_power == 2.0)                      { # Gamma.power_nonlog\n        if (sum (linear_terms <= 0) == 0) {\n          b_cumulant = log (linear_terms) / link_power;\n          natural_parameters = - linear_terms ^ (- 1.0 / link_power);\n        } else {isNaN = 1;}\n      } else { if                    (link_power ==  0) { # PowerDist.log\n        natural_parameters = exp (linear_terms * (1.0 - var_power)) / (1.0 - var_power);\n        b_cumulant = exp (linear_terms * (2.0 - var_power)) / (2.0 - var_power);\n      } else {                                              # PowerDist.power_nonlog\n        if          (-2 * link_power == 1.0 - var_power) {\n          natural_parameters = 1.0 / (linear_terms ^ 2) / (1.0 - var_power);\n        } else { if (-1 * link_power == 1.0 - var_power) {\n          natural_parameters = 1.0 / linear_terms / (1.0 - var_power);\n        } else { if (     link_power == 1.0 - var_power) {\n          natural_parameters = linear_terms / (1.0 - var_power);\n        } else { if ( 2 * link_power == 1.0 - var_power) {\n          natural_parameters = linear_terms ^ 2 / (1.0 - var_power);\n        } else {\n          if (sum (linear_terms <= 0) == 0) {\n            power = (1.0 - var_power) / link_power;\n            natural_parameters = (linear_terms ^ power) / (1.0 - var_power);\n          } else {isNaN = 1;}\n        }}}}\n        if          (-2 * link_power == 2.0 - var_power) {\n          b_cumulant = 1.0 / (linear_terms ^ 2) / (2.0 - var_power);\n        } else { if (-1 * link_power == 2.0 - var_power) {\n          b_cumulant = 1.0 / linear_terms / (2.0 - var_power);\n        } else { if (     link_power == 2.0 - var_power) {\n          b_cumulant = linear_terms / (2.0 - var_power);\n        } else { if ( 2 * link_power == 2.0 - var_power) {\n          b_cumulant = linear_terms ^ 2 / (2.0 - var_power);\n        } else {\n          if (sum (linear_terms <= 0) == 0) {\n            power = (2.0 - var_power) / link_power;\n            b_cumulant = (linear_terms ^ power) / (2.0 - var_power);\n          } else {isNaN = 1;}\n        }}}}\n      }}}}} }}}}}\n      if (sum (is_natural_parameter_log_zero * abs (Y)) > 0) {\n        log_l = -Inf;\n        isNaN = 1;\n      }\n      if (isNaN == 0)\n      {\n        log_l = sum (Y * natural_parameters - b_cumulant);\n        if (log_l != log_l | (log_l == log_l + 1.0 & log_l == log_l * 2.0)) {\n          log_l = -Inf;\n          isNaN = 1;\n        }   }   }\n    \n    if (dist_type == 2 & link_type >= 1 & link_type <= 5)\n    { # BINOMIAL/BERNOULLI DISTRIBUTION\n      \n      [Y_prob, isNaN] = binomial_probability_two_column (linear_terms, link_type, link_power);\n      \n      if (isNaN == 0) {            \n        does_prob_contradict = (Y_prob <= 0);\n        if (sum (does_prob_contradict * abs (Y)) == 0) {\n          log_l = sum (Y * log (Y_prob * (1 - does_prob_contradict) + does_prob_contradict));\n          if (log_l != log_l | (log_l == log_l + 1.0 & log_l == log_l * 2.0)) {\n            isNaN = 1;\n          }\n        } else {\n          log_l = -Inf;\n          isNaN = 1;\n        }   }   }\n    \n    if (isNaN == 1) {\n      log_l = - Inf; \n    }\n  }\n";
        return string;
    }
}

