ercbk
diff --git a/‎README.Rmd‎
Lines changed: 1 addition & 1 deletion b/‎README.Rmd‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎README.md‎
Lines changed: 1 addition & 1 deletion b/‎README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎data/fivek-simdat.pickle‎
-29 Bytes b/‎data/fivek-simdat.pickle‎
-29 Bytes
diff --git a/‎duration-experiment/kuhn-johnson/outputs/0224-results.png‎
-51.4 KB b/‎duration-experiment/kuhn-johnson/outputs/0224-results.png‎
-51.4 KB
diff --git a/‎duration-experiment/kuhn-johnson/outputs/0222-results.png‎ renamed to ‎duration-experiment/outputs/0222-results.png‎ b/‎duration-experiment/kuhn-johnson/outputs/0222-results.png‎ renamed to ‎duration-experiment/outputs/0222-results.png‎
diff --git a/‎duration-experiment/kuhn-johnson/outputs/0223-results.png‎ renamed to ‎duration-experiment/outputs/0223-results.png‎ b/‎duration-experiment/kuhn-johnson/outputs/0223-results.png‎ renamed to ‎duration-experiment/outputs/0223-results.png‎
diff --git a/‎duration-experiment/outputs/0224-results.png‎
46.7 KB b/‎duration-experiment/outputs/0224-results.png‎
46.7 KB
diff --git a/‎duration-experiment/raschka/nested-cv-kj-raschka.R‎
Lines changed: 1 addition & 22 deletions b/‎duration-experiment/raschka/nested-cv-kj-raschka.R‎
Lines changed: 1 addition & 22 deletions
diff --git a/‎duration-experiment/raschka/nested-cv-mlr3-raschka.R‎
Lines changed: 0 additions & 18 deletions b/‎duration-experiment/raschka/nested-cv-mlr3-raschka.R‎
Lines changed: 0 additions & 18 deletions
diff --git a/‎duration-experiment/raschka/nested-cv-py-raschka.py‎
Lines changed: 22 additions & 42 deletions b/‎duration-experiment/raschka/nested-cv-py-raschka.py‎
Lines changed: 22 additions & 42 deletions
@@ -45,7 +45,7 @@ Progress (duration in seconds)
 
 References  
 
-Boulesteix, AL, and C Strobl. 2009. “Optimal Classifier Selection and Negative Bias in Error Rate Estimation: An Empirical Study on High-Dimensional Prediction.” BMC Medical Research Methodology 9 (1): 85. [link](Boulesteix, AL, and C Strobl. 2009. “Optimal Classifier Selection and Negative Bias in Error Rate Estimation: An Empirical Study on High-Dimensional Prediction.” BMC Medical Research Methodology 9 (1): 85.)  
+Boulesteix, AL, and C Strobl. 2009. “Optimal Classifier Selection and Negative Bias in Error Rate Estimation: An Empirical Study on High-Dimensional Prediction.” BMC Medical Research Methodology 9 (1): 85. [link](https://www.researchgate.net/publication/40756303_Optimal_classifier_selection_and_negative_bias_in_error_rate_estimation_An_empirical_study_on_high-dimensional_prediction)  
 
 Sebastian Raschka, "STAT 479 Statistical Tests and Algorithm Comparison," (Lecture Notes, University of Wisconsin-Madison, Fall 2019). [link](https://github.com/rasbt/stat479-machine-learning-fs19/blob/master/11_eval4-algo/11-eval4-algo__notes.pdf)  
 
 
@@ -68,7 +68,7 @@ Boulesteix, AL, and C Strobl. 2009. “Optimal Classifier Selection and
 Negative Bias in Error Rate Estimation: An Empirical Study on
 High-Dimensional Prediction.” BMC Medical Research Methodology 9 (1):
 85.
-[link](Boulesteix,%20AL,%20and%20C%20Strobl.%202009.%20“Optimal%20Classifier%20Selection%20and%20Negative%20Bias%20in%20Error%20Rate%20Estimation:%20An%20Empirical%20Study%20on%20High-Dimensional%20Prediction.”%20BMC%20Medical%20Research%20Methodology%209%20\(1\):%2085.)
+[link](https://www.researchgate.net/publication/40756303_Optimal_classifier_selection_and_negative_bias_in_error_rate_estimation_An_empirical_study_on_high-dimensional_prediction)
 
 Sebastian Raschka, “STAT 479 Statistical Tests and Algorithm
 Comparison,” (Lecture Notes, University of Wisconsin-Madison, Fall
 
@@ -2,35 +2,14 @@
 
 
 # Raschka method
-# kj
+# ranger-kj
 
 
 
 # Notes
 # 1. *** Make sure the target column is last in dataframe ***
 
 
-# Available Choices
-# 1. Data
-# 2. Algorithms
-# 3. Hyperparameter value grids
-# 4. Outer-Loop CV strategy
-# 5. Inner-Loop CV strategy
-# 6. Tuning strategy 
-
-
-
-# Experiment
-# 4 core, 16GB RAM 
-# rf, elastic net algorithms with 40x2 and 200x2 latin hypercube grids respectively
-# 5000 obs, 10 features, outer-loop = 5 k-fold, inner-loop = 2 k-fold
-# 268.96 sec (4.48 min)
-# MAE: k-fold error = 1.40926 
-# test error = 1.3475
-# Best parameters for ranger:
-# mtry = 4 and trees = 234
-
-
 # Sections
 # 1. Set-Up
 # 2. Error function 
 
@@ -13,24 +13,6 @@
 # 4. The batch arg in the tuner function allows you to specify how you want to parallelize for each algorithm which is nice.
 
 
-
-# Choices
-# 1. Data
-# 2. Algorithms
-# 3. Hyperparameter value grids
-# 4. Outer-Loop CV strategy
-# 5. Inner-Loop CV strategy
-# 6. Tuning strategy 
-
-
-
-# Experiment:
-# 4 core, 16GB RAM 
-# rf, glmnet algorithms with 100x2 hyperparameter grids
-# 100 obs, 10 features, repeats = 2, outer loop = 10 folds, inner loop = 25 resamples
-#  sec ( min)
-
-
 # Sections:
 # 1. Set-Up and Data
 # 2. Functions Used in the Loops
 
@@ -18,30 +18,6 @@
 # (6. cont.) and I'm just worried about fairly testing the speed of implementations.
 
 
-# Choices
-# 1. Data
-# 2. Algorithms
-# 3. Hyperparameter value grids
-# 4. Outer-Loop CV strategy
-# 5. Inner-Loop CV strategy
-# 6. Tuning strategy 
-
-
-# Experiment:
-# 4 core, 16GB RAM 
-# rf, elastic net algorithms with 40x2 and 200x2 latin hypercube grids. 
-# 5000 obs, 10 features,  outer loop = 5 folds, inner loop = 2 folds 
-# 941.49 sec (15.69 min)
-
-# Results for 5000 obs:
-# MAE: 2.07222 (Average of K-fold Cv test folds)
-# Training Error: 2.06839
-# Test Error: 2.09252
-# Best parameter for chosen algorithm, Elastic Net:
-# Alpha = 1.20342e-10
-# L1 ratio = 0.94502
-
-
 # Sections
 # 1. Set-up
 # 2. Data
@@ -59,30 +35,34 @@
 ###################################
 
 
-# Necessary in order to run in parallel.
-# Was told this must be ran before other modules imported.
-# Update executable path in sys module.
-import sys
-import os
-exe = os.path.join(sys.exec_prefix, "pythonw.exe")
-sys.executable = exe
-sys._base_executable = exe
-# update executable path in multiprocessing module
-import multiprocessing
-multiprocessing.set_executable(exe)
+# If in RStudio or using reticulate::source_python, necessary in order
+# to run in parallel.
+# Should be run before other modules are imported.
+# Updates executable path in sys module.
+# import sys
+# import os
+# exe = os.path.join(sys.exec_prefix, "pythonw.exe")
+# sys.executable = exe
+# sys._base_executable = exe
+# # update executable path in multiprocessing module
+# import multiprocessing
+# multiprocessing.set_executable(exe)
 
 
-import subprocess
-import time
-subprocess.Popen('mlflow server')
-time.sleep(10)
+# If in RStudio or using reticulate::source_python, necessary in order
+# to start MLflow's server.
+# import subprocess
+# import time
+# subprocess.Popen('mlflow server')
+# time.sleep(10)
 
 
 from pytictoc import TicToc
 t = TicToc()
 t.tic()
 
 from pushbullet import Pushbullet
+import os
 import mlflow
 import pickle
 import numpy as np
@@ -111,15 +91,15 @@
 
 # load simulated data
 # r = read mode, b = binary; pickle is binary
-with open('C:/Users/tbats/Documents/R/Projects/nested-cross-validation-comparison/data/fivek-simdat.pickle', 'rb') as fried:
+with open('./data/fivek-simdat.pickle', 'rb') as fried:
       pdat = pickle.load(fried)
 
 # load penalized regression hyperparameter values
-with open('C:/Users/tbats/Documents/R/Projects/nested-cross-validation-comparison/grids/elast-latin-params.pickle', 'rb') as elastp:
+with open('./grids/elast-latin-params.pickle', 'rb') as elastp:
       elast_params = pickle.load(elastp)
 
 # load random forest hyperparameter values
-with open('C:/Users/tbats/Documents/R/Projects/nested-cross-validation-comparison/grids/rf-latin-params.pickle', 'rb') as rfp:
+with open('./grids/rf-latin-params.pickle', 'rb') as rfp:
       rf_params = pickle.load(rfp)