Archive for the ‘Programming’ Category

LightGBM Grid Search Example in R

# LightGBM grid search over L1/L2 regularisation at a fixed tree depth.
# Trains one model per row of `grid_search` on the agaricus training data and
# records the lowest validation l2 loss each model reached on the test set.
library(lightgbm)

data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label, free_raw_data = FALSE)
data(agaricus.test, package = "lightgbm")
test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
valids <- list(test = dtest)

# Every combination of Depth x L1 x L2 becomes one row (1 * 6 * 6 = 36 rows).
grid_search <- expand.grid(Depth = 8,
                           L1 = 0:5,
                           L2 = 0:5)

# Preallocate one slot per grid row.
model <- vector("list", nrow(grid_search))
perf <- numeric(nrow(grid_search))

for (i in seq_len(nrow(grid_search))) {
  # NOTE(review): the data / nrounds / valids arguments were missing from the
  # published snippet; `nrounds = 100L` is an assumption - confirm.
  # min_data and learning_rate are passed inside `params` rather than as loose
  # arguments so the call does not depend on `...` forwarding.
  model[[i]] <- lgb.train(
    params = list(objective = "regression",
                  metric = "l2",
                  lambda_l1 = grid_search[i, "L1"],
                  lambda_l2 = grid_search[i, "L2"],
                  max_depth = grid_search[i, "Depth"],
                  min_data = 1,
                  learning_rate = 1),
    data = dtrain,
    nrounds = 100L,
    valids = valids,
    early_stopping_rounds = 5
  )
  # Best (lowest) l2 recorded on the "test" validation set for this model.
  perf[i] <- min(data.table::rbindlist(model[[i]]$record_evals$test$l2))
}

> cat("Model ", which.min(perf), " is lowest loss: ", min(perf), sep = "")
Model 1 is lowest loss: 1.972152e-31
> print(grid_search[which.min(perf), ])
  Depth L1 L2
1     8  0  0

Example XGboost Grid Search in Python

import sys
import math
import numpy as np
from sklearn.grid_search import GridSearchCV
# Extend the module search path before importing xgboost (presumably so a
# local checkout under xgboost/wrapper/ is picked up - confirm).
sys.path.append('xgboost/wrapper/')
import xgboost as xgb
class XGBoostClassifier():
    """Thin estimator-style wrapper around ``xgb.train``.

    Exposes ``fit`` / ``predict`` / ``predict_proba`` / ``score`` /
    ``get_params`` / ``set_params`` so the booster can be driven by a
    grid-search helper. The objective is always ``multi:softprob``.
    """

    def __init__(self, num_boost_round=10, **params):
        """Store the boosting round count and the xgboost parameter dict.

        Any ``objective`` passed in ``params`` is overwritten with
        ``'multi:softprob'``.
        """
        self.clf = None
        self.num_boost_round = num_boost_round
        self.params = params
        self.params.update({'objective': 'multi:softprob'})

    def fit(self, X, y, num_boost_round=None):
        """Train a booster on ``X`` with labels ``y``.

        Labels are mapped to consecutive integers in sorted order; the
        mapping is kept in ``self.label2num`` for ``predict``. Returns
        ``self``.
        """
        num_boost_round = num_boost_round or self.num_boost_round
        self.label2num = dict((label, i) for i, label in enumerate(sorted(set(y))))
        dtrain = xgb.DMatrix(X, label=[self.label2num[label] for label in y])
        self.clf = xgb.train(params=self.params, dtrain=dtrain, num_boost_round=num_boost_round)
        return self

    def predict(self, X):
        """Return the most probable original label for each row of ``X``."""
        num2label = dict((i, label) for label, i in self.label2num.items())
        Y = self.predict_proba(X)
        y = np.argmax(Y, axis=1)
        return np.array([num2label[i] for i in y])

    def predict_proba(self, X):
        """Return the booster's prediction output for ``X``."""
        dtest = xgb.DMatrix(X)
        return self.clf.predict(dtest)

    def score(self, X, y):
        """Return the reciprocal of the log loss, so higher is better."""
        Y = self.predict_proba(X)
        return 1 / logloss(y, Y)

    def get_params(self, deep=True):
        """Return the xgboost parameter dict (``deep`` is ignored)."""
        return self.params

    def set_params(self, **params):
        """Update parameters in place and return ``self``.

        ``num_boost_round`` is stored on the instance; ``objective`` is
        discarded so it cannot be overridden; everything else is merged into
        ``self.params``.
        """
        if 'num_boost_round' in params:
            self.num_boost_round = params.pop('num_boost_round')
        if 'objective' in params:
            del params['objective']
        # Without this merge the new values were silently dropped and every
        # grid-search candidate trained with the constructor's settings.
        self.params.update(params)
        return self
def logloss(y_true, Y_pred):
    """Mean negative log-probability assigned to the true label of each row.

    Distinct values of ``y_true`` are numbered in sorted order, and that
    number selects the column of the matching ``Y_pred`` row. A selected
    probability that is not positive contributes ``-inf`` to the sum.
    """
    column_of = {}
    for position, name in enumerate(sorted(set(y_true))):
        column_of[name] = position

    total = 0
    for probabilities, label in zip(Y_pred, y_true):
        p = probabilities[column_of[label]]
        if p > 0:
            total = total + math.log(p)
        else:
            total = total + (-np.inf)
    return -1 * total / len(Y_pred)

def main():
    """Grid-search XGBoostClassifier on a six-row toy dataset.

    Builds a base classifier, searches the listed parameter grid with 2-fold
    cross-validation, and prints the best-scoring parameter combination.
    """
    clf = XGBoostClassifier(
        eval_metric='auc',
        num_class=2,
        nthread=4,
        eta=0.1,
        num_boost_round=80,
        max_depth=12,
        subsample=0.5,
        colsample_bytree=1.0,
        silent=1,
    )
    parameters = {
        'num_boost_round': [100, 250, 500],
        'eta': [0.05, 0.1, 0.3],
        'max_depth': [6, 9, 12],
        'subsample': [0.9, 1.0],
        'colsample_bytree': [0.9, 1.0],
    }
    clf = GridSearchCV(clf, parameters, n_jobs=1, cv=2)

    # Toy data: six 2-feature rows with alternating labels 'a' / 'b'.
    clf.fit([[1, 2], [3, 4], [2, 1], [4, 3], [1, 0], [4, 5]],
            ['a', 'b', 'a', 'b', 'a', 'b'])

    # Pick the grid entry whose second field (the score) is largest.
    best_parameters, score, _ = max(clf.grid_scores_, key=lambda x: x[1])
    for param_name in sorted(best_parameters.keys()):
        print("%s: %r" % (param_name, best_parameters[param_name]))

# Run the grid-search demo only when executed as a script.
if __name__ == '__main__':
    main()

Raspberry Pi #antisec LED Alert Script

Just a little Python script I wrote to make an LED blink on a Raspberry Pi and to print a message to the screen when there’s an #antisec tweet:

# Jason D. Miller

from twython import TwythonStreamer
import RPi.GPIO as GPIO
import time

# Twitter API credentials, left blank here; fill in before running.
# C_SECRET and A_SECRET are required by the MyStreamer(...) call below.
C_KEY = ""
C_SECRET = ""
A_TOKEN = "-"
A_SECRET = ""


def blink(pin=18, duration=1.0):
    """Drive ``pin`` high, wait ``duration`` seconds, then drive it low.

    Without the pause the pin is switched off immediately after being
    switched on, so an attached LED never visibly lights.

    NOTE(review): assumes the GPIO numbering mode and output setup for
    ``pin`` are configured elsewhere before this is called - confirm.
    """
    GPIO.output(pin, GPIO.HIGH)
    time.sleep(duration)
    GPIO.output(pin, GPIO.LOW)

class MyStreamer(TwythonStreamer):
    """Streamer that reacts to every received message containing text."""

    def on_success(self, data):
        """Print an alert and blink the LED when ``data`` has a 'text' key."""
        if 'text' in data:
            # Print first so the message is not delayed by the blink pause.
            print("Antisec Tweet detected. Call the FBI.")
            blink()

stream = MyStreamer(C_KEY, C_SECRET, A_TOKEN, A_SECRET)
# Constructing the streamer does not start it; open the filtered stream so
# on_success is actually invoked for matching tweets.
stream.statuses.filter(track='#antisec')


Happy Pi Day 2016!

Has it really been a whole year?

On Pi Day 2015 we posted a tribute to Pi (π) Day, published on GitHub, wherein we created fractals in R based on π, scraped and displayed information on Pi, and other fun stuff.

This year, find out how Fibonacci numbers, which are sequences of integers, have a freaky relationship with π! View the entire script on GitHub.

# Pi Fibonacci Sequence ---------------------------------------------------
# Walks through arctangent identities whose denominators are Fibonacci
# numbers (1, 2, 3, 5, 8, 13, 21, 34, 55). Messages end with a newline so the
# auto-printed values that follow start on their own line.
cat("This year, we'll look at the relationship between Pi and Fibonacci sequences. \n")
cat("Until very recently there were just two methods used to compute pi (π),
one invented by the Greek mathematician Archimedes,
and the other by the Scottish mathematician James Gregory. \n")

cat("If we use Sir Gregory's arc tangent method, you'll notice a pattern...\n")

# pi/4 is atan(1); compare with a tolerance instead of exact `==` on doubles.
isTRUE(all.equal(pi / 4, atan(1)))

# Equals atan(1/3).
atan(1 / 5) + atan(1 / 8)

# Equals atan(1/8).
atan(1 / 13) + atan(1 / 21)

cat("We can combine what we saw above\n")
# Both sums equal pi/4: the second replaces atan(1/3) by atan(1/5) + atan(1/8).
atan(1 / 2) + atan(1 / 3)
atan(1 / 2) + atan(1 / 5) + atan(1 / 8)

# Equals atan(1/21).
atan(1 / 34) + atan(1 / 55)

cat("You'll notice that the pattern is a Fibonacci sequence! \n")

cat(" We have just seen that there are infinitely many formulae for π using the Fibonacci numbers!\n")


R: beginning and end of month

# Last day of four consecutive months: take the first of each following month
# (2012-02-01 onwards) and step back one day. `length.out` is spelled out
# instead of relying on partial matching of `length`.
date.end.month <- seq(as.Date("2012-02-01"), length.out = 4, by = "months") - 1
## ---------------------------------------------- ##
## Function takes a vector of dates as its input. ##
##                                                ##
## It produces a vector of dates that are the     ##
## first in their respective months.              ##
## ---------------------------------------------- ##
# NOTE(review): the function header and closing lines were missing from the
# published snippet; the name `firstDayMonth` and the final conversion back
# to Date are reconstructed - confirm against the original post.
firstDayMonth <- function(x) {
  x <- as.Date(as.character(x))
  day <- format(x, format = "%d")
  monthYr <- format(x, format = "%Y-%m")
  # Zero-padded day strings sort like numbers, so min() picks the earliest
  # day present within each year-month group.
  y <- tapply(day, monthYr, min)
  as.Date(paste(names(y), y, sep = "-"))
}

R: cbind fill for data.table

#' Column-bind objects of unequal row counts, padding with NA
#'
#' Each argument is converted with `as.matrix()`, padded at the bottom with
#' `NA` rows up to the largest row count, and the results are combined with
#' `cbind()`.
#'
#' @param ... Objects accepted by `as.matrix()` (vectors, matrices,
#'   data frames).
#' @return A matrix with as many rows as the tallest input, or `NULL` when
#'   called with no arguments.
cbind.fill <- function(...) {
  nm <- lapply(list(...), as.matrix)
  if (length(nm) == 0L) {
    return(NULL)
  }
  n <- max(vapply(nm, nrow, integer(1)))
  do.call(cbind, lapply(nm, function(x) {
    rbind(x, matrix(NA, n - nrow(x), ncol(x)))
  }))
}


R: Better detach package function

#' Detach a package from the search path, unloading its namespace
#'
#' Keeps detaching while the package is still on the search path, so a
#' package that was attached more than once is removed completely. Does
#' nothing when the package is not attached.
#'
#' @param pkg Package name, unquoted by default.
#' @param character.only If `TRUE`, `pkg` is taken as a character string
#'   instead of an unquoted name.
#' @return `NULL`, invisibly.
detach_package <- function(pkg, character.only = FALSE) {
  if (!character.only) {
    # Turn the unevaluated argument into its name as a string.
    pkg <- deparse(substitute(pkg))
  }
  search_item <- paste("package", pkg, sep = ":")
  while (search_item %in% search()) {
    detach(search_item, unload = TRUE, character.only = TRUE)
  }
  invisible(NULL)
}


R: Remove constant and identical features programmatically

##### Removing constant features
# A column with a single distinct value in `train` carries no information;
# drop it from both `train` and `test`.
cat("\n## Removing the constants features.\n")
for (f in names(train)) {
  if (length(unique(train[[f]])) == 1) {
    cat(f, "is constant in train. We delete it.\n")
    train[[f]] <- NULL
    test[[f]] <- NULL
  }
}

##### Removing identical features
# Compare every pair of remaining columns; when two are element-wise equal,
# mark the second of the pair for removal.
features_pair <- list()
toRemove <- character(0)
if (length(names(train)) >= 2) {
  # combn() errors when asked for pairs from fewer than two names.
  features_pair <- combn(names(train), 2, simplify = FALSE)
}
for (pair in features_pair) {
  f1 <- pair[1]
  f2 <- pair[2]
  if (!(f1 %in% toRemove) && !(f2 %in% toRemove)) {
    # isTRUE() keeps the test a single TRUE/FALSE even when the comparison
    # yields NA because of missing values.
    if (isTRUE(all(train[[f1]] == train[[f2]]))) {
      cat(f1, "and", f2, "are equals.\n")
      toRemove <- c(toRemove, f2)
    }
  }
}

# Apply the removal; `toRemove` is left in place for later inspection.
for (f in toRemove) {
  train[[f]] <- NULL
  test[[f]] <- NULL
}


PyDev (Eclipse) key bindings

press CTRL+ALT+ENTER to send the selected lines to the interactive console
Category | Command | Shortcut | Context
Edit | Rectangular edition | Shift+Alt+A | Editing Text
Edit | Find Next | Ctrl+K | Editing Text
Edit | Find and Replace | Ctrl+F | In Windows
Edit | Word Completion | Alt+/ | Editing Text
File | Close | Ctrl+F4 | In Windows
File | New | Ctrl+N | In Windows
File | Save | Ctrl+S | In Windows
Navigate | Backward History | Alt+Left | In Windows
Navigate | Forward History | Alt+Right | In Windows
Navigate | Go to Line | Ctrl+L | Editing Text
Navigate | Last Edit Location | Ctrl+Q | In Windows
Navigate | Open Resource | Ctrl+Shift+R | In Windows
Navigate | Show In menu | Alt+Shift+W | In Windows
Navigate | Go to next problem marker | Ctrl+. | In Windows
PyDev – Editor | Quick Fix / Content Assistants | Ctrl+1 | PyDev editor scope
PyDev – Editor | Code Completion | Ctrl+Space | PyDev editor scope
PyDev – Editor | Add Comment Block | Ctrl+4 | PyDev editor scope
PyDev – Editor | Add Single Comment Block | Ctrl+Shift+4 | PyDev editor scope
PyDev – Editor | Convert space-tabs to tabs | Ctrl+Shift+Tab | PyDev editor scope
PyDev – Editor | Convert tabs to space-tabs | Ctrl+Tab | PyDev editor scope
PyDev – Editor | Next Method or Class | Ctrl+Shift+Down | PyDev editor scope
PyDev – Editor | Previous Method or Class | Ctrl+Shift+Up | PyDev editor scope
PyDev – Editor | Python Collapse | Ctrl+- | PyDev editor scope
PyDev – Editor | Python Collapse All | Ctrl+9 | PyDev editor scope
PyDev – Editor | Toggle Mark Occurrences | Alt+Shift+O | PyDev editor scope
PyDev – Editor | Python Format Code | Ctrl+Shift+F | PyDev editor scope
PyDev – Editor | Python Go To Definition | F3 | PyDev editor scope
PyDev – Editor | Python Organize Imports | Ctrl+Shift+O | PyDev editor scope
PyDev – Editor | Python Show Quick Outline | Ctrl+O | PyDev editor scope
PyDev – Editor | Python Open Definition Quick Outline | Ctrl+Shift+T | PyDev editor scope
PyDev – Editor | Python Un Collapse | Ctrl+= | PyDev editor scope
PyDev – Editor | Python Un Collapse All | Ctrl+0 | PyDev editor scope
PyDev – Editor | Python Comment/Uncomment | Ctrl+/ (for both) | PyDev editor scope
PyDev – Editor | Remove Comment Block | Ctrl+5 | PyDev editor scope
PyDev – Editor | Start interactive session | Ctrl+Alt+Enter | PyDev editor scope
PyDev – Editor | Send line to interactive session | F2 | PyDev editor scope
PyDev – Editor | List commands bound to Ctrl+2 (only the most important are below) | Ctrl+2+help | PyDev editor scope
PyDev – Editor | Assigns method parameters to attributes of self | Ctrl+2+a | PyDev editor scope
PyDev – Editor | Forces code analysis in the current editor | Ctrl+2+c | PyDev editor scope
PyDev – Editor | Kills all the python shells spawned by PyDev | Ctrl+2+kill | PyDev editor scope
PyDev – Editor | Renames local occurrences | Ctrl+2+r | PyDev editor scope
PyDev – Editor | Wraps the current selected paragraph | Ctrl+2+w | PyDev editor scope
PyDev – Editor | Enables/Disables wrapping in the editor | Ctrl+2+setwrap | PyDev editor scope
PyDev – Editor | Passes the passed command to manage.py in Django | Ctrl+2+dj command | PyDev editor scope
PyDev – Editor | Puts a new line after each comma (split lines) | Ctrl+2+sl | PyDev editor scope
PyDev – Editor | Transforms an import into a string (import string) | Ctrl+2+is | PyDev editor scope
PyDev – Editor | Searches the passed (or selected) string in the open editors | Ctrl+2+s string_to_find | PyDev editor scope
PyDev – Search | Find references | Ctrl+Shift+G | PyDev editor scope
PyDev – Refactor | Python Extract Local Variable | Alt+Shift+L | PyDev editor scope
PyDev – Refactor | Python Extract Method | Alt+Shift+M | PyDev editor scope
PyDev – Refactor | Python Inline Local Variable | Alt+Shift+I | PyDev editor scope
PyDev – Refactor | Python Rename | Alt+Shift+R | PyDev editor scope
Run/Debug | Run current editor | F9 | PyDev editor scope
Run/Debug | Run current editor unit-tests | Ctrl+F9 | PyDev editor scope
Run/Debug | Debug Last Launched | F11 | In Windows
Run/Debug | Run Last Launched | Ctrl+F11 | In Windows
Run/Debug | Step Into | F5 | Debugging
Run/Debug | Step Over | F6 | Debugging
Run/Debug | Step Return | F7 | Debugging
Search | Open Search Dialog | Ctrl+H | In Windows
Text Editing | Copy Lines | Ctrl+Alt+Down | Editing Text
Text Editing | Delete Line | Ctrl+D | Editing Text
Text Editing | Delete Next Word | Ctrl+Delete | Editing Text
Text Editing | Duplicate Lines | Ctrl+Alt+Up | Editing Text
Text Editing | Insert Line Below Current Line | Shift+Enter | Editing Text
Text Editing | Move Lines Down | Alt+Down | Editing Text
Text Editing | Move Lines Up | Alt+Up | Editing Text
Text Editing | Scroll Line Down | Ctrl+Down | Editing Text
Text Editing | Scroll Line Up | Ctrl+Up | Editing Text
Text Editing | To Lower Case | Ctrl+Shift+Y | Editing Text
Text Editing | To Upper Case | Ctrl+Shift+X | Editing Text
Window | Activate Editor | F12 | In Windows
Window | Maximize Active View or Editor | Ctrl+M | In Windows
Window | Next Editor | Ctrl+F6 (LiClipse adds Ctrl+Tab too) | In Windows
Window | Next Perspective | Ctrl+F8 | In Windows
Window | Next View | Ctrl+F7 | In Windows
Window | Open Editor Drop Down | Ctrl+E | In Windows
Window | Show Key Assist | Ctrl+Shift+L | In Dialogs and Windows
Window | Show Ruler Context Menu | Ctrl+F10 | Editing Text
Window | Show System Menu | Alt+- | In Windows
Window | Show View Menu | Ctrl+F10 | In Windows
Window | Switch to Editor | Ctrl+Shift+E | In Windows

R: microbenchmark, reshaping big data features

pacman::p_load(data.table, microbenchmark )

train train_mat

f1 f2