Archive for the ‘Data Science’ Category

LightGBM Grid Search Example in R

library(data.table)
library(lightgbm)
# Load the bundled agaricus train/test data and wrap them as LightGBM datasets.
# The validation set is created from `dtrain` via lgb.Dataset.create.valid().
data(agaricus.train, package = "lightgbm")
train <- agaricus.train
dtrain <- lgb.Dataset(train$data, label = train$label, free_raw_data = FALSE)
data(agaricus.test, package = "lightgbm")
test <- agaricus.test
dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
valids <- list(test = dtest)

# Every combination of L1 and L2 regularisation (0..5) at a fixed depth of 8.
grid_search <- expand.grid(Depth = 8,
                           L1 = 0:5,
                           L2 = 0:5)

# Preallocate one slot per grid row instead of growing the list in the loop.
n_configs <- nrow(grid_search)
model <- vector("list", n_configs)
perf <- numeric(n_configs)

for (i in seq_len(n_configs)) {
  # All booster settings go in `params`; passing them as loose extra arguments
  # is not accepted by every lightgbm release.
  model[[i]] <- lgb.train(
    params = list(objective = "regression",
                  metric = "l2",
                  lambda_l1 = grid_search[i, "L1"],
                  lambda_l2 = grid_search[i, "L2"],
                  max_depth = grid_search[i, "Depth"],
                  min_data = 1,
                  learning_rate = 1),
    data = dtrain,
    nrounds = 2,
    valids = valids,
    early_stopping_rounds = 5
  )
  # Best (lowest) validation l2 recorded across the boosting rounds.
  perf[i] <- min(unlist(model[[i]]$record_evals$test$l2$eval))
}

Result:
> cat("Model ", which.min(perf), " is lowest loss: ", min(perf), sep = "")
Model 1 is lowest loss: 1.972152e-31
> print(grid_search[which.min(perf), ])
  Depth L1 L2
1     8  0  0

Example XGBoost Grid Search in Python

import sys
import math

import numpy as np
# NOTE(review): sklearn.grid_search only exists in old scikit-learn releases;
# main() below relies on the matching `grid_scores_` attribute, so both would
# have to be migrated together.
from sklearn.grid_search import GridSearchCV

# Put the local xgboost wrapper directory on the path before importing it.
sys.path.append('xgboost/wrapper/')
import xgboost as xgb
 
 
class XGBoostClassifier():
    """Small estimator-style wrapper around ``xgb.train``.

    The objective is always forced to ``'multi:softprob'``. Any other keyword
    argument is stored in ``self.params`` and handed to ``xgb.train``.
    """

    def __init__(self, num_boost_round=10, **params):
        """Store the number of boosting rounds and the booster parameters."""
        self.clf = None
        self.num_boost_round = num_boost_round
        self.params = params
        self.params.update({'objective': 'multi:softprob'})

    def fit(self, X, y, num_boost_round=None):
        """Train a booster on ``X`` with labels ``y``.

        Labels are mapped to consecutive integers in sorted order; the mapping
        is kept in ``self.label2num``. ``num_boost_round`` overrides the value
        given at construction when it is truthy.

        Returns ``self`` so calls can be chained.
        """
        num_boost_round = num_boost_round or self.num_boost_round
        self.label2num = {label: i for i, label in enumerate(sorted(set(y)))}
        dtrain = xgb.DMatrix(X, label=[self.label2num[label] for label in y])
        self.clf = xgb.train(params=self.params, dtrain=dtrain, num_boost_round=num_boost_round)
        return self

    def predict(self, X):
        """Return, for each row of ``X``, the label with the highest probability."""
        num2label = {i: label for label, i in self.label2num.items()}
        Y = self.predict_proba(X)
        y = np.argmax(Y, axis=1)
        return np.array([num2label[i] for i in y])

    def predict_proba(self, X):
        """Return the trained booster's raw prediction for ``X``."""
        dtest = xgb.DMatrix(X)
        return self.clf.predict(dtest)

    def score(self, X, y):
        """Return the reciprocal of ``logloss`` so that larger means better."""
        Y = self.predict_proba(X)
        return 1 / logloss(y, Y)

    def get_params(self, deep=True):
        """Return a copy of the parameters, including ``num_boost_round``.

        A copy is returned so callers cannot mutate the estimator's state, and
        ``num_boost_round`` is included so that rebuilding the estimator from
        these parameters keeps the configured number of rounds.
        """
        params = dict(self.params)
        params['num_boost_round'] = self.num_boost_round
        return params

    def set_params(self, **params):
        """Update parameters in place and return ``self``.

        ``num_boost_round`` is stored on the instance rather than in the
        booster parameters, and any ``objective`` supplied is ignored.
        """
        if 'num_boost_round' in params:
            self.num_boost_round = params.pop('num_boost_round')
        if 'objective' in params:
            del params['objective']
        self.params.update(params)
        return self
   
   
def logloss(y_true, Y_pred):
    """Mean negative log of the probability assigned to each true label.

    Column indices follow the sorted order of the distinct values in
    ``y_true``. A probability that is not positive contributes ``-inf`` to the
    sum, which makes the result infinite.
    """
    class_index = {name: idx for idx, name in enumerate(sorted(set(y_true)))}
    total = 0
    for probs, label in zip(Y_pred, y_true):
        p = probs[class_index[label]]
        total += math.log(p) if p > 0 else -np.inf
    return -1 * total / len(Y_pred)


def main():
    """Grid-search an XGBoostClassifier on a six-row toy data set.

    Prints the best score, the best parameter values in name order, and the
    prediction for the sample ``[1, 2]``.
    """
    base_settings = dict(
        eval_metric='auc',
        num_class=2,
        nthread=4,
        eta=0.1,
        num_boost_round=80,
        max_depth=12,
        subsample=0.5,
        colsample_bytree=1.0,
        silent=1,
    )
    estimator = XGBoostClassifier(**base_settings)

    parameters = {
        'num_boost_round': [100, 250, 500],
        'eta': [0.05, 0.1, 0.3],
        'max_depth': [6, 9, 12],
        'subsample': [0.9, 1.0],
        'colsample_bytree': [0.9, 1.0],
    }
    search = GridSearchCV(estimator, parameters, n_jobs=1, cv=2)

    features = [[1,2], [3,4], [2,1], [4,3], [1,0], [4,5]]
    labels = ['a', 'b', 'a', 'b', 'a', 'b']
    search.fit(features, labels)

    # Each grid score entry is indexed here as (parameters, score, ...).
    best_entry = max(search.grid_scores_, key=lambda entry: entry[1])
    best_parameters, score, _ = best_entry
    print(score)
    for param_name in sorted(best_parameters.keys()):
        print("%s: %r" % (param_name, best_parameters[param_name]))

    print(search.predict([[1,2]]))


if __name__ == '__main__':
    main()

Raspberry Pi #antisec LED Alert Script

Just a little Python script I wrote to make an LED blink on a Raspberry Pi and to print a message to the screen when there’s a #antisec tweet:

# Jason D. Miller
# github.com/hack-r

from twython import TwythonStreamer
import RPi.GPIO as GPIO
import time

# Twitter API credentials. These are placeholders here and must be filled in.
C_KEY = ""
C_SECRET = ""
A_TOKEN = "-"
A_SECRET = ""

# Configure pin 18 as an output using BCM mode, with GPIO warnings turned off.
GPIO.setwarnings(False)
GPIO.setmode(GPIO.BCM)
GPIO.setup(18, GPIO.OUT)

def blink(pin=18, duration=1):
    """Drive ``pin`` high for ``duration`` seconds, then low again.

    The defaults reproduce the original behaviour (pin 18, one second). The
    pin is set low in a ``finally`` block so it is not left high if the sleep
    is interrupted.
    """
    GPIO.output(pin, GPIO.HIGH)
    try:
        time.sleep(duration)
    finally:
        GPIO.output(pin, GPIO.LOW)

class MyStreamer(TwythonStreamer):
    """Streamer that blinks the LED and prints an alert for each tweet."""

    def on_success(self, data):
        """Handle one streamed message; only those with a 'text' key trigger the alert."""
        if 'text' in data:
            blink()
            print("Antisec Tweet detected. Call the FBI.")

    def on_error(self, status_code, data, headers=None):
        """Report stream errors instead of ignoring them.

        ``headers`` is optional so the handler works whether or not the
        library passes it.
        """
        print("Stream error: %s" % status_code)

stream = MyStreamer(C_KEY, C_SECRET, A_TOKEN, A_SECRET)

try:
    # Track the #antisec hashtag; matching tweets are handled by MyStreamer.
    stream.statuses.filter(track="#antisec")
finally:
    # Release the GPIO channels however the stream ends (error or Ctrl-C).
    GPIO.cleanup()

Happy Pi Day 2016!

Has it really been a whole year?

On Pi Day 2015 hack-r.com posted a tribute to Pi (π) Day, published on GitHub, wherein we created fractals in R based on π, scraped and displayed information on Pi and other fun stuff.

This year, find out how Fibonacci numbers, which are sequences of integers, have a freaky relationship with π! View the entire script on GitHub.


# Pi Fibonacci Sequence ---------------------------------------------------
# Prints a series of arctangent identities whose denominators are Fibonacci
# numbers. Each pair of expressions below evaluates to the same value.
cat("This year, we'll look at the relationship between Pi and Fibonacci sequences. \n")
cat("Until very recently there were just two methods used to compute pi (π),
one invented by the Greek mathematician Archimedes,
and the other by the Scottish mathematician James Gregory. \n")

# A trailing newline keeps the message from running into the next printed value.
cat("If we use Sir Gregory's arc tangent method, you'll notice a pattern...\n")

pi / 4
atan(1)

# Compare with a tolerance; `==` on doubles is sensitive to rounding.
isTRUE(all.equal(pi / 4, atan(1)))

# atan(1/3) = atan(1/5) + atan(1/8)
atan(1 / 3)
atan(1 / 5) + atan(1 / 8)

# atan(1/8) = atan(1/13) + atan(1/21)
atan(1 / 8)
atan(1 / 13) + atan(1 / 21)

cat("We can combine what we saw above\n")
pi / 4
atan(1 / 2) + atan(1 / 3)
atan(1 / 2) + atan(1 / 5) + atan(1 / 8)

# atan(1/21) = atan(1/34) + atan(1/55)
atan(1 / 21)
atan(1 / 34) + atan(1 / 55)

cat("You'll notice that the pattern is a Fibonacci sequence! \n")

cat(" We have just seen that there are infinitely many formulae for π using the Fibonacci numbers!\n")

pi

R: cbind fill for data.table

#' Column-bind objects with different row counts, padding with NA
#'
#' Each argument is converted with `as.matrix()`, extended with `NA` rows up
#' to the largest row count among the arguments, and the results are joined
#' with `cbind()`.
#'
#' @param ... Objects accepted by `as.matrix()`.
#' @return A matrix with as many rows as the tallest input, or `NULL` when
#'   called with no arguments.
cbind.fill <- function(...) {
  nm <- lapply(list(...), as.matrix)
  # max() over an empty set would warn and return -Inf, so stop early.
  if (length(nm) == 0L) {
    return(NULL)
  }
  n <- max(vapply(nm, nrow, integer(1)))
  do.call(cbind, lapply(nm, function(x) {
    rbind(x, matrix(NA, n - nrow(x), ncol(x)))
  }))
}

RStudio-icon

R: Remove constant and identical features programmatically

##### Removing constant features
# A column with a single distinct value in `train` is dropped from both
# `train` and `test`.
cat("\n## Removing the constants features.\n")
for (f in names(train)) {
  if (length(unique(train[[f]])) == 1) {
    cat(f, "is constant in train. We delete it.\n")
    train[[f]] <- NULL
    test[[f]] <- NULL
  }
}

##### Removing identical features
# combn() fails when there are fewer than two names, so guard that case.
feature_names <- names(train)
features_pair <- if (length(feature_names) >= 2) {
  combn(feature_names, 2, simplify = FALSE)
} else {
  list()
}
# Names of the duplicate columns found. They are collected here, not deleted.
toRemove <- c()
for (pair in features_pair) {
  f1 <- pair[1]
  f2 <- pair[2]

  # Scalar condition, so use the short-circuiting `&&`.
  if (!(f1 %in% toRemove) && !(f2 %in% toRemove)) {
    # isTRUE() treats an NA comparison result as "not identical" rather than
    # letting `if` fail on NA.
    if (isTRUE(all(train[[f1]] == train[[f2]]))) {
      cat(f1, "and", f2, "are equals.\n")
      toRemove <- c(toRemove, f2)
    }
  }
}

RStudio-icon

R: microbenchmark, reshaping big data features


# Load data.table and microbenchmark through pacman.
pacman::p_load(data.table, microbenchmark )

# NOTE(review): the next two lines appear truncated. The assignments that
# defined `train`/`train_mat` and `f1`/`f2` are missing, and the lines are not
# valid R as written. Restore them from the original post before running.
train train_mat

f1 f2

# Time f1() and f2(), ten evaluations each.
microbenchmark(f1(),f2(),times=10)

RStudio-icon

Kaggle – my brief shining moment in the top 10

I started playing with the (all too addictive) Kaggle competitions this past December, on and off.

This past week I reached a personal high point by making the top 10 in a featured competition for the first time.

Capture

Since then, my ranking has dropped a bit, but there’s still time for me to take first! 😉 Just don’t hold your breath…

R: Remove constant and identical features programmatically


##### Removing constant features
# A column with a single distinct value in `train` is dropped from both
# `train` and `test`.
cat("\n## Removing the constants features.\n")
for (f in names(train)) {
  if (length(unique(train[[f]])) == 1) {
    cat(f, "is constant in train. We delete it.\n")
    train[[f]] <- NULL
    test[[f]] <- NULL
  }
}

##### Removing identical features
# combn() fails when there are fewer than two names, so guard that case.
feature_names <- names(train)
features_pair <- if (length(feature_names) >= 2) {
  combn(feature_names, 2, simplify = FALSE)
} else {
  list()
}
# Names of the duplicate columns found. They are collected here, not deleted.
toRemove <- c()
for (pair in features_pair) {
  f1 <- pair[1]
  f2 <- pair[2]

  # Scalar condition, so use the short-circuiting `&&`.
  if (!(f1 %in% toRemove) && !(f2 %in% toRemove)) {
    # isTRUE() treats an NA comparison result as "not identical" rather than
    # letting `if` fail on NA.
    if (isTRUE(all(train[[f1]] == train[[f2]]))) {
      cat(f1, "and", f2, "are equals.\n")
      toRemove <- c(toRemove, f2)
    }
  }
}

RStudio-icon

Firebase Quick Tut Transcription

I copied this down from Firebase's interactive 5-minute tutorial. Firebase is useful for storing user-submitted data for mobile apps:

 

Firebase relies on a library that you include in your app. This library gives you easy access to Firebase’s authentication and database features.

To get you started we’ve created an HTML page below. Install the Firebase JavaScript library by adding the following line into the <head> section below:

<script src='https://cdn.firebase.com/js/client/2.2.1/firebase.js'></script>
<!-- Step 1 starter page: the Firebase script tag replaces the placeholder. -->
<html>
  <head>
    [ADD THE SCRIPT TAG HERE]
  </head>
  <body>
  </body>
</html>

2. Accessing your Realtime Database

This tutorial focuses on Firebase’s realtime database. Note that Firebase also has powerful authentication and hosting services.

To access your Firebase database, you’ll first need to create a reference.

References are created using a URL that specifies which data you want to access. We’ve already created a Firebase database specifically for you at this URL: https://u70eg119il2.firebaseio-demo.com/

To complete this step, create a reference to the root of your Firebase database as shown below:

var myDataRef = new Firebase('https://u70eg119il2.firebaseio-demo.com/');
<!-- Step 2 starter page: the database reference replaces the placeholder. -->
<html>
  <head>
    <script src='https://cdn.firebase.com/js/client/2.2.1/firebase.js'></script>
  </head>
  <body>
    <script>
      [ADD NEW FIREBASE CODE HERE]
    </script>
  </body>
</html>

3. Writing Data

Let’s send a chat message

You can use the reference you just created to write data to your Firebase database using the set() function.

To make things easy, we’ve already added input text boxes for the chatter’s name and message as well as a keypress handler that will fire whenever someone tries to send a message.

For this step, write a message to your Firebase database using the set() function as shown:

myDataRef.set('User ' + name + ' says ' + text);
<!-- Step 3 starter page: the set() call replaces the placeholder. -->
<html>
  <head>
    <script src='https://cdn.firebase.com/js/client/2.2.1/firebase.js'></script>
    <script src='https://ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js'></script>
  </head>
  <body>
    <input type='text' id='nameInput' placeholder='Name'>
    <input type='text' id='messageInput' placeholder='Message'>
    <script>
      var myDataRef = new Firebase('https://u70eg119il2.firebaseio-demo.com/');
      $('#messageInput').keypress(function (e) {
        if (e.keyCode == 13) {
          var name = $('#nameInput').val();
          var text = $('#messageInput').val();
          [ADD SET() HERE]
          $('#messageInput').val('');
        }
      });
    </script>
  </body>
</html>

4. Writing Objects

The set() function can also be used to write objects.

Try changing your code to write an object with text and name properties:

myDataRef.set({name: name, text: text});
<!-- Step 4 starter page: set() still writes a string; change it to write an object. -->
<html>
  <head>
    <script src='https://cdn.firebase.com/js/client/2.2.1/firebase.js'></script>
    <script src='https://ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js'></script>
  </head>
  <body>
    <input type='text' id='nameInput' placeholder='Name'>
    <input type='text' id='messageInput' placeholder='Message'>
    <script>
      var myDataRef = new Firebase('https://u70eg119il2.firebaseio-demo.com/');
      $('#messageInput').keypress(function (e) {
        if (e.keyCode == 13) {
          var name = $('#nameInput').val();
          var text = $('#messageInput').val();
          myDataRef.set('User ' + name + ' says ' + text);
          $('#messageInput').val('');
        }
      });
    </script>
  </body>
</html>

5. Writing Lists

The Firebase database supports lists of data.

You’ve already learned how to write data to specific, named locations in the database, but your chat application will require a list of messages. The Firebase database provides a helper function called push() that makes creating lists easy.

Modify your code below to use push() instead of set() so that your chat can support a list of messages (rather than just one):

myDataRef.push({name: name, text: text});
<!-- Step 5 starter page: set() is to be replaced with push(). -->
<html>
  <head>
    <script src='https://cdn.firebase.com/js/client/2.2.1/firebase.js'></script>
    <script src='https://ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js'></script>
  </head>
  <body>
    <input type='text' id='nameInput' placeholder='Name'>
    <input type='text' id='messageInput' placeholder='Message'>
    <script>
      var myDataRef = new Firebase('https://u70eg119il2.firebaseio-demo.com/');
      $('#messageInput').keypress(function (e) {
        if (e.keyCode == 13) {
          var name = $('#nameInput').val();
          var text = $('#messageInput').val();
          myDataRef.set({name: name, text: text});
          $('#messageInput').val('');
        }
      });
    </script>
  </body>
</html>

6. Reading Data

Now let’s receive chat messages.

We need to tell the database to notify us when chat messages arrive. We do this by adding a callback to the list of chat messages using the on() method, as shown below:

myDataRef.on('child_added', function(snapshot) {
  // We'll fill this in later.
});

This method takes two arguments: the event type and the callback function. We’ll use the ‘child_added’ event so that we are notified of the arrival of individual messages.

<!-- Step 6 starter page: the child_added callback replaces the placeholder. -->
<html>
  <head>
    <script src='https://cdn.firebase.com/js/client/2.2.1/firebase.js'></script>
    <script src='https://ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js'></script>
  </head>
  <body>
    <input type='text' id='nameInput' placeholder='Name'>
    <input type='text' id='messageInput' placeholder='Message'>
    <script>
      var myDataRef = new Firebase('https://u70eg119il2.firebaseio-demo.com/');
      $('#messageInput').keypress(function (e) {
        if (e.keyCode == 13) {
          var name = $('#nameInput').val();
          var text = $('#messageInput').val();
          myDataRef.push({name: name, text: text});
          $('#messageInput').val('');
        }
      });
      [ADD YOUR CALLBACK HERE]
    </script>
  </body>
</html>

7. Using Snapshots

Now we need to display the chat messages on the page.

For each chat message, the database will call your callback with a snapshot containing the message’s data.

Extract the message data from the snapshot by calling the val() function and assign it to a variable. Then, call the displayChatMessage() function to display the message as shown:

var message = snapshot.val();
displayChatMessage(message.name, message.text);
<!-- Step 7 starter page: the snapshot-handling code replaces the placeholder. -->
<html>
  <head>
    <script src='https://cdn.firebase.com/js/client/2.2.1/firebase.js'></script>
    <script src='https://ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js'></script>
  </head>
  <body>
    <div id='messagesDiv'></div>
    <input type='text' id='nameInput' placeholder='Name'>
    <input type='text' id='messageInput' placeholder='Message'>
    <script>
      var myDataRef = new Firebase('https://u70eg119il2.firebaseio-demo.com/');
      $('#messageInput').keypress(function (e) {
        if (e.keyCode == 13) {
          var name = $('#nameInput').val();
          var text = $('#messageInput').val();
          myDataRef.push({name: name, text: text});
          $('#messageInput').val('');
        }
      });
      myDataRef.on('child_added', function(snapshot) {
        [MESSAGE CALLBACK CODE GOES HERE]
      });
      function displayChatMessage(name, text) {
        $('<div/>').text(text).prepend($('<em/>').text(name+': ')).appendTo($('#messagesDiv'));
        $('#messagesDiv')[0].scrollTop = $('#messagesDiv')[0].scrollHeight;
      };
    </script>
  </body>
</html>