Discovering Data
  • Home
  • Blog

#100DaysOfDataScience

Day 13 - DrivenData blood donor prediction competition, code

7/18/2018

0 Comments

 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm as cm
from sklearn import tree, preprocessing
import sklearn.ensemble as ske
from sklearn.model_selection import train_test_split

# Load the competition training set; 'train.csv' is resolved relative to the
# current working directory.
df = pd.read_csv('train.csv')
def missing_values_table(df):
    """Summarize the columns of ``df`` that contain missing values.

    Prints a two-line report (the total number of columns and how many of
    them have at least one missing value) and returns the per-column details.

    Args:
        df: pandas DataFrame to inspect.

    Returns:
        A DataFrame indexed by column name with the columns
        'Missing Values' (null count) and '% of Total Values' (null count as
        a percentage of the row count, rounded to one decimal). Only columns
        with at least one missing value are included, sorted by percentage
        in descending order.
    """
    # Count nulls once and derive the percentage from that same count.
    mis_val = df.isnull().sum()
    mis_val_percent = 100 * mis_val / len(df)
    mis_val_table = pd.DataFrame({
        'Missing Values': mis_val,
        '% of Total Values': mis_val_percent,
    })
    # Filter on the raw count rather than on the percentage: for a frame with
    # no rows the percentage is 0/0 = NaN, and ``NaN != 0`` would wrongly
    # report every column as having missing values.
    mis_val_table = (
        mis_val_table[mis_val_table['Missing Values'] > 0]
        .sort_values('% of Total Values', ascending=False)
        .round(1)
    )
    print("Your selected dataframe has " + str(df.shape[1]) + " columns.\n"
          "There are " + str(mis_val_table.shape[0]) +
          " columns that have missing values.")
    return mis_val_table
    
# Build the missing-value summary for the training data (the call also prints
# a short report).
df_m = missing_values_table(df)



def plot_corr(df,size=4):
    """Show the correlation matrix of ``df`` as a greyscale image.

    Args:
        df: pandas DataFrame whose column-to-column correlations are plotted.
        size: Width and height passed to ``figsize``, so the figure is square.
    """
    corr_matrix = df.corr()
    labels = corr_matrix.columns
    positions = range(len(labels))

    _, ax = plt.subplots(figsize=(size, size))
    ax.matshow(corr_matrix, cmap=cm.Greys)

    # Label both axes with the column names; the x labels are rotated so
    # long names do not run into each other.
    plt.xticks(positions, labels, rotation='vertical')
    plt.yticks(positions, labels)
    plt.show()
    
    
# Display the correlation matrix of the training columns on a 10x10 figure.
plot_corr(df,10)

# Split the frame into the feature matrix and the target column.
TARGET_COLUMN = 'Made Donation in March 2007'
X = df.drop([TARGET_COLUMN], axis=1).values
y = df[TARGET_COLUMN].values

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3, random_state=0)

# Seed the models as well: with a fixed split but unseeded estimators the
# printed scores could still change from run to run.
clf_rf = ske.RandomForestClassifier(n_estimators=50, random_state=0)
clf_rf = clf_rf.fit(X_train, y_train)
# For classifiers, score() reports mean accuracy on the given data.
print(clf_rf.score(X_test,y_test))

clf_gb = ske.GradientBoostingClassifier(n_estimators=50, random_state=0)
clf_gb = clf_gb.fit(X_train, y_train)
print(clf_gb.score(X_test,y_test))

# Importances of the gradient-boosting model, one value per column of X
# (same order as the feature columns of df).
importance = clf_gb.feature_importances_
print(importance)


0 Comments



Leave a Reply.

Proudly powered by Weebly
  • Home
  • Blog