Use cases

[8]:

from imcp import mcp_score, imcp_score, plot_mcp_curve, plot_imcp_curve

import pandas as pd

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import precision_recall_fscore_support
from sklearn.model_selection import train_test_split

MCP curve

[9]:

X, y = load_iris(return_X_y=True)
algorithm = {'NaiveBayes': GaussianNB()}

algorithm['NaiveBayes'].fit(X, y)
score = {'NaiveBayes': algorithm['NaiveBayes'].predict_proba(X)}

plot_mcp_curve(y, score)

../../_images/rst_tutorials_use_case_3_0.png

[10]:

area = mcp_score(y, score['NaiveBayes'])
print("Area under the MCP curve is equal to {}".format(area))

Area under the MCP curve is equal to 0.9312789283247503

[11]:

X, y = load_iris(return_X_y=True)
algorithms = {'NaiveBayes': GaussianNB(),
             'RandomForestClassifier': RandomForestClassifier(random_state=42),
             'GradientBoostingClassifier': GradientBoostingClassifier(),
             'SVC': SVC(probability=True),}

scores = {}
for key in algorithms:
    algorithms[key].fit(X, y)
    score = algorithms[key].predict_proba(X)
    scores[key] = score

plot_mcp_curve(y, scores)

../../_images/rst_tutorials_use_case_5_0.png

IMCP curve

[12]:

data = pd.read_csv("exp2_1000.csv", sep = "\t")

y = data["class"].to_numpy()
X = data[["X" , "Y"]].to_numpy()

algorithms = {'NaiveBayes': GaussianNB(),
             'GradientBoostingClassifier': GradientBoostingClassifier(),
             'SVC': SVC(probability=True),}
scores = {}
for key in algorithms:
    algorithms[key].fit(X, y)
    score = algorithms[key].predict_proba(X)
    scores[key] = score

plot_imcp_curve(y, scores)

../../_images/rst_tutorials_use_case_7_0.png

Compare MCP and IMCP scores with other metrics

[13]:

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

algorithms = {'NaiveBayes': GaussianNB(),
             'RandomForestClassifier': RandomForestClassifier(random_state=42),
             'GradientBoostingClassifier': GradientBoostingClassifier(),
             'SVC': SVC(probability=True),}

results = pd.DataFrame(index=algorithms.keys(), columns=['mcp', 'imcp', 'macro_precision', 'macro_recall', 'macro_f1'])

for key in algorithms:
    algorithms[key].fit(X_train, y_train)

    score = algorithms[key].predict_proba(X_test)
    results.loc[key, 'mcp'] = mcp_score(y_test, score)
    results.loc[key, 'imcp'] = imcp_score(y_test, score)

    y_pred = algorithms[key].predict(X_test)
    results.loc[key, ['macro_precision', 'macro_recall', 'macro_f1']] = precision_recall_fscore_support(y_test, y_pred, average='macro')[:3]

results

[13]:

	mcp	imcp	macro_precision	macro_recall	macro_f1
NaiveBayes	0.898374	0.890323	0.915535	0.911111	0.910714
RandomForestClassifier	0.881336	0.873677	0.898148	0.888889	0.887767
GradientBoostingClassifier	0.942036	0.932244	0.934524	0.933333	0.933259
SVC	0.791353	0.787423	0.955556	0.955556	0.955556