Regression

This notebook presents an example of using the RuleKit package to solve a regression problem on the methane dataset. You can download the training dataset here and the test dataset here.

This tutorial will cover topics such as:
- training a model
- changing model hyperparameters
- hyperparameter tuning
- calculating metrics for a model
- getting RuleKit built-in ruleset statistics

Summary of the dataset

[1]:

from scipy.io import arff
import pandas as pd

# Names of the ARFF files with the methane training and test data.
train_file_name, test_file_name = "methane-train.arff", "methane-test.arff"

# For each file (train first, then test) wrap element 0 of what
# `arff.loadarff` returns in a DataFrame.
train_df, test_df = (
    pd.DataFrame(arff.loadarff(arff_path)[0])
    for arff_path in (train_file_name, test_file_name)
)

Train file

[2]:

# Print a short textual overview of the training frame; the four lines are
# emitted by one call, separated by newlines.
print(
    "Train file overview:",
    f"Name: {train_file_name}",
    f"Objects number: {train_df.shape[0]}; Attributes number: {train_df.shape[1]}",
    "Basic attribute statistics:",
    sep="\n",
)
# Last expression of the cell, so the summary table is rendered as its output.
train_df.describe()

Train file overview:
Name: methane-train.arff
Objects number: 13368; Attributes number: 8
Basic attribute statistics:

[2]:

	MM31	MM116	AS038	PG072	PD	BA13	DMM116	MM116_pred
count	13368.000000	13368.000000	13368.000000	13368.000000	13368.000000	13368.000000	13368.000000	13368.00000
mean	0.363960	0.775007	2.294734	1.835600	0.308573	1073.443372	-0.000007	0.79825
std	0.117105	0.269366	0.142504	0.106681	0.461922	3.162811	0.043566	0.28649
min	0.170000	0.200000	1.400000	1.100000	0.000000	1067.000000	-1.800000	0.20000
25%	0.260000	0.500000	2.300000	1.800000	0.000000	1070.000000	0.000000	0.50000
50%	0.360000	0.800000	2.300000	1.800000	0.000000	1075.000000	0.000000	0.80000
75%	0.450000	1.000000	2.400000	1.900000	1.000000	1076.000000	0.000000	1.00000
max	0.820000	2.200000	2.700000	2.600000	1.000000	1078.000000	0.800000	2.20000

Test file

[3]:

# Overview of the test frame; the leading "\n" in the first string puts a
# blank line in front of the header.
print(
    "\nTest file overview:",
    f"Name: {test_file_name}",
    f"Objects number: {test_df.shape[0]}; Attributes number: {test_df.shape[1]}",
    "Basic attribute statistics:",
    sep="\n",
)
# Last expression of the cell, so the summary table is rendered as its output.
test_df.describe()


Test file overview:
Name: methane-test.arff
Objects number: 5728; Attributes number: 8
Basic attribute statistics:

[3]:

	MM31	MM116	AS038	PG072	PD	BA13	DMM116	MM116_pred
count	5728.000000	5728.000000	5728.000000	5728.000000	5728.000000	5728.000000	5728.000000	5728.000000
mean	0.556652	1.006913	2.236627	1.819239	0.538408	1072.691690	-0.000017	1.042458
std	0.114682	0.167983	0.104913	0.078865	0.498566	2.799559	0.046849	0.171393
min	0.350000	0.500000	1.800000	1.600000	0.000000	1067.000000	-0.400000	0.600000
25%	0.460000	0.900000	2.200000	1.800000	0.000000	1071.000000	0.000000	0.900000
50%	0.550000	1.000000	2.200000	1.800000	1.000000	1073.000000	0.000000	1.000000
75%	0.640000	1.100000	2.300000	1.900000	1.000000	1075.000000	0.000000	1.200000
max	0.980000	1.600000	2.700000	2.100000	1.000000	1078.000000	0.300000	1.600000

Import RuleKit

[4]:

from rulekit import RuleKit
from rulekit.regression import RuleRegressor
from rulekit.params import Measures

Helper function for calculating metrics

[5]:

import sklearn.tree as scikit
import math
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn import metrics
import pandas as pd
import numpy as np
from typing import Tuple
from math import sqrt


def get_regression_metrics(measure: str, y_pred, y_true) -> pd.DataFrame:
    """Compute a one-row table of regression quality metrics.

    Args:
        measure: Label of the row; it becomes the ``Measure`` index value.
        y_pred: Predicted target values (array-like of numbers).
        y_true: Reference target values, same number of elements as ``y_pred``.

    Returns:
        A single-row DataFrame indexed by ``Measure`` whose columns are the
        absolute, relative and squared error metrics and the correlation
        between ``y_true`` and ``y_pred``.

    Raises:
        ValueError: If the inputs are empty or differ in length.
    """
    # Plain float arrays make every access positional, so a pandas Series
    # with a non-default index (e.g. a slice of a bigger frame) works too.
    true = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()
    if true.size == 0 or true.size != predicted.size:
        raise ValueError(
            f"y_true and y_pred must be non-empty and of equal length, "
            f"got {true.size} and {predicted.size}"
        )

    error = true - predicted
    abs_error = np.abs(error)
    # A zero in `true` (or in the max/min denominators below) produces
    # inf/nan through numpy division rather than raising.
    relative = np.abs(error / true)

    absolute_error = np.mean(abs_error)
    squared_error = np.mean(error ** 2)
    relative_error = np.mean(relative)
    squared_relative_error = np.mean(relative ** 2)
    relative_error_lenient = np.mean(np.abs(error / np.maximum(true, predicted)))
    relative_error_strict = np.mean(np.abs(error / np.minimum(true, predicted)))
    # Mean absolute deviation of the true values from their own mean.
    nae_denominator = np.mean(np.abs(true.mean() - true))
    # np.corrcoef returns the 2x2 correlation matrix; the coefficient between
    # the two inputs is the off-diagonal entry (averaging the whole matrix
    # would give (1 + r) / 2 instead of r).
    correlation = np.corrcoef(true, predicted)[0, 1]

    dictionary = {
        'Measure': measure,
        'absolute_error': absolute_error,
        'relative_error': relative_error,
        'relative_error_lenient': relative_error_lenient,
        'relative_error_strict': relative_error_strict,
        'normalized_absolute_error': absolute_error / nae_denominator,
        'squared_error': squared_error,
        # Root taken explicitly, so no version-dependent keyword is needed.
        'root_mean_squared_error': np.sqrt(squared_error),
        'root_relative_squared_error': np.sqrt(squared_relative_error),
        'correlation': correlation,
        'squared_correlation': np.power(correlation, 2),
    }
    return pd.DataFrame.from_records([dictionary], index='Measure')

def get_ruleset_stats(measure: str, model) -> pd.DataFrame:
    """Collect a model's parameters and statistics into a one-row table.

    Args:
        measure: Label of the row; it becomes the ``Measure`` index value.
        model: Object exposing ``parameters`` and ``stats`` attributes whose
            instance attributes are turned into columns.

    Returns:
        A single-row DataFrame indexed by ``Measure`` holding every attribute
        of ``model.parameters`` except ``_java_object``, followed by every
        attribute of ``model.stats``.
    """
    # Build a filtered copy instead of deleting from ``__dict__`` in place:
    # deleting would strip the attribute from the live parameters object and
    # make a second call on the same model fail with KeyError.
    parameters = {
        name: value
        for name, value in vars(model.parameters).items()
        if name != '_java_object'
    }
    record = {'Measure': measure, **parameters, **vars(model.stats)}
    return pd.DataFrame.from_records([record], index='Measure')

Rule induction on training dataset

[14]:

# MM116_pred is the regression target; all remaining columns are features.
y_train = train_df['MM116_pred']
X_train = train_df.drop(columns=['MM116_pred'])

[8]:

# One regressor per quality measure; each uses the same measure for rule
# induction, pruning and voting.
# NOTE(review): C2 is the only configuration without
# mean_based_regression=True - confirm this is intended.
c2_reg = RuleRegressor(
    induction_measure=Measures.C2,
    pruning_measure=Measures.C2,
    voting_measure=Measures.C2,
)
corr_reg = RuleRegressor(
    induction_measure=Measures.Correlation,
    pruning_measure=Measures.Correlation,
    voting_measure=Measures.Correlation,
    mean_based_regression=True
)
rss_reg = RuleRegressor(
    induction_measure=Measures.RSS,
    pruning_measure=Measures.RSS,
    voting_measure=Measures.RSS,
    mean_based_regression=True
)

# Fit every regressor, score it on the training data, and collect the
# per-measure rows so each table is concatenated once at the end.
train_metric_frames = []
train_stats_frames = []
for measure_name, regressor in (
    ('C2', c2_reg),
    ('Correlation', corr_reg),
    ('RSS', rss_reg),
):
    regressor.fit(X_train, y_train)
    predictions = regressor.predict(X_train)
    train_metric_frames.append(get_regression_metrics(measure_name, predictions, y_train))
    train_stats_frames.append(get_ruleset_stats(measure_name, regressor.model))

# Keep the fitted rulesets under their own names; later cells print their rules.
c2_ruleset = c2_reg.model
corr_ruleset = corr_reg.model
rss_ruleset = rss_reg.model

regression_metrics = pd.concat(train_metric_frames)
ruleset_stats = pd.concat(train_stats_frames)

display(ruleset_stats)
display(regression_metrics)

	minimum_covered	maximum_uncovered_fraction	ignore_missing	pruning_enabled	max_growing_condition	time_total_s	time_growing_s	time_pruning_s	rules_count	conditions_per_rule	induced_conditions_per_rule	avg_rule_coverage	avg_rule_precision	avg_rule_quality	pvalue	FDR_pvalue	FWER_pvalue	fraction_significant	fraction_FDR_significant	fraction_FWER_significant
Measure
C2	5.0	0.0	False	True	0.0	16.881210	2.869502	13.938604	28	3.928571	24.285714	0.169234	0.943278	0.808954	0.001581	0.001581	0.001581	1.00	1.00	1.00
Correlation	5.0	0.0	False	True	0.0	56.457539	3.133929	53.277314	20	3.450000	35.250000	0.218047	0.859963	NaN	0.049860	0.049860	0.049861	0.95	0.95	0.95
RSS	5.0	0.0	False	True	0.0	3.716216	0.348308	3.352011	7	5.142857	22.571429	0.235210	0.724607	NaN	0.000004	0.000004	0.000004	1.00	1.00	1.00

	absolute_error	relative_error	relative_error_lenient	relative_error_strict	normalized_absolute_error	squared_error	root_mean_squared_error	root_relative_squared_error	correlation	squared_correlation
Measure
C2	0.085293	0.100046	0.093069	0.111329	0.362967	0.016060	0.126730	0.132221	0.949559	0.901663
Correlation	0.063712	0.074516	0.070187	0.082596	0.271129	0.011506	0.107265	0.118721	0.969484	0.939899
RSS	0.139782	0.173295	0.145610	0.199137	0.594849	0.040448	0.201118	0.249335	0.861232	0.741721

C2 Measure generated rules

[9]:

# Print every rule of the ruleset induced with the C2 measure, one per line.
for rule in c2_ruleset.rules:
    print(rule)

IF MM31 = (-inf, 0.23) THEN MM116_pred = {0.40} [0.39,0.41]
IF MM116 = <0.35, 0.45) AND DMM116 = <-0.05, inf) AND MM31 = (-inf, 0.24) THEN MM116_pred = {0.40} [0.39,0.42]
IF MM116 = <0.35, 0.45) AND MM31 = (-inf, 0.24) THEN MM116_pred = {0.40} [0.38,0.42]
IF MM31 = <0.24, 0.25) AND BA13 = (-inf, 1076.50) THEN MM116_pred = {0.50} [0.50,0.50]
IF MM116 = (-inf, 0.45) AND DMM116 = <-0.05, inf) AND AS038 = (-inf, 2.45) AND MM31 = <0.19, 0.25) AND PG072 = (-inf, 2.05) THEN MM116_pred = {0.40} [0.38,0.42]
IF MM116 = (-inf, 0.45) THEN MM116_pred = {0.40} [0.37,0.44]
IF MM116 = (-inf, 0.55) AND MM31 = <0.19, 0.29) AND BA13 = <1072.50, inf) THEN MM116_pred = {0.45} [0.39,0.50]
IF PD = (-inf, 0.50) AND MM116 = <0.45, 0.55) AND MM31 = <0.23, inf) AND PG072 = <1.65, inf) AND BA13 = (-inf, 1077.50) THEN MM116_pred = {0.50} [0.48,0.53]
IF MM116 = <0.45, inf) AND DMM116 = <-0.05, inf) AND MM31 = <0.23, 0.30) AND BA13 = <1073.50, 1076.50) THEN MM116_pred = {0.50} [0.48,0.53]
IF MM116 = (-inf, 0.55) AND AS038 = <2.25, inf) AND MM31 = <0.29, inf) AND BA13 = <1076.50, inf) THEN MM116_pred = {0.55} [0.49,0.61]
IF MM116 = (-inf, 0.55) AND DMM116 = <-0.05, inf) AND MM31 = <0.19, 0.31) THEN MM116_pred = {0.45} [0.40,0.51]
IF MM116 = (-inf, 0.55) AND DMM116 = <-0.15, inf) AND MM31 = (-inf, 0.31) THEN MM116_pred = {0.45} [0.39,0.51]
IF MM116 = (-inf, 0.55) AND DMM116 = <-0.05, inf) AND MM31 = (-inf, 0.32) AND BA13 = <1070.50, inf) THEN MM116_pred = {0.45} [0.39,0.51]
IF MM116 = (-inf, 0.55) AND MM31 = <0.32, 0.33) AND PG072 = (-inf, 1.95) AND BA13 = <1074.50, 1076.50) THEN MM116_pred = {0.60} [0.60,0.60]
IF MM116 = (-inf, 0.55) AND MM31 = (-inf, 0.34) THEN MM116_pred = {0.45} [0.39,0.52]
IF MM116 = (-inf, 0.55) AND DMM116 = <-0.05, inf) THEN MM116_pred = {0.45} [0.39,0.52]
IF MM116 = <0.55, 0.70) THEN MM116_pred = {0.61} [0.56,0.65]
IF PD = (-inf, 0.50) AND MM116 = <0.35, inf) AND DMM116 = <-0.05, 0.05) AND MM31 = (-inf, 0.24) THEN MM116_pred = {0.41} [0.37,0.45]
IF MM116 = <0.35, inf) AND DMM116 = <-0.05, 0.15) AND AS038 = <2.05, 2.45) AND MM31 = (-inf, 0.24) THEN MM116_pred = {0.41} [0.36,0.46]
IF PD = (-inf, 0.50) AND MM116 = <0.35, 0.75) AND AS038 = (-inf, 2.45) AND MM31 = (-inf, 0.26) AND PG072 = (-inf, 2.05) THEN MM116_pred = {0.45} [0.38,0.51]
IF PD = <0.50, inf) AND MM116 = <0.55, 0.75) AND DMM116 = <-0.05, inf) AND AS038 = (-inf, 2.35) AND BA13 = <1074.50, inf) THEN MM116_pred = {0.70} [0.60,0.80]
IF MM116 = <0.75, 0.85) THEN MM116_pred = {0.83} [0.76,0.90]
IF MM116 = (-inf, 0.95) AND DMM116 = <-0.05, 0.05) AND AS038 = (-inf, 2.45) AND MM31 = <0.19, 0.26) AND PG072 = (-inf, 2.05) THEN MM116_pred = {0.45} [0.37,0.53]
IF MM116 = <0.95, inf) THEN MM116_pred = {1.15} [0.97,1.32]
IF MM116 = <0.45, 0.75) AND MM31 = <0.23, inf) THEN MM116_pred = {0.60} [0.49,0.71]
IF PD = (-inf, 0.50) AND MM116 = (-inf, 0.95) AND DMM116 = (-inf, 0.05) AND AS038 = (-inf, 2.45) AND MM31 = (-inf, 0.27) AND PG072 = (-inf, 2.05) THEN MM116_pred = {0.46} [0.38,0.54]
IF PD = <0.50, inf) AND MM116 = <0.45, 0.95) AND AS038 = (-inf, 2.35) AND MM31 = <0.26, 0.27) THEN MM116_pred = {0.84} [0.68,1.01]
IF MM116 = <0.85, inf) THEN MM116_pred = {1.06} [0.88,1.24]

Correlation Measure generated rules

[10]:

# Print every rule of the ruleset induced with the Correlation measure, one per line.
for rule in corr_ruleset.rules:
    print(rule)

IF MM31 = (-inf, 0.23) THEN MM116_pred = {0.40} [0.39,0.41]
IF MM116 = (-inf, 0.45) AND DMM116 = <-0.05, inf) AND MM31 = <0.18, 0.24) THEN MM116_pred = {0.40} [0.38,0.42]
IF MM116 = (-inf, 0.45) AND MM31 = <0.18, 0.24) THEN MM116_pred = {0.40} [0.38,0.42]
IF MM31 = (-inf, 0.25) THEN MM116_pred = {0.44} [0.37,0.51]
IF MM31 = (-inf, 0.26) THEN MM116_pred = {0.46} [0.36,0.55]
IF MM31 = (-inf, 0.28) THEN MM116_pred = {0.49} [0.37,0.61]
IF PD = (-inf, 0.50) AND MM116 = <0.25, inf) AND DMM116 = <-0.05, 0.05) AND AS038 = <2, 2.45) AND MM31 = <0.23, inf) AND BA13 = (-inf, 1075.50) THEN MM116_pred = {0.71} [0.50,0.92]
IF MM116 = <0.25, 0.45) AND MM31 = <0.18, inf) AND PG072 = (-inf, 2.05) THEN MM116_pred = {0.40} [0.38,0.43]
IF PD = (-inf, 0.50) AND MM116 = (-inf, 0.25) AND DMM116 = <-0.05, 0.05) AND AS038 = <2.35, 2.45) AND MM31 = <0.19, inf) AND PG072 = <1.75, 1.95) AND BA13 = (-inf, 1076.50) THEN MM116_pred = {0.25} [0.20,0.30]
IF MM116 = (-inf, 0.45) AND DMM116 = <-0.05, inf) AND MM31 = <0.18, inf) AND BA13 = (-inf, 1077.50) THEN MM116_pred = {0.40} [0.37,0.43]
IF MM116 = (-inf, 0.55) AND MM31 = (-inf, 0.32) THEN MM116_pred = {0.45} [0.39,0.51]
IF MM116 = (-inf, 0.55) AND DMM116 = <-0.15, inf) THEN MM116_pred = {0.45} [0.39,0.52]
IF MM116 = <0.45, 0.65) THEN MM116_pred = {0.55} [0.49,0.61]
IF MM116 = <0.45, 0.75) AND DMM116 = <-0.15, inf) THEN MM116_pred = {0.60} [0.49,0.71]
IF MM116 = <0.45, 0.85) AND DMM116 = <-0.15, inf) AND MM31 = <0.25, inf) THEN MM116_pred = {0.70} [0.56,0.84]
IF MM116 = <0.70, inf) AND DMM116 = <-0.30, 0.15) THEN MM116_pred = {0.97} [0.77,1.17]
IF MM116 = <1.05, 1.35) THEN MM116_pred = {1.19} [1.08,1.31]
IF MM116 = <1.35, 1.65) AND MM31 = <0.35, inf) THEN MM116_pred = {1.48} [1.35,1.61]
IF MM116 = <1.65, inf) THEN MM116_pred = {1.84} [1.44,2.24]
IF MM116 = <0.85, 1.15) AND DMM116 = <-0.35, inf) THEN MM116_pred = {1.00} [0.89,1.12]

RSS Measure generated rules

[11]:

# Print every rule of the ruleset induced with the RSS measure, one per line.
for rule in rss_ruleset.rules:
    print(rule)

IF MM31 = (-inf, 0.23) THEN MM116_pred = {0.40} [0.39,0.41]
IF MM116 = (-inf, 0.45) AND MM31 = <0.18, 0.25) AND PG072 = (-inf, 2.05) THEN MM116_pred = {0.40} [0.38,0.43]
IF MM31 = (-inf, 0.26) THEN MM116_pred = {0.46} [0.36,0.55]
IF MM116 = <0.35, inf) AND MM31 = <0.26, inf) THEN MM116_pred = {0.91} [0.67,1.14]
IF PD = (-inf, 0.50) AND MM116 = <0.25, inf) AND DMM116 = <-0.95, 0.05) AND AS038 = <2, 2.45) AND MM31 = <0.23, inf) AND PG072 = <1.65, 2.05) AND BA13 = (-inf, 1075.50) THEN MM116_pred = {0.71} [0.50,0.93]
IF PD = (-inf, 0.50) AND MM116 = (-inf, 0.25) AND DMM116 = <-0.05, 0.05) AND AS038 = <2.35, 2.45) AND MM31 = <0.19, inf) AND PG072 = <1.75, 1.95) AND BA13 = (-inf, 1077.50) THEN MM116_pred = {0.25} [0.20,0.30]
IF MM116 = (-inf, 0.25) THEN MM116_pred = {0.23} [0.19,0.28]

Evaluation on a test set

[7]:

# Split the test frame the same way: MM116_pred is the target, the rest are features.
y_test = test_df['MM116_pred']
X_test = test_df.drop(columns=['MM116_pred'])

[13]:

# Predict the test targets with each of the three fitted regressors ...
c2_predictions = c2_reg.predict(X_test)
corr_predictions = corr_reg.predict(X_test)
rss_predictions = rss_reg.predict(X_test)

# ... then score every prediction vector against the true test targets.
c2_regression_metrics = get_regression_metrics('C2', c2_predictions, y_test)
corr_regression_metrics = get_regression_metrics('Correlation', corr_predictions, y_test)
rss_regression_metrics = get_regression_metrics('RSS', rss_predictions, y_test)

[14]:

# Stack the three per-measure metric rows into one comparison table and render it.
test_metrics_table = pd.concat(
    [c2_regression_metrics, corr_regression_metrics, rss_regression_metrics]
)
display(test_metrics_table)

	absolute_error	relative_error	relative_error_lenient	relative_error_strict	normalized_absolute_error	squared_error	root_mean_squared_error	root_relative_squared_error	correlation	squared_correlation
Measure
C2	0.101333	0.096754	0.090187	0.105576	0.698690	0.016746	0.129405	0.121616	0.829647	0.688314
Correlation	0.093286	0.083513	0.082229	0.094968	0.643208	0.015050	0.122678	0.104504	0.913500	0.834482
RSS	0.186167	0.164586	0.162720	0.212870	1.283621	0.053397	0.231079	0.193394	0.627423	0.393659

Hyperparameters tuning

This one is going to take a while…

[8]:

from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from rulekit.params import Measures


# define models and parameters
model = RuleRegressor(mean_based_regression=True)
minsupp_new = range(5, 7)
measures_choice = [Measures.C2, Measures.Correlation, Measures.RSS]

# define grid search: 2 minsupp_new values x 3 x 3 x 3 measure choices
# = 54 candidates, each evaluated on 3 folds
grid = {
    'minsupp_new': minsupp_new,
    'induction_measure': measures_choice,
    'pruning_measure': measures_choice,
    'voting_measure': measures_choice
}
cv = KFold(n_splits=3)
grid_search = GridSearchCV(
    estimator=model,
    param_grid=grid,
    cv=cv,
    scoring='neg_root_mean_squared_error',
    verbose=True,
)
grid_result = grid_search.fit(X_train, y_train)

# summarize results
# The scorer is the *negated* RMSE, so best_score_ is negative; flip the sign
# so that the number printed under the "RMSE" label is the actual error.
print("Best RMSE: %f using %s" % (-grid_result.best_score_, grid_result.best_params_))

Fitting 3 folds for each of 54 candidates, totalling 162 fits
Best RMSE: -0.129690 using {'induction_measure': <Measures.RSS: 'RSS'>, 'minsupp_new': 6, 'pruning_measure': <Measures.C2: 'C2'>, 'voting_measure': <Measures.C2: 'C2'>}

Prediction using the model selected from the tuning

[9]:

# Take the estimator that the grid search stored as its best one.
reg = grid_result.best_estimator_

[10]:

# Grab the ruleset of the tuned regressor and collect its parameters and statistics.
# The empty string is used as the row label (the `Measure` index value).
ruleset = reg.model
ruleset_stats = get_ruleset_stats('', ruleset)

Generated rules

[11]:

# Print every rule of the tuned model's ruleset, one per line.
for rule in ruleset.rules:
    print(rule)

IF MM31 = (-inf, 0.23) THEN MM116_pred = {0.40} [0.39,0.41]
IF MM116 = <0.35, 0.45) AND DMM116 = <-0.05, inf) AND MM31 = (-inf, 0.24) THEN MM116_pred = {0.40} [0.39,0.42]
IF MM116 = <0.35, 0.45) AND MM31 = (-inf, 0.24) THEN MM116_pred = {0.40} [0.38,0.42]
IF PD = (-inf, 0.50) AND DMM116 = <-0.05, inf) AND AS038 = (-inf, 2.45) AND MM31 = <0.24, 0.25) THEN MM116_pred = {0.50} [0.47,0.54]
IF PD = <0.50, inf) AND MM116 = (-inf, 0.45) AND AS038 = (-inf, 2.45) AND MM31 = <0.24, 0.25) AND PG072 = (-inf, 2.05) THEN MM116_pred = {0.41} [0.38,0.44]
IF PD = (-inf, 0.50) AND MM31 = <0.24, 0.25) THEN MM116_pred = {0.51} [0.47,0.54]
IF DMM116 = <-0.05, 0.05) AND MM31 = (-inf, 0.26) THEN MM116_pred = {0.46} [0.36,0.55]
IF MM116 = (-inf, 0.45) THEN MM116_pred = {0.40} [0.37,0.44]
IF MM116 = <0.45, inf) AND MM31 = <0.23, 0.24) AND BA13 = (-inf, 1075.50) THEN MM116_pred = {0.50} [0.48,0.52]
IF PD = (-inf, 0.50) AND MM116 = <0.45, 0.55) AND DMM116 = <-0.05, inf) AND MM31 = <0.23, inf) AND PG072 = <1.65, inf) THEN MM116_pred = {0.51} [0.48,0.53]
IF MM116 = <0.45, 0.55) AND DMM116 = <-0.05, inf) AND MM31 = <0.23, 0.29) AND PG072 = <1.65, inf) THEN MM116_pred = {0.51} [0.48,0.53]
IF MM116 = <0.35, 0.55) AND DMM116 = (-inf, -0.05) AND MM31 = (-inf, 0.26) AND BA13 = <1077.50, inf) THEN MM116_pred = {0.54} [0.48,0.60]
IF PD = (-inf, 0.50) AND MM116 = <0.45, 0.55) AND AS038 = (-inf, 2.45) AND MM31 = <0.23, inf) AND PG072 = <1.65, inf) AND BA13 = (-inf, 1077.50) THEN MM116_pred = {0.50} [0.48,0.53]
IF PD = <0.50, inf) AND MM116 = <0.45, 0.55) AND DMM116 = <-0.05, 0.05) AND AS038 = <2.25, 2.35) AND MM31 = <0.28, 0.30) AND PG072 = <1.75, 1.95) AND BA13 = <1075.50, 1076.50) THEN MM116_pred = {0.55} [0.50,0.60]
IF PD = <0.50, inf) AND MM116 = (-inf, 0.55) AND MM31 = <0.29, 0.30) AND PG072 = (-inf, 1.95) AND BA13 = (-inf, 1076.50) THEN MM116_pred = {0.55} [0.50,0.60]
IF MM116 = (-inf, 0.55) THEN MM116_pred = {0.45} [0.39,0.52]
IF PD = (-inf, 0.50) AND MM116 = <0.55, 0.65) AND DMM116 = <-0.05, 0.05) AND AS038 = <2.25, 2.45) AND MM31 = <0.26, 0.27) AND PG072 = <1.75, 1.85) AND BA13 = <1074.50, 1077.50) THEN MM116_pred = {0.60} [NaN,NaN]
IF MM116 = <0.45, 0.65) AND MM31 = <0.23, inf) THEN MM116_pred = {0.55} [0.49,0.61]
IF MM116 = <0.55, 0.75) THEN MM116_pred = {0.67} [0.58,0.77]
IF MM116 = <0.75, 0.85) THEN MM116_pred = {0.83} [0.76,0.90]
IF MM116 = <0.85, inf) THEN MM116_pred = {1.06} [0.88,1.24]

Ruleset evaluation

[12]:

# Render the parameters/statistics table collected for the tuned model's ruleset.
display(ruleset_stats)

	minimum_covered	maximum_uncovered_fraction	ignore_missing	pruning_enabled	max_growing_condition	time_total_s	time_growing_s	time_pruning_s	rules_count	conditions_per_rule	induced_conditions_per_rule	avg_rule_coverage	avg_rule_precision	avg_rule_quality	pvalue	FDR_pvalue	FWER_pvalue	fraction_significant	fraction_FDR_significant	fraction_FWER_significant
Measure
	6.0	0.0	False	True	0.0	17.995523	2.274811	15.69378	21	6.52381	29.809524	0.116152	0.849723	NaN	NaN	NaN	NaN	0.952381	0.952381	0.952381

Validate model on test dataset

[15]:

# Predict the test targets with the tuned regressor and score them.
predictions = reg.predict(X_test)
regression_metrics = get_regression_metrics('', predictions, y_test)
# The metrics frame has a single row; show it as a Series (one metric per line).
display(regression_metrics.iloc[0])

absolute_error                 0.111355
relative_error                 0.103524
relative_error_lenient         0.097884
relative_error_strict          0.114888
normalized_absolute_error      0.767792
squared_error                  0.019642
root_mean_squared_error        0.140148
root_relative_squared_error    0.125609
correlation                    0.801204
squared_correlation            0.641927
Name: , dtype: float64

The Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details.