Regression

This notebook presents example usage of the package for solving a regression problem on the methane dataset. You can download the training dataset here and the test dataset here.

This tutorial will cover topics such as:
- training a model
- changing model hyperparameters
- hyperparameter tuning
- calculating metrics for a model
- getting RuleKit's built-in ruleset statistics

Summary of the dataset

[2]:
from scipy.io import arff
import pandas as pd

datasets_path = ""  # directory containing the downloaded ARFF files

train_file_name = "methane-train.arff"
test_file_name = "methane-test.arff"

# load the ARFF files into pandas DataFrames
train_df = pd.DataFrame(arff.loadarff(datasets_path + train_file_name)[0])
test_df = pd.DataFrame(arff.loadarff(datasets_path + test_file_name)[0])
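
A quick sanity check, not part of the original notebook, can confirm that all attributes were loaded as numeric and that there are no missing values:

# sanity check: attribute types and missing values
print(train_df.dtypes)
print("Missing values in train:", int(train_df.isnull().sum().sum()))
print("Missing values in test:", int(test_df.isnull().sum().sum()))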

Train file

[3]:
print("Train file overview:")
print(f"Name: {train_file_name}")
print(f"Objects number: {train_df.shape[0]}; Attributes number: {train_df.shape[1]}")
print("Basic attribute statistics:")
train_df.describe()
Train file overview:
Name: methane-train.arff
Objects number: 13368; Attributes number: 8
Basic attribute statistics:
[3]:
MM31 MM116 AS038 PG072 PD BA13 DMM116 MM116_pred
count 13368.000000 13368.000000 13368.000000 13368.000000 13368.000000 13368.000000 13368.000000 13368.00000
mean 0.363960 0.775007 2.294734 1.835600 0.308573 1073.443372 -0.000007 0.79825
std 0.117105 0.269366 0.142504 0.106681 0.461922 3.162811 0.043566 0.28649
min 0.170000 0.200000 1.400000 1.100000 0.000000 1067.000000 -1.800000 0.20000
25% 0.260000 0.500000 2.300000 1.800000 0.000000 1070.000000 0.000000 0.50000
50% 0.360000 0.800000 2.300000 1.800000 0.000000 1075.000000 0.000000 0.80000
75% 0.450000 1.000000 2.400000 1.900000 1.000000 1076.000000 0.000000 1.00000
max 0.820000 2.200000 2.700000 2.600000 1.000000 1078.000000 0.800000 2.20000

Test file

[4]:
# test file
print("\nTest file overview:")
print(f"Name: {test_file_name}")
print(f"Objects number: {test_df.shape[0]}; Attributes number: {test_df.shape[1]}")
print("Basic attribute statistics:")
test_df.describe()

Test file overview:
Name: methane-test.arff
Objects number: 5728; Attributes number: 8
Basic attribute statistics:
[4]:
MM31 MM116 AS038 PG072 PD BA13 DMM116 MM116_pred
count 5728.000000 5728.000000 5728.000000 5728.000000 5728.000000 5728.000000 5728.000000 5728.000000
mean 0.556652 1.006913 2.236627 1.819239 0.538408 1072.691690 -0.000017 1.042458
std 0.114682 0.167983 0.104913 0.078865 0.498566 2.799559 0.046849 0.171393
min 0.350000 0.500000 1.800000 1.600000 0.000000 1067.000000 -0.400000 0.600000
25% 0.460000 0.900000 2.200000 1.800000 0.000000 1071.000000 0.000000 0.900000
50% 0.550000 1.000000 2.200000 1.800000 1.000000 1073.000000 0.000000 1.000000
75% 0.640000 1.100000 2.300000 1.900000 1.000000 1075.000000 0.000000 1.200000
max 0.980000 1.600000 2.700000 2.100000 1.000000 1078.000000 0.300000 1.600000

Import and init RuleKit

[5]:
from rulekit import RuleKit
from rulekit.regression import RuleRegressor
from rulekit.params import Measures


RuleKit.init()  # starts the JVM running the underlying RuleKit Java library

Helper functions for calculating metrics and ruleset statistics

[6]:
from math import sqrt

import numpy as np
import pandas as pd
from sklearn import metrics


def get_regression_metrics(measure: str, y_pred, y_true) -> pd.DataFrame:
    relative_error = 0
    squared_relative_error = 0
    relative_error_lenient = 0
    relative_error_strict = 0
    nae_denominator = 0
    avg = sum(y_true) / len(y_true)  # mean of the true values, used as the baseline for the normalized absolute error

    for i in range(0, len(y_pred)):
        true = y_true[i]
        predicted = y_pred[i]

        relative_error += abs((true - predicted) / true)
        squared_relative_error += abs((true - predicted) / true) * abs((true - predicted) / true)
        relative_error_lenient += abs((true - predicted) / max(true, predicted))
        relative_error_strict += abs((true - predicted) / min(true, predicted))
        nae_denominator += abs(avg - true)
    relative_error /= len(y_pred)
    squared_relative_error /= len(y_pred)
    relative_error_lenient /= len(y_pred)
    relative_error_strict /= len(y_pred)
    nae_denominator /= len(y_pred)
    # note: np.corrcoef returns the 2x2 correlation matrix, so this averages its entries, i.e. (1 + Pearson r) / 2
    correlation = np.mean(np.corrcoef(y_true, y_pred))

    dictionary = {
        'Measure': measure,
        'absolute_error': metrics.mean_absolute_error(y_true, y_pred),
        'relative_error': relative_error,
        'relative_error_lenient': relative_error_lenient,
        'relative_error_strict': relative_error_strict,
        'normalized_absolute_error': metrics.mean_absolute_error(y_true, y_pred) / nae_denominator,
        'squared_error': metrics.mean_squared_error(y_true, y_pred),
        'root_mean_squared_error': metrics.mean_squared_error(y_true, y_pred, squared=False),
        'root_relative_squared_error': sqrt(squared_relative_error),
        'correlation': correlation,
        'squared_correlation': np.power(correlation, 2),
    }
    return pd.DataFrame.from_records([dictionary], index='Measure')

def get_ruleset_stats(measure: str, model) -> pd.DataFrame:
    # copy the parameters so the fitted model object is left untouched
    params = dict(model.parameters.__dict__)
    params.pop('_java_object', None)
    return pd.DataFrame.from_records([{'Measure': measure, **params, **model.stats.__dict__}], index='Measure')
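
As a quick illustration (not part of the original notebook), the metrics helper can be called on any pair of prediction and ground-truth vectors; the values below are purely hypothetical:

# minimal usage sketch of get_regression_metrics with made-up values
y_true_demo = np.array([0.4, 0.5, 0.6, 0.8])
y_pred_demo = np.array([0.42, 0.48, 0.65, 0.75])
display(get_regression_metrics('demo', y_pred_demo, y_true_demo))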

Rule induction on the training dataset

[7]:
x_train = train_df.drop(['MM116_pred'], axis=1)
y_train = train_df['MM116_pred']
[8]:
# C2
c2_reg = RuleRegressor(
    induction_measure=Measures.C2,
    pruning_measure=Measures.C2,
    voting_measure=Measures.C2,
)
c2_reg.fit(x_train, y_train)
c2_ruleset = c2_reg.model
predictions = c2_reg.predict(x_train)

regression_metrics = get_regression_metrics('C2', predictions, y_train)
ruleset_stats = get_ruleset_stats('C2', c2_ruleset)


# Correlation
corr_reg = RuleRegressor(
    induction_measure=Measures.Correlation,
    pruning_measure=Measures.Correlation,
    voting_measure=Measures.Correlation,
)
corr_reg.fit(x_train, y_train)
corr_ruleset = corr_reg.model
predictions = corr_reg.predict(x_train)

tmp = get_regression_metrics('Correlation', predictions, y_train)
regression_metrics = pd.concat([regression_metrics, tmp])
ruleset_stats = pd.concat([ruleset_stats, get_ruleset_stats('Correlation', corr_ruleset)])


# RSS
rss_reg = RuleRegressor(
    induction_measure=Measures.RSS,
    pruning_measure=Measures.RSS,
    voting_measure=Measures.RSS,
)
rss_reg.fit(x_train, y_train)
rss_ruleset = rss_reg.model
predictions = rss_reg.predict(x_train)

tmp = get_regression_metrics('RSS', predictions, y_train)
regression_metrics = pd.concat([regression_metrics, tmp])
ruleset_stats = pd.concat([ruleset_stats, get_ruleset_stats('RSS', rss_ruleset)])


display(ruleset_stats)
display(regression_metrics)
minimum_covered maximum_uncovered_fraction ignore_missing pruning_enabled max_growing_condition time_total_s time_growing_s time_pruning_s rules_count conditions_per_rule induced_conditions_per_rule avg_rule_coverage avg_rule_precision avg_rule_quality pvalue FDR_pvalue FWER_pvalue fraction_significant fraction_FDR_significant fraction_FWER_significant
Measure
C2 5.0 0.0 False True 0.0 162.647903 109.968198 52.479965 30 4.800000 25.266667 0.145382 0.910635 0.724619 4.967428e-03 4.967460e-03 4.968345e-03 0.966667 0.966667 0.966667
Correlation 5.0 0.0 False True 0.0 89.092752 52.286917 36.736225 14 3.857143 31.142857 0.200041 0.868585 0.850067 0.000000e+00 0.000000e+00 0.000000e+00 1.000000 1.000000 1.000000
RSS 5.0 0.0 False True 0.0 145.849569 82.719695 63.063596 14 3.071429 33.785714 0.268429 0.778758 0.835207 1.840021e-12 1.840021e-12 1.840021e-12 1.000000 1.000000 1.000000
absolute_error relative_error relative_error_lenient relative_error_strict normalized_absolute_error squared_error root_mean_squared_error root_relative_squared_error correlation squared_correlation
Measure
C2 0.113257 0.119912 0.112692 0.146711 0.481968 0.031913 0.178643 0.167711 0.910077 0.828241
Correlation 0.096903 0.099532 0.096365 0.124060 0.412373 0.028386 0.168481 0.147825 0.935456 0.875078
RSS 0.095808 0.111826 0.106694 0.133913 0.407715 0.023018 0.151715 0.151703 0.944984 0.892996

Rules generated with the C2 measure

[9]:
for rule in c2_ruleset.rules:
    print(rule)
IF PD = (-inf, 0.50) AND AS038 = (-inf, 2.35) AND MM31 = <0.21, 0.22) AND BA13 = <1075.50, inf) THEN MM116_pred = {0.40} [0.40,0.40]
IF MM116 = <0.35, 0.45) AND DMM116 = <-0.05, inf) AND MM31 = (-inf, 0.24) THEN MM116_pred = {0.40} [0.38,0.42]
IF MM116 = (-inf, 0.45) AND MM31 = <0.18, 0.24) THEN MM116_pred = {0.40} [0.38,0.42]
IF MM31 = <0.24, 0.25) AND BA13 = (-inf, 1076.50) THEN MM116_pred = {0.50} [0.50,0.50]
IF MM116 = (-inf, 0.45) AND DMM116 = <-0.05, inf) AND AS038 = (-inf, 2.45) AND MM31 = <0.19, 0.25) AND PG072 = (-inf, 2.05) THEN MM116_pred = {0.40} [0.38,0.42]
IF MM116 = (-inf, 0.45) AND DMM116 = (-inf, 0.05) THEN MM116_pred = {0.40} [0.37,0.43]
IF MM116 = <0.35, inf) AND MM31 = (-inf, 0.23) THEN MM116_pred = {0.40} [0.39,0.41]
IF MM116 = <0.35, inf) AND DMM116 = <-0.05, 0.05) AND MM31 = (-inf, 0.24) AND BA13 = (-inf, 1077.50) THEN MM116_pred = {0.40} [0.37,0.43]
IF MM116 = <0.35, inf) AND DMM116 = <-0.05, inf) AND AS038 = <2.05, inf) AND MM31 = (-inf, 0.24) AND BA13 = (-inf, 1077.50) THEN MM116_pred = {0.40} [0.37,0.43]
IF MM116 = <0.35, 0.70) AND DMM116 = <-0.05, 0.05) AND MM31 = (-inf, 0.24) THEN MM116_pred = {0.40} [0.36,0.44]
IF PD = (-inf, 0.50) AND MM116 = <0.35, inf) AND DMM116 = <-0.05, 0.05) AND MM31 = (-inf, 0.24) THEN MM116_pred = {0.40} [0.36,0.44]
IF MM116 = <0.55, inf) AND DMM116 = (-inf, 0.05) THEN MM116_pred = {0.90} [0.69,1.11]
IF MM116 = <0.45, inf) AND MM31 = <0.23, 0.27) AND PG072 = <1.65, inf) AND BA13 = (-inf, 1075.50) THEN MM116_pred = {0.50} [0.49,0.51]
IF PD = (-inf, 0.50) AND MM116 = <0.45, 0.55) AND DMM116 = <-0.05, inf) AND MM31 = <0.23, inf) AND PG072 = <1.65, inf) THEN MM116_pred = {0.50} [0.47,0.53]
IF MM116 = <0.45, 0.55) AND DMM116 = <-0.05, inf) AND PG072 = <1.65, inf) THEN MM116_pred = {0.50} [0.47,0.53]
IF PD = (-inf, 0.50) AND MM116 = <0.45, 0.55) AND AS038 = (-inf, 2.45) AND PG072 = <1.65, inf) THEN MM116_pred = {0.50} [0.47,0.53]
IF MM116 = <0.55, 0.65) AND DMM116 = <0.05, inf) AND MM31 = (-inf, 0.26) AND PG072 = (-inf, 1.85) THEN MM116_pred = {0.60} [0.60,0.60]
IF MM116 = <0.55, 0.95) AND DMM116 = <0.05, inf) AND MM31 = (-inf, 0.27) AND BA13 = <1075.50, inf) THEN MM116_pred = {0.70} [0.59,0.81]
IF MM116 = (-inf, 1.05) AND DMM116 = (-inf, 0.15) AND AS038 = (-inf, 2.45) AND MM31 = (-inf, 0.27) AND PG072 = (-inf, 2.05) THEN MM116_pred = {0.40} [0.30,0.50]
IF PD = (-inf, 0.50) AND DMM116 = <0.05, inf) AND AS038 = <2.35, inf) AND MM31 = <0.27, 0.28) THEN MM116_pred = {0.60} [0.60,0.60]
IF PD = (-inf, 0.50) AND MM116 = <0.55, 0.75) AND DMM116 = <-0.05, inf) AND MM31 = <0.27, 0.30) THEN MM116_pred = {0.60} [0.57,0.63]
IF MM116 = <0.55, 0.85) AND MM31 = <0.27, 0.30) THEN MM116_pred = {0.60} [0.51,0.69]
IF MM116 = <0.45, 0.55) AND DMM116 = <-0.15, inf) AND MM31 = (-inf, 0.30) AND PG072 = <1.65, inf) THEN MM116_pred = {0.50} [0.47,0.53]
IF DMM116 = (-inf, 0.15) AND AS038 = (-inf, 2.55) AND MM31 = <0.19, 0.30) AND PG072 = <1.55, inf) THEN MM116_pred = {0.50} [0.37,0.63]
IF MM116 = (-inf, 0.95) AND DMM116 = <-0.30, inf) AND AS038 = <2.25, 2.45) AND MM31 = <0.28, 0.31) AND PG072 = <1.75, 1.95) AND BA13 = (-inf, 1077.50) THEN MM116_pred = {0.60} [0.50,0.70]
IF MM116 = <0.45, 1.10) AND DMM116 = <-0.15, inf) AND AS038 = <2.15, 2.45) AND MM31 = (-inf, 0.31) AND BA13 = <1072.50, 1077.50) THEN MM116_pred = {0.50} [0.40,0.60]
IF PD = (-inf, 0.50) AND MM116 = <0.45, 0.95) AND MM31 = <0.30, inf) AND BA13 = (-inf, 1076.50) THEN MM116_pred = {0.80} [0.68,0.92]
IF MM116 = <0.35, 0.65) AND AS038 = (-inf, 2.45) AND MM31 = <0.29, inf) AND BA13 = (-inf, 1076.50) THEN MM116_pred = {0.60} [0.56,0.64]
IF MM116 = <0.65, inf) AND DMM116 = <0.05, inf) AND AS038 = <2.15, inf) AND MM31 = <0.30, 0.32) AND PG072 = (-inf, 1.95) AND BA13 = <1074.50, inf) THEN MM116_pred = {1.20} [1.00,1.40]
IF MM116 = <0.45, inf) AND DMM116 = <-0.15, inf) AND MM31 = <0.32, inf) THEN MM116_pred = {0.90} [0.69,1.11]

Rules generated with the Correlation measure

[10]:
for rule in corr_ruleset.rules:
    print(rule)
IF MM116 = (-inf, 0.45) AND DMM116 = <-0.05, inf) AND MM31 = <0.18, 0.24) THEN MM116_pred = {0.40} [0.38,0.42]
IF MM116 = (-inf, 0.45) AND MM31 = <0.18, 0.24) THEN MM116_pred = {0.40} [0.38,0.42]
IF MM31 = (-inf, 0.25) THEN MM116_pred = {0.40} [0.33,0.47]
IF DMM116 = (-inf, 0.05) AND AS038 = (-inf, 2.45) AND MM31 = (-inf, 0.26) THEN MM116_pred = {0.40} [0.31,0.49]
IF MM31 = <0.18, 0.28) THEN MM116_pred = {0.50} [0.38,0.62]
IF MM116 = <0.25, 0.45) AND DMM116 = <-0.05, inf) AND MM31 = <0.18, inf) AND BA13 = (-inf, 1077.50) THEN MM116_pred = {0.40} [0.38,0.42]
IF MM116 = (-inf, 0.45) AND DMM116 = <-0.05, inf) AND MM31 = <0.18, inf) AND BA13 = (-inf, 1077.50) THEN MM116_pred = {0.40} [0.37,0.43]
IF MM116 = <0.45, 0.55) AND DMM116 = <-0.05, inf) AND PG072 = <1.65, inf) THEN MM116_pred = {0.50} [0.47,0.53]
IF MM116 = <0.55, 0.65) AND DMM116 = <-0.15, inf) AND MM31 = <0.24, inf) THEN MM116_pred = {0.60} [0.56,0.64]
IF MM116 = <0.45, 0.75) AND DMM116 = <-0.05, inf) AND MM31 = (-inf, 0.29) AND BA13 = <1072.50, 1077.50) THEN MM116_pred = {0.50} [0.47,0.53]
IF MM116 = <0.45, 0.75) AND DMM116 = <-0.05, inf) AND AS038 = (-inf, 2.45) AND MM31 = (-inf, 0.30) AND BA13 = <1072.50, 1077.50) THEN MM116_pred = {0.50} [0.46,0.54]
IF PD = (-inf, 0.50) AND MM116 = <0.45, 0.85) AND AS038 = (-inf, 2.45) AND MM31 = (-inf, 0.29) AND BA13 = <1072.50, 1077.50) THEN MM116_pred = {0.50} [0.44,0.56]
IF MM116 = <0.45, 0.85) AND MM31 = <0.26, inf) THEN MM116_pred = {0.70} [0.57,0.83]
IF MM116 = <0.70, inf) THEN MM116_pred = {0.90} [0.70,1.10]

Rules generated with the RSS measure

[11]:
for rule in rss_ruleset.rules:
    print(rule)
IF MM31 = (-inf, 0.23) THEN MM116_pred = {0.40} [0.39,0.41]
IF MM116 = (-inf, 0.45) AND MM31 = <0.18, 0.25) AND PG072 = (-inf, 2.05) THEN MM116_pred = {0.40} [0.38,0.42]
IF MM31 = (-inf, 0.26) THEN MM116_pred = {0.40} [0.30,0.50]
IF MM31 = <0.18, 0.28) THEN MM116_pred = {0.50} [0.38,0.62]
IF PD = (-inf, 0.50) AND MM116 = <0.25, inf) AND DMM116 = <-0.95, inf) AND MM31 = <0.23, inf) AND BA13 = (-inf, 1075.50) THEN MM116_pred = {0.70} [0.48,0.92]
IF MM116 = (-inf, 0.25) THEN MM116_pred = {0.20} [0.15,0.25]
IF MM116 = (-inf, 0.55) AND DMM116 = <-0.15, inf) AND MM31 = <0.23, inf) AND PG072 = <1.65, inf) THEN MM116_pred = {0.50} [0.44,0.56]
IF MM116 = (-inf, 0.65) AND DMM116 = <-0.15, 0.15) AND MM31 = <0.23, 0.40) AND PG072 = <1.65, inf) AND BA13 = <1070.50, inf) THEN MM116_pred = {0.50} [0.43,0.57]
IF DMM116 = <-0.25, 0.15) AND MM31 = (-inf, 0.32) AND PG072 = <1.55, inf) THEN MM116_pred = {0.50} [0.36,0.64]
IF MM116 = (-inf, 0.75) AND DMM116 = <-0.15, inf) AND AS038 = <2.15, inf) AND BA13 = <1069.50, inf) THEN MM116_pred = {0.50} [0.37,0.63]
IF MM116 = (-inf, 0.75) AND MM31 = <0.23, 0.46) THEN MM116_pred = {0.60} [0.48,0.72]
IF MM116 = (-inf, 0.85) AND DMM116 = (-inf, 0.15) AND AS038 = (-inf, 2.55) AND MM31 = (-inf, 0.33) THEN MM116_pred = {0.50} [0.37,0.63]
IF MM116 = <0.85, inf) THEN MM116_pred = {1} [0.82,1.18]
IF MM116 = <0.65, 0.85) THEN MM116_pred = {0.80} [0.71,0.89]
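
If the rules are needed outside the notebook, they can be dumped to a plain text file; this is a small sketch not present in the original notebook, and the file name is arbitrary:

# write the RSS ruleset to a text file (hypothetical file name)
with open("rss_rules.txt", "w") as f:
    for rule in rss_ruleset.rules:
        f.write(str(rule) + "\n")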

Evaluation on a test set

[12]:
x_test = test_df.drop(['MM116_pred'], axis=1)
y_test = test_df['MM116_pred']
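
Before predicting, a one-line sanity check (added here, not in the original notebook) confirms that the test set exposes the same attributes as the training set:

# the rule models expect the same attribute columns as used during training
assert list(x_test.columns) == list(x_train.columns)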
[13]:
# C2
c2_predictions = c2_reg.predict(x_test)
c2_regression_metrics = get_regression_metrics('C2', c2_predictions, y_test)

# Correlation
corr_predictions = corr_reg.predict(x_test)
corr_regression_metrics = get_regression_metrics('Correlation', corr_predictions, y_test)

# RSS
rss_predictions = rss_reg.predict(x_test)
rss_regression_metrics = get_regression_metrics('RSS', rss_predictions, y_test)

[14]:
display(pd.concat([c2_regression_metrics, corr_regression_metrics, rss_regression_metrics]))
absolute_error relative_error relative_error_lenient relative_error_strict normalized_absolute_error squared_error root_mean_squared_error root_relative_squared_error correlation squared_correlation
Measure
C2 0.175348 0.153675 0.152205 0.197185 1.209023 0.049460 0.222395 0.183108 0.768065 0.589923
Correlation 0.167494 0.143461 0.143185 0.187964 1.154868 0.049314 0.222068 0.181535 0.801878 0.643009
RSS 0.138080 0.121742 0.120687 0.150155 0.952062 0.032734 0.180926 0.151750 0.815327 0.664758
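
To complement the tabulated metrics, predicted versus true values can be plotted for each measure. This sketch assumes matplotlib is available; it is not part of the original notebook:

# scatter of predicted vs. true target values on the test set (requires matplotlib)
import matplotlib.pyplot as plt

fig, axes = plt.subplots(1, 3, figsize=(15, 4), sharex=True, sharey=True)
for ax, (name, preds) in zip(
    axes,
    [("C2", c2_predictions), ("Correlation", corr_predictions), ("RSS", rss_predictions)],
):
    ax.scatter(y_test, preds, s=5, alpha=0.3)
    ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], color="red")  # ideal fit
    ax.set_title(name)
    ax.set_xlabel("true MM116_pred")
axes[0].set_ylabel("predicted MM116_pred")
plt.show()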

Hyperparameter tuning

This one is going to take a while…

[15]:
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from rulekit.params import Measures


# define models and parameters
model = RuleRegressor()
min_rule_covered = [5]  # range(3, 15) could be used for a broader (and much slower) search
measures_choice = [Measures.C2, Measures.RSS, Measures.WeightedLaplace, Measures.Correlation]

# define grid search
grid = {
    'min_rule_covered': min_rule_covered,
    'induction_measure': measures_choice,
    'pruning_measure': measures_choice,
    'voting_measure': measures_choice
}
cv = KFold(n_splits=3)
grid_search = GridSearchCV(estimator=model, param_grid=grid, cv=cv, scoring='neg_root_mean_squared_error')
grid_result = grid_search.fit(x_train, y_train)

# summarize results
print("Best RMSE: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Prediction using the model selected by the tuning

[23]:
reg = grid_result.best_estimator_
[26]:
ruleset = reg.model
ruleset_stats = get_ruleset_stats('', ruleset)

Generated rules

[27]:
for rule in ruleset.rules:
    print(rule)
IF PD = (-inf, 0.50) AND AS038 = (-inf, 2.35) AND MM31 = <0.21, 0.22) AND BA13 = <1075.50, inf) THEN MM116_pred = {0.40} [0.40,0.40]
IF MM116 = <0.35, 0.45) AND DMM116 = <-0.05, inf) AND MM31 = (-inf, 0.24) THEN MM116_pred = {0.40} [0.38,0.42]
IF MM116 = (-inf, 0.45) AND MM31 = <0.18, 0.24) THEN MM116_pred = {0.40} [0.38,0.42]
IF MM31 = <0.24, 0.25) AND BA13 = (-inf, 1076.50) THEN MM116_pred = {0.50} [0.50,0.50]
IF MM116 = (-inf, 0.45) AND DMM116 = <-0.05, inf) AND AS038 = (-inf, 2.45) AND MM31 = <0.19, 0.25) AND PG072 = (-inf, 2.05) THEN MM116_pred = {0.40} [0.38,0.42]
IF MM116 = (-inf, 0.45) AND DMM116 = (-inf, 0.05) THEN MM116_pred = {0.40} [0.37,0.43]
IF MM116 = <0.35, inf) AND MM31 = (-inf, 0.23) THEN MM116_pred = {0.40} [0.39,0.41]
IF MM116 = <0.35, inf) AND DMM116 = <-0.05, 0.05) AND MM31 = (-inf, 0.24) AND BA13 = (-inf, 1077.50) THEN MM116_pred = {0.40} [0.37,0.43]
IF MM116 = <0.35, inf) AND DMM116 = <-0.05, inf) AND AS038 = <2.05, inf) AND MM31 = (-inf, 0.24) AND BA13 = (-inf, 1077.50) THEN MM116_pred = {0.40} [0.37,0.43]
IF MM116 = <0.35, 0.70) AND DMM116 = <-0.05, 0.05) AND MM31 = (-inf, 0.24) THEN MM116_pred = {0.40} [0.36,0.44]
IF PD = (-inf, 0.50) AND MM116 = <0.35, inf) AND DMM116 = <-0.05, 0.05) AND MM31 = (-inf, 0.24) THEN MM116_pred = {0.40} [0.36,0.44]
IF MM116 = <0.55, inf) AND DMM116 = (-inf, 0.05) THEN MM116_pred = {0.90} [0.69,1.11]
IF MM116 = <0.45, inf) AND MM31 = <0.23, 0.27) AND PG072 = <1.65, inf) AND BA13 = (-inf, 1075.50) THEN MM116_pred = {0.50} [0.49,0.51]
IF PD = (-inf, 0.50) AND MM116 = <0.45, 0.55) AND DMM116 = <-0.05, inf) AND MM31 = <0.23, inf) AND PG072 = <1.65, inf) THEN MM116_pred = {0.50} [0.47,0.53]
IF MM116 = <0.45, 0.55) AND DMM116 = <-0.05, inf) AND PG072 = <1.65, inf) THEN MM116_pred = {0.50} [0.47,0.53]
IF PD = (-inf, 0.50) AND MM116 = <0.45, 0.55) AND AS038 = (-inf, 2.45) AND PG072 = <1.65, inf) THEN MM116_pred = {0.50} [0.47,0.53]
IF MM116 = <0.55, 0.65) AND DMM116 = <0.05, inf) AND MM31 = (-inf, 0.26) AND PG072 = (-inf, 1.85) THEN MM116_pred = {0.60} [0.60,0.60]
IF MM116 = <0.55, 0.95) AND DMM116 = <0.05, inf) AND MM31 = (-inf, 0.27) AND BA13 = <1075.50, inf) THEN MM116_pred = {0.70} [0.59,0.81]
IF MM116 = (-inf, 1.05) AND DMM116 = (-inf, 0.15) AND AS038 = (-inf, 2.45) AND MM31 = (-inf, 0.27) AND PG072 = (-inf, 2.05) THEN MM116_pred = {0.40} [0.30,0.50]
IF PD = (-inf, 0.50) AND DMM116 = <0.05, inf) AND AS038 = <2.35, inf) AND MM31 = <0.27, 0.28) THEN MM116_pred = {0.60} [0.60,0.60]
IF PD = (-inf, 0.50) AND MM116 = <0.55, 0.75) AND DMM116 = <-0.05, inf) AND MM31 = <0.27, 0.30) THEN MM116_pred = {0.60} [0.57,0.63]
IF MM116 = <0.55, 0.85) AND MM31 = <0.27, 0.30) THEN MM116_pred = {0.60} [0.51,0.69]
IF MM116 = <0.45, 0.55) AND DMM116 = <-0.15, inf) AND MM31 = (-inf, 0.30) AND PG072 = <1.65, inf) THEN MM116_pred = {0.50} [0.47,0.53]
IF DMM116 = (-inf, 0.15) AND AS038 = (-inf, 2.55) AND MM31 = <0.19, 0.30) AND PG072 = <1.55, inf) THEN MM116_pred = {0.50} [0.37,0.63]
IF MM116 = (-inf, 0.95) AND DMM116 = <-0.30, inf) AND AS038 = <2.25, 2.45) AND MM31 = <0.28, 0.31) AND PG072 = <1.75, 1.95) AND BA13 = (-inf, 1077.50) THEN MM116_pred = {0.60} [0.50,0.70]
IF MM116 = <0.45, 1.10) AND DMM116 = <-0.15, inf) AND AS038 = <2.15, 2.45) AND MM31 = (-inf, 0.31) AND BA13 = <1072.50, 1077.50) THEN MM116_pred = {0.50} [0.40,0.60]
IF PD = (-inf, 0.50) AND MM116 = <0.45, 0.95) AND MM31 = <0.30, inf) AND BA13 = (-inf, 1076.50) THEN MM116_pred = {0.80} [0.68,0.92]
IF MM116 = <0.35, 0.65) AND AS038 = (-inf, 2.45) AND MM31 = <0.29, inf) AND BA13 = (-inf, 1076.50) THEN MM116_pred = {0.60} [0.56,0.64]
IF MM116 = <0.65, inf) AND DMM116 = <0.05, inf) AND AS038 = <2.15, inf) AND MM31 = <0.30, 0.32) AND PG072 = (-inf, 1.95) AND BA13 = <1074.50, inf) THEN MM116_pred = {1.20} [1.00,1.40]
IF MM116 = <0.45, inf) AND DMM116 = <-0.15, inf) AND MM31 = <0.32, inf) THEN MM116_pred = {0.90} [0.69,1.11]

Ruleset evaluation

[28]:
display(ruleset_stats)
minimum_covered maximum_uncovered_fraction ignore_missing pruning_enabled max_growing_condition time_total_s time_growing_s time_pruning_s rules_count conditions_per_rule induced_conditions_per_rule avg_rule_coverage avg_rule_precision avg_rule_quality pvalue FDR_pvalue FWER_pvalue fraction_significant fraction_FDR_significant fraction_FWER_significant
Measure
5.0 0.0 False True 0.0 105.966684 71.493354 34.399725 30 4.8 25.266667 0.145382 0.910635 0.724619 0.004967 0.004967 0.004968 0.966667 0.966667 0.966667

Validate the model on the test dataset

[30]:
predictions = reg.predict(x_test)
regression_metrics = get_regression_metrics('', predictions, y_test)
display(regression_metrics.iloc[0])
absolute_error relative_error relative_error_lenient relative_error_strict normalized_absolute_error squared_error root_mean_squared_error root_relative_squared_error correlation squared_correlation
Measure
0.175348 0.153675 0.152205 0.197185 1.209023 0.04946 0.222395 0.183108 0.768065 0.589923