Coverage for tests/test_regression.py: 99%
116 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-07 11:26 +0000
1import os
2import threading
3import unittest
5import numpy as np
6import pandas as pd
8from rulekit import regression
9from rulekit.arff import read_arff
10from rulekit.events import RuleInductionProgressListener
11from rulekit.main import RuleKit
12from rulekit.params import Measures
13from rulekit.rules import RegressionRule
14from rulekit.rules import RuleSet
15from tests.utils import assert_rules_are_equals
16from tests.utils import assert_score_is_greater
17from tests.utils import dir_path
18from tests.utils import get_test_cases
class TestRegressor(unittest.TestCase):
    """Tests for ``regression.RuleRegressor``."""

    @classmethod
    def setUpClass(cls):
        """Initialise RuleKit once before any test of this class runs."""
        RuleKit.init()

    def test_induction_progress_listener(self):
        """Check that a progress listener is notified and can stop induction.

        The listener requests a stop once ``MAX_RULES`` rules were reported,
        so the fitted model is expected to hold exactly that many rules and
        ``on_progress`` is expected to have been called once per rule.
        """
        test_case = get_test_cases("RegressionSnCTest")[0]

        reg = regression.RuleRegressor()
        example_set = test_case.example_set
        MAX_RULES = 3

        class EventListener(RuleInductionProgressListener):
            """Counts listener callbacks; the counters are guarded by ``lock``.

            NOTE(review): the lock suggests callbacks may arrive from another
            thread -- confirm against the listener contract.
            """

            lock = threading.Lock()
            induced_rules_count = 0
            on_progress_calls_count = 0

            def on_new_rule(self, rule: RegressionRule):
                # ``with`` guarantees the lock is released even if the body
                # raises, unlike a bare acquire()/release() pair.
                with self.lock:
                    self.induced_rules_count += 1

            def on_progress(
                self, total_examples_count: int, uncovered_examples_count: int
            ):
                with self.lock:
                    self.on_progress_calls_count += 1

            def should_stop(self) -> bool:
                with self.lock:
                    return self.induced_rules_count == MAX_RULES

        listener = EventListener()
        reg.add_event_listener(listener)
        reg.fit(example_set.values, example_set.labels)

        rules_count = len(reg.model.rules)
        self.assertEqual(rules_count, MAX_RULES)
        self.assertEqual(rules_count, listener.on_progress_calls_count)

    def test_compare_with_java_results(self):
        """Compare induced rules with each test case's reference report."""
        test_cases = get_test_cases("RegressionSnCTest")

        for index, test_case in enumerate(test_cases):
            # subTest identifies the failing case in the test report instead
            # of an anonymous failure somewhere inside the loop.
            with self.subTest(test_case=index):
                params = test_case.induction_params
                tree = regression.RuleRegressor(**params)
                example_set = test_case.example_set
                tree.fit(example_set.values, example_set.labels)
                model = tree.model
                expected = test_case.reference_report.rules
                actual = [str(r) for r in model.rules]
                assert_rules_are_equals(expected, actual)
                assert_score_is_greater(
                    tree.predict(example_set.values), example_set.labels, 0.7
                )

    def test_fit_and_predict_on_boolean_columns(self):
        """Fit and predict on data containing a boolean column.

        Runs twice: first with the labels as provided by the test case, then
        with the labels wrapped in a ``pd.Series``. The test passes when no
        call raises.
        """
        test_case = get_test_cases("RegressionSnCTest")[0]
        params = test_case.induction_params
        clf = regression.RuleRegressor(**params)
        # Work on a copy so the extra column never leaks into the test-case
        # fixture (matters if get_test_cases shares objects between calls).
        X = test_case.example_set.values.copy()
        y = test_case.example_set.labels
        # Seeded generator keeps the test reproducible from run to run.
        rng = np.random.default_rng(0)
        X["boolean_column"] = rng.integers(
            low=0, high=2, size=X.shape[0]
        ).astype(bool)
        clf.fit(X, y)
        clf.predict(X)

        y = pd.Series(y)
        clf.fit(X, y)
        clf.predict(X)

    def test_cholesterol(self):
        """Induce rules on ``cholesterol.arff`` and compare them verbatim."""
        resources_dir: str = os.path.join(dir_path, "additional_resources")
        df: pd.DataFrame = read_arff(
            os.path.join(resources_dir, "cholesterol.arff"))
        X, y = df.drop("class", axis=1), df["class"]

        # Run experiment using python API
        reg = regression.RuleRegressor(
            minsupp_new=0.05,
            max_uncovered_fraction=0.0,
            max_growing=0.0,
            induction_measure=Measures.Accuracy,
            pruning_measure=Measures.Accuracy,
            voting_measure=Measures.Accuracy,
            ignore_missing=False,
            select_best_candidate=False,
            complementary_conditions=True,
            max_rule_count=0,
        )
        reg.fit(X, y)
        actual_rules: list[str] = list(map(str, reg.model.rules))
        expected_rules: list[str] = [
            "IF trestbps = (-inf, 149) THEN class = {244.84} [192.73,296.96]",
            "IF trestbps = <122, inf) THEN class = {250.80} [201.79,299.80]",
        ]
        self.assertEqual(actual_rules, expected_rules)
class TestExpertRegressor(unittest.TestCase):
    """Tests for ``regression.ExpertRuleRegressor``."""

    @classmethod
    def setUpClass(cls):
        """Initialise RuleKit once before any test of this class runs."""
        RuleKit.init()

    def test_compare_with_java_results(self):
        """Compare expert-guided rules with each case's reference report."""
        test_cases = get_test_cases("RegressionExpertSnCTest")

        for index, test_case in enumerate(test_cases):
            # subTest identifies the failing case in the test report instead
            # of an anonymous failure somewhere inside the loop.
            with self.subTest(test_case=index):
                params = test_case.induction_params
                tree = regression.ExpertRuleRegressor(**params)
                example_set = test_case.example_set
                tree.fit(
                    example_set.values,
                    example_set.labels,
                    expert_rules=test_case.knowledge.expert_rules,
                    expert_preferred_conditions=(
                        test_case.knowledge.expert_preferred_conditions
                    ),
                    expert_forbidden_conditions=(
                        test_case.knowledge.expert_forbidden_conditions
                    ),
                )
                model = tree.model
                expected = test_case.reference_report.rules
                actual = [str(r) for r in model.rules]
                assert_rules_are_equals(expected, actual)
                assert_score_is_greater(
                    tree.predict(example_set.values), example_set.labels, 0.66
                )

    def test_legacy_expert_rules_format(self):
        """Test if the legacy expert rules format is still supported.

        In legacy format rules strings contains conclusion in the form
        of "label_attr = {NaN}". In new format conclusion part should be empty
        """
        df: pd.DataFrame = read_arff(
            os.path.join(dir_path, "additional_resources", "cholesterol.arff")
        )
        X, y = df.drop("class", axis=1), df["class"]

        # Run experiment using python API
        reg = regression.ExpertRuleRegressor(
            mean_based_regression=True, extend_using_automatic=True
        )
        expert_rule = "IF trestbps < 149 THEN class = {NaN}"
        legacy_ruleset: RuleSet[RegressionRule] = reg.fit(
            X, y, expert_rules=[("0", expert_rule)]
        )
        # Snapshot the rules as strings BEFORE refitting the same estimator.
        # NOTE(review): if fit() hands back the estimator itself or a model
        # object that is reused across fits (TODO confirm), reading the rules
        # only after the second fit would compare the second result with
        # itself and the assertion could never fail.
        legacy_rules: list[str] = list(map(str, legacy_ruleset.rules))

        new_format_ruleset: RuleSet[RegressionRule] = reg.fit(
            X, y, expert_rules=[("rule-0", expert_rule.split("class")[0])]
        )
        new_format_rules: list[str] = list(map(str, new_format_ruleset.rules))
        self.assertEqual(legacy_rules, new_format_rules)

    def test_refining_conditions_for_nominal_attributes(self):
        """Check expert conditions on the nominal ``sex`` attribute.

        Fits once with the explicit condition ``sex @= {1}`` and once with
        ``sex @= Any``; both runs are expected to yield the same single rule.
        """
        df: pd.DataFrame = read_arff(
            os.path.join(dir_path, "additional_resources", "cholesterol.arff")
        )
        X, y = df.drop("class", axis=1), df["class"]

        # Run experiment using python API
        clf = regression.ExpertRuleRegressor(
            induction_measure=Measures.C2,
            pruning_measure=Measures.C2,
            voting_measure=Measures.C2,
            complementary_conditions=True,
            extend_using_preferred=False,
            extend_using_automatic=False,
            induce_using_preferred=False,
            induce_using_automatic=False,
            preferred_conditions_per_rule=0,
            preferred_attributes_per_rule=0,
        )
        clf.fit(X, y, expert_rules=[("expert_rules-1", "IF sex @= {1} THEN")])
        self.assertEqual(
            ["IF [[sex = {1}]] THEN class = {239.60} [197.06,282.15]"],
            [str(r) for r in clf.model.rules],
            "Ruleset should contain only a single rule configured by expert",
        )

        clf.fit(X, y, expert_rules=[("expert_rules-1", "IF sex @= Any THEN")])
        self.assertEqual(
            ["IF [[sex = {1}]] THEN class = {239.60} [197.06,282.15]"],
            [str(r) for r in clf.model.rules],
            (
                "Ruleset should contain only a single rule configured by expert with "
                "a refined condition"
            ),
        )
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()