Coverage for rulekit/regression.py: 80%
82 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-07 11:26 +0000
1"""Module containing classes for regression analysis and prediction.
2"""
3from __future__ import annotations
5from numbers import Number
6from typing import Tuple
7from typing import Union
9import numpy as np
10import pandas as pd
11from sklearn import metrics
13from rulekit._helpers import PredictionResultMapper
14from rulekit._operator import BaseOperator
15from rulekit._operator import Data
16from rulekit._operator import ExpertKnowledgeOperator
17from rulekit._problem_types import ProblemType
18from rulekit.params import ContrastSetModelParams
19from rulekit.params import DEFAULT_PARAMS_VALUE
20from rulekit.params import ExpertModelParams
21from rulekit.params import Measures
22from rulekit.params import ModelsParams
23from rulekit.rules import RegressionRule
24from rulekit.rules import RuleSet
class _RegressionModelParams(ModelsParams):
    """Internal parameter schema for regression rule models.

    Extends the shared :class:`rulekit.params.ModelsParams` with the single
    regression-specific flag below.
    """

    # When True, rules are induced from means (faster) instead of medians.
    mean_based_regression: bool = DEFAULT_PARAMS_VALUE["mean_based_regression"]
class _RegressionExpertModelParams(_RegressionModelParams, ExpertModelParams):
    """Internal parameter schema combining regression parameters with
    expert-knowledge induction parameters (used by :class:`ExpertRuleRegressor`).
    """

    pass
class RuleRegressor(BaseOperator):
    """Regression model."""

    __params_class__ = _RegressionModelParams

    def __init__(  # pylint: disable=too-many-arguments
        self,
        minsupp_new: float = DEFAULT_PARAMS_VALUE["minsupp_new"],
        induction_measure: Measures = DEFAULT_PARAMS_VALUE["induction_measure"],
        pruning_measure: Union[Measures, str] = DEFAULT_PARAMS_VALUE["pruning_measure"],
        voting_measure: Measures = DEFAULT_PARAMS_VALUE["voting_measure"],
        max_growing: float = DEFAULT_PARAMS_VALUE["max_growing"],
        enable_pruning: bool = DEFAULT_PARAMS_VALUE["enable_pruning"],
        ignore_missing: bool = DEFAULT_PARAMS_VALUE["ignore_missing"],
        max_uncovered_fraction: float = DEFAULT_PARAMS_VALUE["max_uncovered_fraction"],
        select_best_candidate: bool = DEFAULT_PARAMS_VALUE["select_best_candidate"],
        complementary_conditions: bool = DEFAULT_PARAMS_VALUE[
            "complementary_conditions"
        ],
        mean_based_regression: bool = DEFAULT_PARAMS_VALUE["mean_based_regression"],
        max_rule_count: int = DEFAULT_PARAMS_VALUE["max_rule_count"],
    ):
        """
        Parameters
        ----------
        minsupp_new : float = 5.0
            a minimum number (or fraction, if value < 1.0) of previously uncovered
            examples to be covered by a new rule (positive examples for
            classification problems); default: 5,
        induction_measure : :class:`rulekit.params.Measures` = \
            :class:`rulekit.params.Measures.Correlation`
            measure used during induction; default measure is correlation
        pruning_measure : Union[:class:`rulekit.params.Measures`, str] = \
            :class:`rulekit.params.Measures.Correlation`
            measure used during pruning. Could be user defined (string), for example
            :code:`2 * p / n`; default measure is correlation
        voting_measure : :class:`rulekit.params.Measures` = \
            :class:`rulekit.params.Measures.Correlation`
            measure used during voting; default measure is correlation
        max_growing : int = 0.0
            non-negative integer representing maximum number of conditions which can
            be added to the rule in the growing phase (use this parameter for large
            datasets if execution time is prohibitive); 0 indicates no limit;
            default: 0
        enable_pruning : bool = True
            enable or disable pruning, default is True.
        ignore_missing : bool = False
            boolean telling whether missing values should be ignored (by default, a
            missing value of given attribute is always considered as not fulfilling
            the condition build upon that attribute); default: False.
        max_uncovered_fraction : float = 0.0
            Floating-point number from [0,1] interval representing maximum fraction
            of examples that may remain uncovered by the rule set, default: 0.0.
        select_best_candidate : bool = False
            Flag determining if best candidate should be selected from growing
            phase; default: False.
        complementary_conditions : bool = False
            If enabled, complementary conditions in the form a = !{value} for
            nominal attributes are supported.
        mean_based_regression : bool = True
            Enable fast induction of mean-based regression rules instead of default
            median-based.
        max_rule_count : int = 0
            Maximum number of rules to be generated; 0 indicates no limit.
            Default: 0
        """
        super().__init__(
            minsupp_new=minsupp_new,
            induction_measure=induction_measure,
            pruning_measure=pruning_measure,
            voting_measure=voting_measure,
            max_growing=max_growing,
            enable_pruning=enable_pruning,
            ignore_missing=ignore_missing,
            max_uncovered_fraction=max_uncovered_fraction,
            select_best_candidate=select_best_candidate,
            complementary_conditions=complementary_conditions,
            mean_based_regression=mean_based_regression,
            max_rule_count=max_rule_count,
        )
        # Populated by `fit`; stays None until the model is trained.
        self.model: RuleSet[RegressionRule] = None

    def _validate_labels(self, labels: Data):
        """Verify that the labels collection holds numeric values.

        Parameters
        ----------
        labels : :class:`rulekit.operator.Data`
            target values to validate

        Raises
        ------
        ValueError
            If `labels` is empty or its first value is not numeric.
        """
        # Fail fast with a clear message instead of an opaque IndexError
        # from the element access below.
        if len(labels) == 0:
            raise ValueError(
                f"{self.__class__.__name__} requires a non-empty labels collection"
            )
        if isinstance(labels, (pd.DataFrame, pd.Series)):
            first_label = labels.iloc[0]
        else:
            first_label = labels[0]
        # Only the first element is inspected; a mixed-type column may still
        # fail later during training.
        if not isinstance(first_label, Number):
            # Fixed typo in original message ("lables").
            raise ValueError(
                f"{self.__class__.__name__} requires label values to be numeric"
            )

    def fit(  # pylint: disable=arguments-differ
        self, values: Data, labels: Data
    ) -> RuleRegressor:
        """Train model on given dataset.

        Parameters
        ----------
        values : :class:`rulekit.operator.Data`
            attributes
        labels : :class:`rulekit.operator.Data`
            target values

        Returns
        -------
        self : RuleRegressor
        """
        self._validate_labels(labels)
        super().fit(values, labels)
        return self

    def predict(self, values: Data) -> np.ndarray:
        """Perform prediction and returns predicted values.

        Parameters
        ----------
        values : :class:`rulekit.operator.Data`
            attributes

        Returns
        -------
        result : np.ndarray
            predicted values
        """
        return self._map_result(super().predict(values))

    def score(self, values: Data, labels: Data) -> float:
        """Return the coefficient of determination R2 of the prediction.

        Parameters
        ----------
        values : :class:`rulekit.operator.Data`
            attributes
        labels : :class:`rulekit.operator.Data`
            true target values

        Returns
        -------
        score : float
            R2 of self.predict(values) wrt. labels.
        """
        predicted_labels = self.predict(values)
        return metrics.r2_score(labels, predicted_labels)

    def _map_result(self, predicted_example_set) -> np.ndarray:
        """Convert the predicted example set into a numeric numpy array."""
        # remap=False: regression predictions are already numeric values.
        return PredictionResultMapper.map_to_numerical(
            predicted_example_set, remap=False
        )

    def _get_problem_type(self) -> ProblemType:
        """Return the problem type handled by this operator."""
        return ProblemType.REGRESSION
class ExpertRuleRegressor(ExpertKnowledgeOperator, RuleRegressor):
    """Expert Regression model."""

    __params_class__ = _RegressionExpertModelParams

    def __init__(  # pylint: disable=too-many-arguments,too-many-locals
        self,
        minsupp_new: float = DEFAULT_PARAMS_VALUE["minsupp_new"],
        induction_measure: Measures = DEFAULT_PARAMS_VALUE["induction_measure"],
        pruning_measure: Union[Measures, str] = DEFAULT_PARAMS_VALUE["pruning_measure"],
        voting_measure: Measures = DEFAULT_PARAMS_VALUE["voting_measure"],
        max_growing: float = DEFAULT_PARAMS_VALUE["max_growing"],
        enable_pruning: bool = DEFAULT_PARAMS_VALUE["enable_pruning"],
        ignore_missing: bool = DEFAULT_PARAMS_VALUE["ignore_missing"],
        max_uncovered_fraction: float = DEFAULT_PARAMS_VALUE["max_uncovered_fraction"],
        select_best_candidate: bool = DEFAULT_PARAMS_VALUE["select_best_candidate"],
        complementary_conditions: bool = DEFAULT_PARAMS_VALUE[
            "complementary_conditions"
        ],
        mean_based_regression: bool = DEFAULT_PARAMS_VALUE["mean_based_regression"],
        max_rule_count: int = DEFAULT_PARAMS_VALUE["max_rule_count"],
        extend_using_preferred: bool = DEFAULT_PARAMS_VALUE["extend_using_preferred"],
        extend_using_automatic: bool = DEFAULT_PARAMS_VALUE["extend_using_automatic"],
        induce_using_preferred: bool = DEFAULT_PARAMS_VALUE["induce_using_preferred"],
        induce_using_automatic: bool = DEFAULT_PARAMS_VALUE["induce_using_automatic"],
        preferred_conditions_per_rule: int = DEFAULT_PARAMS_VALUE[
            "preferred_conditions_per_rule"
        ],
        preferred_attributes_per_rule: int = DEFAULT_PARAMS_VALUE[
            "preferred_attributes_per_rule"
        ],
    ):
        """
        Parameters
        ----------
        minsupp_new : float = 5.0
            a minimum number (or fraction, if value < 1.0) of previously uncovered
            examples to be covered by a new rule (positive examples for
            classification problems); default: 5,
        induction_measure : :class:`rulekit.params.Measures` = \
            :class:`rulekit.params.Measures.Correlation`
            measure used during induction; default measure is correlation
        pruning_measure : Union[:class:`rulekit.params.Measures`, str] = \
            :class:`rulekit.params.Measures.Correlation`
            measure used during pruning. Could be user defined (string), for example
            :code:`2 * p / n`; default measure is correlation
        voting_measure : :class:`rulekit.params.Measures` = \
            :class:`rulekit.params.Measures.Correlation`
            measure used during voting; default measure is correlation
        max_growing : int = 0.0
            non-negative integer representing maximum number of conditions which can
            be added to the rule in the growing phase (use this parameter for large
            datasets if execution time is prohibitive); 0 indicates no limit;
            default: 0,
        enable_pruning : bool = True
            enable or disable pruning, default is True.
        ignore_missing : bool = False
            boolean telling whether missing values should be ignored (by default, a
            missing value of given attribute is always considered as not fulfilling
            the condition build upon that attribute); default: False.
        max_uncovered_fraction : float = 0.0
            Floating-point number from [0,1] interval representing maximum fraction
            of examples that may remain uncovered by the rule set, default: 0.0.
        select_best_candidate : bool = False
            Flag determining if best candidate should be selected from growing
            phase; default: False.
        complementary_conditions : bool = False
            If enabled, complementary conditions in the form a = !{value} for
            nominal attributes are supported.
        mean_based_regression : bool = True
            Enable fast induction of mean-based regression rules instead of default
            median-based.
        max_rule_count : int = 0
            Maximum number of rules to be generated (for classification data sets it
            applies to a single class); 0 indicates no limit.
        extend_using_preferred : bool = False
            boolean indicating whether initial rules should be extended with a use
            of preferred conditions and attributes; default is False
        extend_using_automatic : bool = False
            boolean indicating whether initial rules should be extended with a use
            of automatic conditions and attributes; default is False
        induce_using_preferred : bool = False
            boolean indicating whether new rules should be induced with a use of
            preferred conditions and attributes; default is False
        induce_using_automatic : bool = False
            boolean indicating whether new rules should be induced with a use of
            automatic conditions and attributes; default is False
        preferred_conditions_per_rule : int = None
            maximum number of preferred conditions per rule; default: unlimited,
        preferred_attributes_per_rule : int = None
            maximum number of preferred attributes per rule; default: unlimited.
        """
        # Both base initializers are called explicitly (not via super()) so that
        # each receives exactly the keyword arguments it understands; the shared
        # parameters are deliberately passed to both.
        RuleRegressor.__init__(
            self,
            minsupp_new=minsupp_new,
            induction_measure=induction_measure,
            pruning_measure=pruning_measure,
            voting_measure=voting_measure,
            max_growing=max_growing,
            enable_pruning=enable_pruning,
            ignore_missing=ignore_missing,
            max_uncovered_fraction=max_uncovered_fraction,
            select_best_candidate=select_best_candidate,
            complementary_conditions=complementary_conditions,
            mean_based_regression=mean_based_regression,
            max_rule_count=max_rule_count,
        )
        ExpertKnowledgeOperator.__init__(
            self,
            minsupp_new=minsupp_new,
            induction_measure=induction_measure,
            pruning_measure=pruning_measure,
            voting_measure=voting_measure,
            max_growing=max_growing,
            enable_pruning=enable_pruning,
            ignore_missing=ignore_missing,
            max_uncovered_fraction=max_uncovered_fraction,
            select_best_candidate=select_best_candidate,
            extend_using_preferred=extend_using_preferred,
            extend_using_automatic=extend_using_automatic,
            induce_using_preferred=induce_using_preferred,
            induce_using_automatic=induce_using_automatic,
            preferred_conditions_per_rule=preferred_conditions_per_rule,
            preferred_attributes_per_rule=preferred_attributes_per_rule,
            complementary_conditions=complementary_conditions,
            mean_based_regression=mean_based_regression,
            max_rule_count=max_rule_count,
        )
        # Populated by `fit`; stays None until the model is trained.
        self.model: RuleSet[RegressionRule] = None

    def fit(  # pylint: disable=arguments-differ,too-many-arguments
        self,
        values: Data,
        labels: Data,
        expert_rules: list[Union[str, tuple[str, str]]] = None,
        expert_preferred_conditions: list[Union[str, tuple[str, str]]] = None,
        expert_forbidden_conditions: list[Union[str, tuple[str, str]]] = None,
    ) -> ExpertRuleRegressor:
        """Train model on given dataset.

        Parameters
        ----------
        values : :class:`rulekit.operator.Data`
            attributes
        labels : :class:`rulekit.operator.Data`
            target values
        expert_rules : List[Union[str, Tuple[str, str]]]
            set of initial rules, either passed as a list of strings representing
            rules or as list of tuples where first element is name of the rule and
            second one is rule string.
        expert_preferred_conditions : List[Union[str, Tuple[str, str]]]
            multiset of preferred conditions (used also for specifying preferred
            attributes by using special value Any). Either passed as a list of
            strings representing rules or as list of tuples where first element is
            name of the rule and second one is rule string.
        expert_forbidden_conditions : List[Union[str, Tuple[str, str]]]
            set of forbidden conditions (used also for specifying forbidden
            attributes by using special value Any). Either passed as a list of
            strings representing rules or as list of tuples where first element is
            name of the rule and second one is rule string.

        Returns
        -------
        self : ExpertRuleRegressor
        """
        self._validate_labels(labels)
        # Delegate to the expert-knowledge fit, which consumes the expert
        # rule/condition arguments in addition to the training data.
        return ExpertKnowledgeOperator.fit(
            self,
            values,
            labels,
            expert_rules=expert_rules,
            expert_preferred_conditions=expert_preferred_conditions,
            expert_forbidden_conditions=expert_forbidden_conditions,
        )

    def predict(self, values: Data) -> np.ndarray:
        """Perform prediction and returns predicted values.

        Parameters
        ----------
        values : :class:`rulekit.operator.Data`
            attributes

        Returns
        -------
        result : np.ndarray
            predicted values
        """
        # Explicit base-class call to pick the expert-knowledge prediction path.
        return self._map_result(ExpertKnowledgeOperator.predict(self, values))

    def _get_problem_type(self) -> ProblemType:
        """Return the problem type handled by this operator."""
        return ProblemType.REGRESSION
class ContrastSetRuleRegressor(BaseOperator):
    """Contrast set regression model."""

    __params_class__ = ContrastSetModelParams

    def __init__(  # pylint: disable=too-many-arguments,too-many-locals
        self,
        minsupp_all: Tuple[float, float, float, float] = DEFAULT_PARAMS_VALUE[
            "minsupp_all"
        ],
        max_neg2pos: float = DEFAULT_PARAMS_VALUE["max_neg2pos"],
        max_passes_count: int = DEFAULT_PARAMS_VALUE["max_passes_count"],
        penalty_strength: float = DEFAULT_PARAMS_VALUE["penalty_strength"],
        penalty_saturation: float = DEFAULT_PARAMS_VALUE["penalty_saturation"],
        minsupp_new: float = DEFAULT_PARAMS_VALUE["minsupp_new"],
        induction_measure: Measures = DEFAULT_PARAMS_VALUE["induction_measure"],
        pruning_measure: Union[Measures, str] = DEFAULT_PARAMS_VALUE["pruning_measure"],
        voting_measure: Measures = DEFAULT_PARAMS_VALUE["voting_measure"],
        max_growing: float = DEFAULT_PARAMS_VALUE["max_growing"],
        enable_pruning: bool = DEFAULT_PARAMS_VALUE["enable_pruning"],
        ignore_missing: bool = DEFAULT_PARAMS_VALUE["ignore_missing"],
        max_uncovered_fraction: float = DEFAULT_PARAMS_VALUE["max_uncovered_fraction"],
        select_best_candidate: bool = DEFAULT_PARAMS_VALUE["select_best_candidate"],
        complementary_conditions: bool = DEFAULT_PARAMS_VALUE[
            "complementary_conditions"
        ],
        mean_based_regression: bool = DEFAULT_PARAMS_VALUE["mean_based_regression"],
        max_rule_count: int = DEFAULT_PARAMS_VALUE["max_rule_count"],
    ):
        """Configure a contrast set regression operator.

        Parameters
        ----------
        minsupp_all: Tuple[float, float, float, float]
            minimum positive support of a contrast set (p/P); passing several
            values triggers metainduction. Default and recommended sequence:
            0.8, 0.5, 0.2, 0.1
        max_neg2pos: float
            upper bound on the negative-to-positive support ratio (nP/pN);
            default: 0.5
        max_passes_count: int
            maximum number of sequential covering passes per single minsupp-all
            value; default: 5
        penalty_strength: float
            penalty strength (s); default: 0.5
        penalty_saturation: float
            value of p_new / P at which the penalty reward saturates;
            default: 0.2.
        minsupp_new : float = 5.0
            minimum number (or fraction when the value is below 1.0) of
            previously uncovered examples a new rule must cover; default: 5,
        induction_measure : :class:`rulekit.params.Measures` = \
            :class:`rulekit.params.Measures.Correlation`
            quality measure applied while growing rules; correlation by default
        pruning_measure : Union[:class:`rulekit.params.Measures`, str] = \
            :class:`rulekit.params.Measures.Correlation`
            quality measure applied while pruning; may be a user-defined string
            such as :code:`2 * p / n`; correlation by default
        voting_measure : :class:`rulekit.params.Measures` = \
            :class:`rulekit.params.Measures.Correlation`
            quality measure applied during voting; correlation by default
        max_growing : int = 0.0
            non-negative cap on the number of conditions added to a rule during
            growing (useful for large datasets when runtime is prohibitive);
            0 means unlimited; default: 0
        enable_pruning : bool = True
            whether the pruning phase is performed; default: True.
        ignore_missing : bool = False
            when False (default) a missing attribute value never satisfies a
            condition built on that attribute; when True missing values are
            ignored.
        max_uncovered_fraction : float = 0.0
            value in [0,1]: largest fraction of examples that may stay uncovered
            by the rule set; default: 0.0.
        select_best_candidate : bool = False
            whether the best candidate from the growing phase is selected;
            default: False.
        complementary_conditions : bool = False
            when enabled, complementary conditions of the form a = !{value} are
            supported for nominal attributes.
        mean_based_regression : bool = True
            use fast mean-based induction of regression rules instead of the
            default median-based variant.
        max_rule_count : int = 0
            maximum number of rules to generate; 0 means unlimited.
        """
        # Contrast-set specific parameters first, then the shared induction ones.
        super().__init__(
            minsupp_all=minsupp_all,
            max_neg2pos=max_neg2pos,
            max_passes_count=max_passes_count,
            penalty_strength=penalty_strength,
            penalty_saturation=penalty_saturation,
            minsupp_new=minsupp_new,
            induction_measure=induction_measure,
            pruning_measure=pruning_measure,
            voting_measure=voting_measure,
            max_growing=max_growing,
            enable_pruning=enable_pruning,
            ignore_missing=ignore_missing,
            max_uncovered_fraction=max_uncovered_fraction,
            select_best_candidate=select_best_candidate,
            complementary_conditions=complementary_conditions,
            mean_based_regression=mean_based_regression,
            max_rule_count=max_rule_count,
        )
        # Both are assigned by `fit`; None until the model has been trained.
        self.contrast_attribute: str = None
        self.model: RuleSet[RegressionRule] = None

    def fit(
        self, values: Data, labels: Data, contrast_attribute: str
    ) -> ContrastSetRuleRegressor:  # pylint: disable=arguments-differ
        """Train model on given dataset.

        Parameters
        ----------
        values : :class:`rulekit.operator.Data`
            attributes
        labels : :class:`rulekit.operator.Data`
            target values
        contrast_attribute: str
            group attribute

        Returns
        -------
        self : ContrastSetRuleRegressor
        """
        # Reuse the numeric-label check from the plain regressor.
        RuleRegressor._validate_labels(self, labels)  # pylint: disable=protected-access
        super().fit(values, labels, contrast_attribute=contrast_attribute)
        # Remember the group attribute only after training succeeded.
        self.contrast_attribute = contrast_attribute
        return self

    def predict(self, values: Data) -> np.ndarray:
        """Perform prediction and returns predicted values.

        Parameters
        ----------
        values : :class:`rulekit.operator.Data`
            attributes

        Returns
        -------
        result : np.ndarray
            predicted values
        """
        return RuleRegressor.predict(self, values)

    def score(self, values: Data, labels: Data) -> float:
        """Return the coefficient of determination R2 of the prediction.

        Parameters
        ----------
        values : :class:`rulekit.operator.Data`
            attributes
        labels : :class:`rulekit.operator.Data`
            true target values

        Returns
        -------
        score : float
            R2 of self.predict(values) wrt. labels.
        """
        return RuleRegressor.score(self, values, labels)

    def __getstate__(self) -> dict:
        """Serialize the base operator state plus the contrast attribute."""
        state = dict(BaseOperator.__getstate__(self))
        state["contrast_attribute"] = self.contrast_attribute
        return state

    def __setstate__(self, state: dict):
        """Restore the base operator state and the contrast attribute."""
        BaseOperator.__setstate__(self, state)
        self.contrast_attribute = state["contrast_attribute"]

    def _get_problem_type(self) -> ProblemType:
        """Return the problem type handled by this operator."""
        return ProblemType.CONTRAST_REGRESSION