Coverage for rulekit/stats.py: 93%
30 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-07 11:26 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-07 11:26 +0000
1"""Contains classes describing rules and rulesets statistics and metrics
2"""
class RuleStatistics:
    """Statistics for single rule.

    Attributes
    ----------
    p : float
        Number of positives covered by the rule (accounting weights).
    n : float
        Number of negatives covered by the rule (accounting weights).
    P : float
        Number of positives in the training set (accounting weights).
    N : float
        Number of negatives in the training set (accounting weights).
    weight : float
        Rule weight.
    pvalue : float
        Rule significance.
    """

    def __init__(self, rule):
        """Copy the statistics exposed by ``rule`` onto this object.

        Parameters
        ----------
        rule
            Object providing the ``weighted_p``, ``weighted_n``,
            ``weighted_P``, ``weighted_N``, ``weight`` and ``pvalue``
            attributes; each one is read exactly once.
        """
        # pylint: disable=invalid-name
        self.p = rule.weighted_p
        self.n = rule.weighted_n
        self.P = rule.weighted_P
        self.N = rule.weighted_N
        self.weight = rule.weight
        self.pvalue = rule.pvalue

    def __str__(self) -> str:
        """Returns string representation of the object."""
        # Adjacent f-strings inside parentheses are joined by the parser, so
        # no explicit ``+`` or backslash continuation is needed.
        return (
            f'(p = {self.p}, n = {self.n}, P = {self.P}, '
            f'N = {self.N}, weight = {self.weight}, pvalue = {self.pvalue})'
        )
class RuleSetStatistics:  # pylint: disable=too-many-instance-attributes
    """Statistics for ruleset.

    Attributes
    ----------
    SIGNIFICANCE_LEVEL : float
        Significance level, default value is *0.05*
    time_total_s : float
        Time of constructing the rule set in seconds.
    time_growing_s : float
        Time of growing in seconds.
    time_pruning_s : float
        Time of pruning in seconds.
    rules_count : int
        Number of rules in ruleset.
    conditions_per_rule : float
        Average number of conditions per rule.
    induced_conditions_per_rule : float
        Average number of induced conditions.
    avg_rule_coverage : float
        Average rule coverage.
    avg_rule_precision : float
        Average rule precision.
    avg_rule_quality : float
        Average rule quality.
    pvalue : float
        Rule set significance.
    FDR_pvalue : float
        Significance of the rule set with false discovery rate correction.
    FWER_pvalue : float
        Significance of the rule set with family-wise error rate correction.
    fraction_significant : float
        Fraction of rules significant at assumed level.
    fraction_FDR_significant : float
        Fraction of rules significant, set with false discovery rate
        correction, at assumed level.
    fraction_FWER_significant : float
        Fraction of rules significant, set with family-wise error rate
        correction, at assumed level.
    """
    SIGNIFICANCE_LEVEL = 0.05

    def __init__(self, ruleset):
        """Collect timing, size, quality and significance figures.

        Parameters
        ----------
        ruleset
            Object providing the ``total_time``, ``growing_time``,
            ``pruning_time`` and ``rules`` attributes, the
            ``calculate_*`` methods called below, and significance methods
            whose result can be indexed with ``'p'`` and ``'fraction'``.
        """
        # pylint: disable=invalid-name
        self.time_total_s = ruleset.total_time
        self.time_growing_s = ruleset.growing_time
        self.time_pruning_s = ruleset.pruning_time

        self.rules_count = len(ruleset.rules)
        self.conditions_per_rule = ruleset.calculate_conditions_count()
        self.induced_conditions_per_rule = (
            ruleset.calculate_induced_conditions_count()
        )

        self.avg_rule_coverage = ruleset.calculate_avg_rule_coverage()
        self.avg_rule_precision = ruleset.calculate_avg_rule_precision()
        self.avg_rule_quality = ruleset.calculate_avg_rule_quality()

        # Each significance method is invoked once and both keys are read
        # from the same result, instead of repeating the call per key.
        level = RuleSetStatistics.SIGNIFICANCE_LEVEL
        significance = ruleset.calculate_significance(level)
        significance_fdr = ruleset.calculate_significance_fdr(level)
        significance_fwer = ruleset.calculate_significance_fwer(level)

        self.pvalue = significance['p']
        self.FDR_pvalue = significance_fdr['p']
        self.FWER_pvalue = significance_fwer['p']

        self.fraction_significant = significance['fraction']
        self.fraction_FDR_significant = significance_fdr['fraction']
        self.fraction_FWER_significant = significance_fwer['fraction']

    def __str__(self) -> str:
        """Returns a multi-line, human-readable summary of the statistics."""
        level = RuleSetStatistics.SIGNIFICANCE_LEVEL
        # Adjacent literals are joined by the parser; sections are separated
        # by an empty line and the text ends with a trailing newline.
        return (
            f'Time total [s]: {self.time_total_s}\n'
            f'Time growing [s]: {self.time_growing_s}\n'
            f'Time pruning [s]: {self.time_pruning_s}\n'
            '\n'
            f'Rules count: {self.rules_count}\n'
            f'Conditions per rule: {self.conditions_per_rule}\n'
            f'Induced conditions per rule: {self.induced_conditions_per_rule}\n'
            '\n'
            f'Average rule coverage: {self.avg_rule_coverage}\n'
            f'Average rule precision: {self.avg_rule_precision}\n'
            f'Average rule quality: {self.avg_rule_quality}\n'
            '\n'
            f'pvalue: {self.pvalue}\n'
            f'FDR pvalue: {self.FDR_pvalue}\n'
            f'FWER pvalue: {self.FWER_pvalue}\n'
            '\n'
            f'Fraction {level} significant: {self.fraction_significant}\n'
            f'Fraction {level} FDR significant: '
            f'{self.fraction_FDR_significant}\n'
            f'Fraction {level} FWER significant: '
            f'{self.fraction_FWER_significant}\n'
        )