Coverage for rulekit/stats.py: 93%

"""Contains classes describing rule and ruleset statistics and metrics."""

class RuleStatistics:
    """Statistics for single rule.

    Attributes
    ----------
    p : float
        Number of positives covered by the rule (accounting weights).
    n : float
        Number of negatives covered by the rule (accounting weights).
    P : float
        Number of positives in the training set (accounting weights).
    N : float
        Number of negatives in the training set (accounting weights).
    weight : float
        Rule weight.
    pvalue : float
        Rule significance.
    """

    def __init__(self, rule):
        """Copies the statistics exposed by ``rule`` onto this object.

        Parameters
        ----------
        rule
            Object providing the ``weighted_p``, ``weighted_n``,
            ``weighted_P``, ``weighted_N``, ``weight`` and ``pvalue``
            attributes.
        """
        # pylint: disable=invalid-name
        self.p = rule.weighted_p
        self.n = rule.weighted_n
        self.P = rule.weighted_P
        self.N = rule.weighted_N
        self.weight = rule.weight
        self.pvalue = rule.pvalue

    def __str__(self):
        """Returns string representation of the object."""
        # Adjacent f-strings inside parentheses are joined at compile time,
        # so no explicit ``+`` or backslash continuation is needed.
        return (
            f'(p = {self.p}, n = {self.n}, P = {self.P}, '
            f'N = {self.N}, weight = {self.weight}, pvalue = {self.pvalue})'
        )

class RuleSetStatistics:  # pylint: disable=too-many-instance-attributes
    """Statistics for ruleset.

    Attributes
    ----------
    SIGNIFICANCE_LEVEL : float
        Significance level, default value is *0.05*

    time_total_s : float
        Time of constructing the rule set in seconds.
    time_growing_s : float
        Time of growing in seconds.
    time_pruning_s : float
        Time of pruning in seconds.
    rules_count : int
        Number of rules in ruleset.
    conditions_per_rule : float
        Average number of conditions per rule.
    induced_conditions_per_rule : float
        Average number of induced conditions.
    avg_rule_coverage : float
        Average rule coverage.
    avg_rule_precision : float
        Average rule precision.
    avg_rule_quality : float
        Average rule quality.
    pvalue : float
        Rule set significance.
    FDR_pvalue : float
        Significance of the rule set with false discovery rate correction.
    FWER_pvalue : float
        Significance of the rule set with family-wise error rate correction.
    fraction_significant : float
        Fraction of rules significant at assumed level.
    fraction_FDR_significant : float
        Fraction of rules significant, set with false discovery rate
        correction, at assumed level.
    fraction_FWER_significant : float
        Fraction of rules significant, set with family-wise error rate
        correction, at assumed level.
    """

    SIGNIFICANCE_LEVEL = 0.05

    def __init__(self, ruleset):
        """Collects the statistics of ``ruleset``.

        Parameters
        ----------
        ruleset
            Object providing ``total_time``, ``growing_time``,
            ``pruning_time`` and ``rules`` attributes together with the
            ``calculate_*`` methods used below. Each of the three
            significance methods is expected to return a mapping with
            the ``'p'`` and ``'fraction'`` keys.
        """
        # pylint: disable=invalid-name
        self.time_total_s = ruleset.total_time
        self.time_growing_s = ruleset.growing_time
        self.time_pruning_s = ruleset.pruning_time

        self.rules_count = len(ruleset.rules)
        self.conditions_per_rule = ruleset.calculate_conditions_count()
        self.induced_conditions_per_rule = (
            ruleset.calculate_induced_conditions_count()
        )

        self.avg_rule_coverage = ruleset.calculate_avg_rule_coverage()
        self.avg_rule_precision = ruleset.calculate_avg_rule_precision()
        self.avg_rule_quality = ruleset.calculate_avg_rule_quality()

        # Each significance computation yields both the p-value and the
        # fraction of significant rules, so it is run once and reused
        # instead of being repeated for every key that is read.
        level = RuleSetStatistics.SIGNIFICANCE_LEVEL
        significance = ruleset.calculate_significance(level)
        significance_fdr = ruleset.calculate_significance_fdr(level)
        significance_fwer = ruleset.calculate_significance_fwer(level)

        self.pvalue = significance['p']
        self.FDR_pvalue = significance_fdr['p']
        self.FWER_pvalue = significance_fwer['p']

        self.fraction_significant = significance['fraction']
        self.fraction_FDR_significant = significance_fdr['fraction']
        self.fraction_FWER_significant = significance_fwer['fraction']

    def __str__(self):
        """Returns a multi-line, human-readable summary of the statistics."""
        # The level is read from the class itself (not ``self``), exactly
        # as in the constructor.
        level = RuleSetStatistics.SIGNIFICANCE_LEVEL
        return (
            f'Time total [s]: {self.time_total_s}\n'
            f'Time growing [s]: {self.time_growing_s}\n'
            f'Time pruning [s]: {self.time_pruning_s}\n'
            '\n'
            f'Rules count: {self.rules_count}\n'
            f'Conditions per rule: {self.conditions_per_rule}\n'
            f'Induced conditions per rule: {self.induced_conditions_per_rule}\n'
            '\n'
            f'Average rule coverage: {self.avg_rule_coverage}\n'
            f'Average rule precision: {self.avg_rule_precision}\n'
            f'Average rule quality: {self.avg_rule_quality}\n'
            '\n'
            f'pvalue: {self.pvalue}\n'
            f'FDR pvalue: {self.FDR_pvalue}\n'
            f'FWER pvalue: {self.FWER_pvalue}\n'
            '\n'
            f'Fraction {level} significant: {self.fraction_significant}\n'
            f'Fraction {level} FDR significant: '
            f'{self.fraction_FDR_significant}\n'
            f'Fraction {level} FWER significant: '
            f'{self.fraction_FWER_significant}\n'
        )