Coverage for rulekit/stats.py: 93%

30 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-01-07 11:26 +0000

1"""Contains classes describing rules and rulesets statistics and metrics 

2""" 

3 

4 

class RuleStatistics:
    """Container for the statistics of a single rule.

    Attributes
    ----------
    p : float
        Number of positives covered by the rule (accounting weights).
    n : float
        Number of negatives covered by the rule (accounting weights).
    P : float
        Number of positives in the training set (accounting weights).
    N : float
        Number of negatives in the training set (accounting weights).
    weight : float
        Rule weight.
    pvalue : float
        Rule significance.
    """

    def __init__(self, rule):
        """Copy the weighted counts, weight and p-value from ``rule``."""
        # pylint: disable=invalid-name
        self.p, self.n = rule.weighted_p, rule.weighted_n
        self.P, self.N = rule.weighted_P, rule.weighted_N
        self.weight, self.pvalue = rule.weight, rule.pvalue

    def __str__(self):
        """Returns string representation of the object."""
        # Field order is part of the rendered text, so keep it explicit.
        field_names = ('p', 'n', 'P', 'N', 'weight', 'pvalue')
        rendered = ', '.join(
            f'{field} = {getattr(self, field)}' for field in field_names
        )
        return f'({rendered})'

37 

38 

class RuleSetStatistics:  # pylint: disable=too-many-instance-attributes
    """Statistics for ruleset.

    Attributes
    ----------
    SIGNIFICANCE_LEVEL : float
        Significance level, default value is *0.05*
    time_total_s : float
        Time of constructing the rule set in seconds.
    time_growing_s : float
        Time of growing in seconds.
    time_pruning_s : float
        Time of pruning in seconds.
    rules_count : int
        Number of rules in ruleset.
    conditions_per_rule : float
        Average number of conditions per rule.
    induced_conditions_per_rule : float
        Average number of induced conditions.
    avg_rule_coverage : float
        Average rule coverage.
    avg_rule_precision : float
        Average rule precision.
    avg_rule_quality : float
        Average rule quality.
    pvalue : float
        Rule set significance.
    FDR_pvalue : float
        Significance of the rule set with false discovery rate correction.
    FWER_pvalue : float
        Significance of the rule set with family-wise error rate correction.
    fraction_significant : float
        Fraction of rules significant at assumed level.
    fraction_FDR_significant : float
        Fraction of rules significant, set with false discovery rate
        correction, at assumed level.
    fraction_FWER_significant : float
        Fraction of rules significant, set with family-wise error rate
        correction, at assumed level.
    """
    SIGNIFICANCE_LEVEL = 0.05

    def __init__(self, ruleset):
        """Collect all statistics from ``ruleset``.

        Parameters
        ----------
        ruleset
            Object exposing ``total_time``, ``growing_time``,
            ``pruning_time``, ``rules`` and the ``calculate_*`` methods
            used below. Each ``calculate_significance*`` method is called
            with the significance level and its result is indexed with
            the keys ``'p'`` and ``'fraction'``.
        """
        # pylint: disable=invalid-name
        self.time_total_s = ruleset.total_time
        self.time_growing_s = ruleset.growing_time
        self.time_pruning_s = ruleset.pruning_time

        self.rules_count = len(ruleset.rules)
        self.conditions_per_rule = ruleset.calculate_conditions_count()
        self.induced_conditions_per_rule = (
            ruleset.calculate_induced_conditions_count()
        )

        self.avg_rule_coverage = ruleset.calculate_avg_rule_coverage()
        self.avg_rule_precision = ruleset.calculate_avg_rule_precision()
        self.avg_rule_quality = ruleset.calculate_avg_rule_quality()

        # Each significance result carries both the p-value and the
        # fraction, so compute it once instead of once per key.
        level = RuleSetStatistics.SIGNIFICANCE_LEVEL
        significance = ruleset.calculate_significance(level)
        significance_fdr = ruleset.calculate_significance_fdr(level)
        significance_fwer = ruleset.calculate_significance_fwer(level)

        self.pvalue = significance['p']
        self.FDR_pvalue = significance_fdr['p']
        self.FWER_pvalue = significance_fwer['p']

        self.fraction_significant = significance['fraction']
        self.fraction_FDR_significant = significance_fdr['fraction']
        self.fraction_FWER_significant = significance_fwer['fraction']

    def __str__(self):
        """Returns multi-line string representation of the object."""
        level = RuleSetStatistics.SIGNIFICANCE_LEVEL
        # Empty entries render as the blank lines separating the sections.
        lines = [
            f'Time total [s]: {self.time_total_s}',
            f'Time growing [s]: {self.time_growing_s}',
            f'Time pruning [s]: {self.time_pruning_s}',
            '',
            f'Rules count: {self.rules_count}',
            f'Conditions per rule: {self.conditions_per_rule}',
            f'Induced conditions per rule: {self.induced_conditions_per_rule}',
            '',
            f'Average rule coverage: {self.avg_rule_coverage}',
            f'Average rule precision: {self.avg_rule_precision}',
            f'Average rule quality: {self.avg_rule_quality}',
            '',
            f'pvalue: {self.pvalue}',
            f'FDR pvalue: {self.FDR_pvalue}',
            f'FWER pvalue: {self.FWER_pvalue}',
            '',
            f'Fraction {level} significant: {self.fraction_significant}',
            f'Fraction {level} FDR significant: '
            f'{self.fraction_FDR_significant}',
            f'Fraction {level} FWER significant: '
            f'{self.fraction_FWER_significant}',
        ]
        # The text ends with a trailing newline after the last entry.
        return '\n'.join(lines) + '\n'