{
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
},
"orig_nbformat": 2,
"kernelspec": {
"name": "rulekit",
"display_name": "Python 3.8.6 64-bit ('env': venv)"
}
},
"nbformat": 4,
"nbformat_minor": 2,
"cells": [
{
"source": [
"# Classification"
],
"cell_type": "markdown",
"metadata": {}
},
{
"source": [
"This notebook presents example usage of package for solving classification problem on `seismic-bumps` dataset. You can download dataset [here](https://raw.githubusercontent.com/adaa-polsl/RuleKit/master/data/seismic-bumps/seismic-bumps.arff).\n",
"\n",
"This tutorial will cover topics such as: \n",
"- training model \n",
"- changing model hyperparameters \n",
"- hyperparameters tuning \n",
"- calculating metrics for model \n",
"- getting RuleKit inbuilt "
],
"cell_type": "markdown",
"metadata": {}
},
{
"source": [
"## Summary of the dataset"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" genergy gimpuls goenergy goimpuls nbumps \\\n",
"count 2.584000e+03 2584.000000 2584.000000 2584.000000 2584.000000 \n",
"mean 9.024252e+04 538.579334 12.375774 4.508901 0.859520 \n",
"std 2.292005e+05 562.652536 80.319051 63.166556 1.364616 \n",
"min 1.000000e+02 2.000000 -96.000000 -96.000000 0.000000 \n",
"25% 1.166000e+04 190.000000 -37.000000 -36.000000 0.000000 \n",
"50% 2.548500e+04 379.000000 -6.000000 -6.000000 0.000000 \n",
"75% 5.283250e+04 669.000000 38.000000 30.250000 1.000000 \n",
"max 2.595650e+06 4518.000000 1245.000000 838.000000 9.000000 \n",
"\n",
" nbumps2 nbumps3 nbumps4 nbumps5 nbumps6 nbumps7 \\\n",
"count 2584.000000 2584.000000 2584.000000 2584.000000 2584.0 2584.0 \n",
"mean 0.393576 0.392802 0.067724 0.004644 0.0 0.0 \n",
"std 0.783772 0.769710 0.279059 0.068001 0.0 0.0 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.0 0.0 \n",
"25% 0.000000 0.000000 0.000000 0.000000 0.0 0.0 \n",
"50% 0.000000 0.000000 0.000000 0.000000 0.0 0.0 \n",
"75% 1.000000 1.000000 0.000000 0.000000 0.0 0.0 \n",
"max 8.000000 7.000000 3.000000 1.000000 0.0 0.0 \n",
"\n",
" nbumps89 senergy maxenergy class \n",
"count 2584.0 2584.000000 2584.000000 2584.000000 \n",
"mean 0.0 4975.270898 4278.850619 0.065789 \n",
"std 0.0 20450.833222 19357.454882 0.247962 \n",
"min 0.0 0.000000 0.000000 0.000000 \n",
"25% 0.0 0.000000 0.000000 0.000000 \n",
"50% 0.0 0.000000 0.000000 0.000000 \n",
"75% 0.0 2600.000000 2000.000000 0.000000 \n",
"max 0.0 402000.000000 400000.000000 1.000000 "
],
"text/html": "
\n\n
\n \n \n | \n genergy | \n gimpuls | \n goenergy | \n goimpuls | \n nbumps | \n nbumps2 | \n nbumps3 | \n nbumps4 | \n nbumps5 | \n nbumps6 | \n nbumps7 | \n nbumps89 | \n senergy | \n maxenergy | \n class | \n
\n \n \n \n | count | \n 2.584000e+03 | \n 2584.000000 | \n 2584.000000 | \n 2584.000000 | \n 2584.000000 | \n 2584.000000 | \n 2584.000000 | \n 2584.000000 | \n 2584.000000 | \n 2584.0 | \n 2584.0 | \n 2584.0 | \n 2584.000000 | \n 2584.000000 | \n 2584.000000 | \n
\n \n | mean | \n 9.024252e+04 | \n 538.579334 | \n 12.375774 | \n 4.508901 | \n 0.859520 | \n 0.393576 | \n 0.392802 | \n 0.067724 | \n 0.004644 | \n 0.0 | \n 0.0 | \n 0.0 | \n 4975.270898 | \n 4278.850619 | \n 0.065789 | \n
\n \n | std | \n 2.292005e+05 | \n 562.652536 | \n 80.319051 | \n 63.166556 | \n 1.364616 | \n 0.783772 | \n 0.769710 | \n 0.279059 | \n 0.068001 | \n 0.0 | \n 0.0 | \n 0.0 | \n 20450.833222 | \n 19357.454882 | \n 0.247962 | \n
\n \n | min | \n 1.000000e+02 | \n 2.000000 | \n -96.000000 | \n -96.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n
\n \n | 25% | \n 1.166000e+04 | \n 190.000000 | \n -37.000000 | \n -36.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n
\n \n | 50% | \n 2.548500e+04 | \n 379.000000 | \n -6.000000 | \n -6.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n
\n \n | 75% | \n 5.283250e+04 | \n 669.000000 | \n 38.000000 | \n 30.250000 | \n 1.000000 | \n 1.000000 | \n 1.000000 | \n 0.000000 | \n 0.000000 | \n 0.0 | \n 0.0 | \n 0.0 | \n 2600.000000 | \n 2000.000000 | \n 0.000000 | \n
\n \n | max | \n 2.595650e+06 | \n 4518.000000 | \n 1245.000000 | \n 838.000000 | \n 9.000000 | \n 8.000000 | \n 7.000000 | \n 3.000000 | \n 1.000000 | \n 0.0 | \n 0.0 | \n 0.0 | \n 402000.000000 | \n 400000.000000 | \n 1.000000 | \n
\n \n
\n
"
},
"metadata": {},
"execution_count": 28
}
],
"source": [
"from scipy.io import arff\n",
"import pandas as pd\n",
"\n",
"df_full = pd.DataFrame(arff.loadarff('../../../../../data/seismic-bumps/seismic-bumps.arff')[0])\n",
"df_full['class'] = df_full['class'].astype(int)\n",
"df_full.describe()"
]
},
{
"source": [
"### Decision class distribution"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": "",
"image/svg+xml": "\r\n\r\n\r\n\r\n",
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAV0AAADsCAYAAADXaXXTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAkh0lEQVR4nO3dd5hU5aE/8O85U/fMsI0FdqmDMFJFIOBEUQSNiC7GgiUWTCxJNPHGn0luMk+SJ5eb3Jtwfz9vFKwxxhhNgiUa2yT2ggWHIl2BQVjKLm37Ti/n/P44A4iCzOzOzDtn5vt5nnk2kS3fbd995z3v+x5J0zQQEVFhyKIDEBGVE5YuEVEBsXSJiAqIpUtEVEAsXSKiAmLpEhEVEEuX6BgkSXpEkqQDkiRtFJ2FSgtLl+jYHgUwV3QIKj0sXaJj0DRtGYB20Tmo9LB0iYgKiKVLRFRALF0iogJi6RIRFRBLl+gYJElaCmA5gDGSJO2RJOkm0ZmoNEg82pGIqHA40iUiKiCWLhFRAbF0iYgKyCw6ANEhLq+vFkA9gEHHeFkD/efVDMB8qfzunrusDwwDkASQ+MzLVgAtAJo/87IZC7u4u4yKAkuXCsrl9ZkBjAUwOf04FcAY6MVqzfT9mFLRdem3zczCqij0Ej5UxJsBrAGwBgu7dmX8foj6iKVLeePy+qwAPACm4EjBTgBg6+v7TkaCg1GR1ZvYAZyUfhxtYVUbgLUAVgB4H8ByjowpX1i6lFMur+8kABcAmKtp2mxJkhz5+DiSbIrn8N31B3Bu+gEAGhZWbYFewP8C8C8s7Arn8ONRGWPpUp+4vL4KALOhl+xcSZLch/5NkiRxwfpGgj4FMhbATQAiWFj1CoBnAbyIhV2dArORwbF0KWvpor0EwLWapp0rSZIdMHTJnsihz/cSAAksrHoLegE/h4Vd+wXmIgNi6VLGXF7fTE3TvgloV0iS3A8o6aI9HguAOenH/VhY9QGApwE8joVdHUKTkSGwdOlLuby+/gC+panqLZIsj9ZLtuyK9nhkAGemH7/FwqqlAO7Dwq41YmNRMWPp0jG5vL5TNVX9KSRpviRJVknmPpoTUKDP/96EhVXLr4//9M5l6qnPNy1qTIkORsWFpUtHGfGTF6dqavI3kskyR5JlDml7IaaZBy1TJ/0dwC6X13cXgIebFjWGROei4sDSJQDAiJ+8OF1LJRbJFts5kpzxHgU6hnvDc4IwSRKAEQDuBvAfLq9vCYA7mxY1BoWGI+F4tGOZG/GTFz1aKvE/ssV2tugs2bg0+kLzXdVPDBGd4/N6EnJiWvz3UszkONaAZj+A/4A+8uW0Q5niRF2ZGvGTF04Z/qNn35Vk+UOjFW4xey50SstxChfQtzo/CGC9y+ubV8BYVEQ4vVBmht/xtFNLJZfIFc5vyhYb/+jmUFKFdq95Qf8MXnU8gBddXt9bAH7ctKjxozxHoyLCX7oyMvT7f74Bsmm3Sam8QeJyhJx7Lzh4737zYGcWbzIbwCqX1/e4y+sbmq9cVFz4i1cGht32lzHDbl+60tyv7hHZYqsWnadULcbVvXnmKAG4DsBGl9d3Q44jURHi9EIJc3l91mSw/f+alKrvS7KJ3+s82hJytq+xfmVgH95FFYBHXF7fpQC+07SocV+OolGR4Ui3RA359oNnqLHwDrOz9nYWbv49mGjM1SlkF0Ef9V6Zo/dHRYalW2IUt0duuGHJ/zPXDH5HtimDRecpB60xc+Q527xcLl/rD+BJl9f3ZHobNpUQlm4JqZv3w4aac769wjZo1I85ui2cv0a+ul+TTPnYvXcl9FEvl5eVEJZuiRh01a/mVYz2bLLUNHxFdJZyEk1JqYct19Tn8UPUQ19e9muX18dt2SWApWtwittjavjmXYvtIyY/Z7I7a0TnKTcv94xu6TFV2wvwoX4B4BmX15eXO3FQ4bB0Daz2a98dVHPOt1fYGk7+gSSbTKLzlBtVAxabFmSzLr
evLgXwgcvrcxXwY1KOsXQNqvZr3/E4xp651lLTMFV0lnK1Olh3YIdldKGfXUwCsNLl9c0s8MelHGHpGozi9ki1c2693jnx3NdMztp8ziXSCdyrzlcFfeg6AK+7vL7vCPr41AcsXQNR3B6TcvIZv3Cect5Dst3ZT3SecrYrbO9+x3a2yD96FgC/d3l9S1xeH3+PDYTfLINQ3B67MvasxY7xs34pW2w20XnK3R9j53aKzpD2bwD+7PL6OKdvECxdA1DcHqdjwuxHHOPOvlUymbn+VrDuuBxfarusmDaeXAfgCZfXZxEdhE6MpVvkFLen2jlpzlJlzJnfkGSeDFYMnglP3huXK4rtj9/lAP7h8vr4LKjI8Ze4iCluzwBl3MzHK0Z75klleK/zYpRQod1nXlAnOsdxNEJfy8v7LRUxlm6RUtyegRUnTXvIMXbmhezb4vFOz7CWVvOgYt6g0AjgKU41FC+WbhFS3J4a27CJv3NOmjOPUwrFZTGuMcLT94sBLHV5fcU2BUJg6RYdxe1xWuvdv62cOu9yXjQrLpuClW0bbKcW69TC580HcK/oEPRFLN0iorg9dkv/4b+snH7JAslsNcKIqqw8kLwoIjpDlr7r8vpuFx2CjsbSLRKK22MxV9X/uPKrl98iWysU0XnoaAei5vBLtrlFd8v3DPyvy+u7QHQIOoKlWwQUt8ck2ZSbq06/4g4Td5oVpcejMw4iP2fm5psJ+hreCaKDkI6lK5ji9kgArqycdskPTY6aWtF5jKYzquHyp8IYe28Q4+4LYvnu5Bde5+2mJCY/GMSE+4M4+9EQAOBgSMWZj4Qw8f4gntucOPy6Fz8RRkvP0UcqRJJS8hHL1UY+56ISwEsur2+A6CDEG1MWg7OVcTNvs9WPHi06iBHd/nIUc0eb8fcrrYinNIQTR/97Z1TD93xRvHydguFVMg6E9EJdujGBW6ZZcNk4Cy78axiXjLXgxS0JTKmXMbjf0WORl4JjWkL2yuGF+pzyxAV988S5TYsaY6LDlDOOdAVS3J6RloEjb3eMPes00VmMqCuqYdnOJG6aoi9JtZokVNuPngH424YELhtnxvAq/Ud9oEN/aZElhBNALAmYZCCparjbH8dPZhx9/VLVgCWmBVUF+HQKYQaAP4gOUe5YuoIobk8/2eb4YdX0S2fxfma9s6NTxQBFwg3PRzHl90Hc/EIEobh21OtsbVPREdEw69EQvvJQEI+tiwMArjnFgue3JHHe4yH87Ewb7l8Zx4JJFiiWo0vb3zNw327LyFIpXQBY4PL6bhIdopyxdAVQ3B4ZwI1Vp185T7Y7q0XnMaqkCny0V8Wt0yxY810nHBYJi96Lfe51NKzeq8J3jYJXrlPw62VxbG1LocouwXeNglXfcWJqgwkvbk3i8vEWfPuFCC5/Knx4bniJdoWITy3f7nZ5fSeJDlGuWLpizHVOmnOdpf8wl+ggRja0UsLQSgmeofoThcvHm/HRPvVzryPj/FEmOKwS6hQZM4ebsO5zr/PrZTH8/Cwblm5I4MzhJvz5kgosfCeG7eGKruW2GUa+gHY8TgCP8RxeMfhFLzDF7RlrrXd/v2L0abzNTh/VO2UMq5KxpTUFAHhjRxLj647+kb54jBnv7U4hqWoIJzT4m1MYN+DI6wTaUtjTrWKWy4xwQoMsAZIERBLAw/E5XQX9hAprBoCfig5Rjli6BaS4PTWQTbf1m9p4miTxTIVcuOcCO659NoJJDwSxdp+Kn51lw4Or4nhwlT53O26ACXNHmTHpgRBO+0MIN0+1YOLAI+d9//zNGP77HP3i2dWnWPDAqgSm/yGEG6faE09ZLzXiZohs/KfL65ssOkS5kTRNO/FrUZ+l53Hv6DflwisqTprmEZ3H6C6NvtB8V/UTeSvFhzqn7/yN/Y4R+Xr/RWQTgGlNixqjooOUC462Cme6ubp+ht015Suig9CXi6tQH7QsKJeNBBMA/FZ0iHLC0i0Axe2pBHB95bRLpnB5WP
F7o8fV0m6qK6fzL253eX1niA5RLli6hTFfGTNjvLlqoEt0EDqxxdK15VS4ACABuMvl9RnxbAnDYenmmeL2jJXt/eYo3HVmCOuD1a2brRPK8QyM0wBcKzpEOWDp5pHi9tgA3Fg5/ZJTZLO13EZPhnRf8uJyPpfgty6vjz+necbSza+51nr3OMsAF4/VM4C9UUvoZdt5pb5M7MsMBfBj0SFKHUs3TxS3ZwiAi52nfG0CbyxpDH+OnN0qSWX/K/ETl9c3WHSIUlb2P2H5cOiMXPvwSXXmygEjReehEwslpMSjtitZNoADwG9EhyhlLN38GA3gVMf4WdzqaxAvhCa0RGUnb1uuu97l9XE9eZ6wdHPs0Ci3YtT0OpOjupznBw0jpUK7x7SgRnSOIiKBo928Yenm3gQAbuXkM6aIDkKZ+SBYv6/FMqxSdI4iM8fl9fGZWh6wdHMoPcq93D5yapVJqeL8oEEs0a7i78GxeUUHKEX8Ycut8QBcjjEzOB9mEIGQo3OlzTNIdI4iNd/l9fHefTnG0s2R9Cj3MtvQ8Q6To2aY6DyUmYcSc3tEZyhiMoAfiQ5Rali6uTMGwKiK0R636CCUmfaYKfqs7SJe7Pxy17u8Pl5kzCGWbu7Mle3OhKVmMHefGcTSyGn7U5KVvwNfTgFws+gQpYQ/cDmguD11ACY5xs0cwqMbjSGWgvqQ+bqBonMYxG0ur8904lejTLB0c8MDQLM2jOESG4N4LTiqpctcUyE6h0EMBzBPdIhSwdLtI8XtMQOYYxs6wWqq6Mer4AagasDd0nUO0TkMhsc+5ghLt+/GAqisGDWNc7kGsTZYe2CbdQwvDmWn0eX1OUWHKAUs3b47V7I5kpbaIRNFB6HM3Je6NCU6gwEpAL4uOkQpYOn2geL21AI41THmzAZJNvOwFAPYE7H1vG6b3SA6h0F9Q3SAUsDS7ZvTAGjWQSeNER2EMvOn2Ox2npnba+dzzW7f8aevl9I70OZIFnuXyVnrEp2HTiyYkBN/sV7BzRC9ZwVwmegQRsfS7b0hAKorRk6t59pcY3g2dEpLTK7g96pvOMXQRyzd3hsLANb60SeLDkInllSh3Wde0F90jhIw2+X1cVNJH7B0e+90AF3m6nqetWAA7waH7N1vHswlT31nAnC+6BBGxtLtBcXtqQQw0jp4TIVssfPwawNYjKu5uiR3zhYdwMhYur3jBgD7kPEc5RrA5pCzfa116gDROUrITNEBjIyl2zvTAMQs/YdxPtcAHkw0hkVnKDFul9fHtc69xNLNUvqshSmyzRGUlSouPypyrTFz5HnbPH6fco9TDL3E0s2eC4DF2nBynSRJkugw9OX+Ejl9vyaZ+H3KPZZuL7F0szcagGSpHcqnV0UukpRSf7RcXS86R4nivG4vsXSzNxZA0Fw5gHf7LXIvh9wtPaZqu+gcJWq8y+vjxcleYOlmIb31dxSAoMlRzZFuEVM1YLG8oJ/oHCXuLNEBjIilm51qAA7J5oBkc3B3UxFbFazb32QZVS06R4k7RXQAI2LpZqcBgGqrH13Pi2jF7V71clV0hjLAJZO9wNLNzhAAMi+iFbedYXv3MttMfo/yj6XbCyzd7IwDEDJVDeQvdBH7Y/xrnaIzlAmWbi+wdDOUvog2GkCPqaKSV22LVFdcji+1zudmiMKodHl9XJKXJZZu5ioBOAHEZWtFteAsdBzPhCfvTcg2k+gcZYSj3SyxdDNXB0CVLHazZLYqosPQFyVUaPebF9SJzlFmWLpZYulmrgqAZK5uqBIdhI7t7Z7hLa3mQQ7ROcoM7w+YJZZu5moASObKOpZukVosXWMTnaEM8XjTLLF0M1cPIC5XVPLuA0VoU7CybaN1EqcWCo+37skSSzdz9QBiss3Bp69F6P7k1yOiM5Qp7szMEks3czUA4rJN4Ui3yOyPWsI+2/lcJiZGregARsPSzVwVgLhkreBIt8g8HjnzIHhmrig1Lq+PX/sssHQzoLg9JuhrdBOSyc
KLNUUknJSSj1i/wR2C4pgA8DS3LLB0M+MAoAGAJEn8mhUBTVNNAOALjm0Jm/pZRecpc1y3ngUWSGasSJcuwNPFikFKVStUDVhsur5adBYCp9yywNLNjIxDpcuRblFQVc36Yc/AfXssIypFZyGWbjZYIJk5MrrlObpFQZVMySXalfxeFIcK0QGMxCw6gEF85o8TR7rFYL11stxsdQ0SnYMAAEnRAYyEBZIZjnSLTLPVxae0xYMbU7LA0s3M4a+TxJEu0eexdLPAAskMR7pEx8fSzQJLNzOHv06apqZEBiEqQizdLLB0M3N4dKslYmGRQYiKEEs3CyzdzBwe3WpJli7RZ6hNixrjokMYCUs3M4eLVo1HWLpER3CUmyWWbmZCSH+ttHgkJDgLUTHhICRLLN3MxKBvA5bUWJg/ZERHNIsOYDQs3QyEA34NQA8AixoNsnSJjmgSHcBoWLqZ6wZgUaM9LF2iI3aIDmA0LN3MdQGwpkKdnNMlOoKlmyWWbuY6AViSPQeDmqaqosMQFYkm0QGMhqWbuQ4AFqSSqhoLtYoOQ1QkONLNEks3cweg3w8Karj7gOAsRMWiSXQAo2HpZu4AABUAUsF2li4R0Nq0qDEoOoTRsHQzdwDpMxiSXftZukTAdtEBjIilm7kuAHEA5kTbbpYuEbBKdAAjYulmKL1BYhcAR6Jtd4eWSvKQDyp3H4oOYEQs3ex8ivSdT9VY8KDgLESiLRcdwIhYutnZifTNPFPhLk4xUNnSNO1g06LGbaJzGBFLNzuHVzAkO/fxoA8qW5IkcWqhl1i62Tm8VjfWvJmLwqmcsXR7iaWbhXDAHwJwEICSaN3ZriaiPaIzEQnC+dxeYulmbw2AagBIdbdytEtlR9O0FIAVonMYFUs3e58gPcWQaNvF0qWyI0nSuqZFjTxtr5dYutnbgfTOtMjO9ds0TRMch6jgXhIdwMhYulkKB/xdAPYBcKS6DwTVaM8+0ZmICuxF0QGMjKXbOyuQntdNdrRwrSKVDU3TmgGsFp3DyFi6vfMx0l+7WMuWrYKzEBWMJEkvNS1q5JxaH7B0e2cHgCQAU3Tnut1qPNIlOhBRgfxddACjY+n2QjjgTwDYAKAWAOIHm9aLTUSUf5qmtQJ4S3QOo2Pp9t77ACoAIPLpynWCsxDlnSRJzzQtakyJzmF0LN3e+xj6+bqWxMGmtlSoY4/oQER59pToAKWApdtL4YA/BuBdAAMAILY3sFZoIKI80jR1F4C3RecoBSzdvvkQgAUAwls/2KSpqaTgPER5IUnyvU2LGlXROUoBS7dvdgBoBeBQI93RZNf+LaIDEeWapqkxAH8UnaNUsHT7IBzwqwBeR3oVQ3T3Rl5Qo9KjaX9rWtTYLjpGqWDp9t1q6GcxSJFtK7ap8Uin4DxEOSXJprtFZyglLN0+Cgf8rQC2AqiBpmrR3RvfF52JKFe0VHJ506JGrkPPIZZubrwGoB8AhDa+sUZNxIKC8xDlhGQy3yU6Q6lh6ebGOgBtAJxaMp6KNX/CU/XJ8DQ1tQ/AP0TnKDUs3RwIB/xJAM8C6A8AwY1vrNKSiYjYVER9JEl3Ny1q5DLIHGPp5s4qAN0AFC0Wisf2bvWLDkTUW1oq2SpJ8hLROUoRSzdHwgF/HMBzSO9QC2583a+lknGhoYh6SUslFzYtauSztTxg6ebWcgARAHY13BWN7/90pehARNlSE7HdstX+oOgcpYqlm0PhgD8K4AUAAwEguPH15VoqGRObiihLmvbvPE0sf1i6ufcu9NPHrKmetlB014a3xcYhypwaj27c9bv5T4rOUcpYujkWDvhD0O+WWg8APWv/uSIV7TkgNhVRhmT5B6IjlDqWbn68AaALQD+oKTW06e1/ig5EdCJqPPLurjsv5Z0h8oylmwfhgD8C4DEAdQAQbVqzM9HevFFsKqLj01Q1JcnmW0XnKAcs3fxZC/0+aoMAoOejl17lEjIqVmqk656dd16ySXSOcsDSzZNwwK8B+B
sAGwBzsmt/T3T3xncExyL6glQ0uEeNR/9ddI5yIWkab2GfT4rbMx9AI4BdMJnlurk/uFW2O+tE58qX7lXPI7juFUADnKeej8rpF6Nz2eMIb/MDkgSTUo3+F/4fmPv1P+bbq7EwWh6+FcrJX0XtebdCSyZw4NlfI9XTin5TGtFvaiMAoO3le+CcfAFs9aML+emVHE1V1UT77lktD3/vXdFZygVHuvn3TwA9AJxIJdXghtdf0Er0L138YBOC615B/fW/Q8ON9yDy6QokOlpQ6ZmPwTfei8E33IOKUdPR9cHS476Pzncfh23YxMP/P7LjI9iGjkfDjfciuOlN/eMc2A5NVVm4OZDsPvAoC7ewWLp59pmLagMAILpr/e5Yy+aS/CFPtO2BtWEMZIsdkmyCbdhEhLd+ANmmHH4dLRGFfub7F8X2bUMq1ImKkVMO/zdJNkFLxIBUCkj/qep89y+oPuu6fH4qZUGNBlvUaJAXzwqMpVsYH0G/sDYYALpXPPtOKtzZLDRRHljrRiC2ZxNSkW6oiSgi21ch1d0KAOhY9hj23P8thD5++5iFqWkqOt58GDWzbzrqv9tHTkGy6wD2Pv4jVE67COGAH9ZBo447PUGZ0TRVTQbbrt376O28uFtgnNMtEMXtqQHwX9DPZgha6kbUVp917S2SbLYIjpZTPeteRXCND5LFDkvdcEgmC2q/9p3D/961/CloyQSqz7r2qLfrXv0itGQMVZ7LEdzwOuL7Aqg97+hBmJZKYv9Tv8TAy36Bzvf+ilT3QTgmngvF7SnI51ZKEp37/tz84E3fEp2jHHGkWyDhgL8DwEPQz2WQE60728NbP3xJcKyc63fqHDR8azHqr/0fyHYnLLVDjvp3x4RZCG/94h2NYi2b0bPahz0P3IiOtx5BcOOb6Hj70aNep2eND86J5yDWsgWyzYG6i3+K7pU8YztbqVDn9lSw/duic5Qrs+gAZWY99LsHzwKwK7TpzfWW/sNGWAeMmCo2Vu6kQp0wOaqR7D6A8NblaFhwJxLtzYfLNxzww1I79AtvN+CiIyuWDo10a2Z968j7jQYR2bYSA6/6FSLbVgCSBEgSNJ4nlBU1Ho3E9gW+fuDphQnRWcoVS7eAwgG/prg9TwMYA/3C2sGu5U/8q/a87w0xVfQbJDheThx87jdQIz2AbELtebdAtjvR9q8lSLTvASQZ5soBqD3/+wCA2N4Agmv/hf4XnHi7f9f7S1F1xpWQJBkVI6ei5yMf9v7xNjinXJDvT6lkaKqqRXdvvOPA0wu5CUIgzukKoLg9QwAsBNAOIGKpG1FbPeOamyWzpUJsMiplsZbNj3ctf+qb6Y07JAjndAUIB/zNAB4B0ID0/G7P2n8+oakqzzClvIi37lrdtfypm1m44rF0xfkQ+mlkwwEgunPdrtDmZc/ymQflWrKntSW06c156VtKkWAsXUE+czbDBgBDASD8ybKPozvXvio0GJUUNRYKhrd+cGnX8qf3ic5COpauQOGAPwHgQQB7ceg0stUvLo/t28Y7CVOfqfFIJPTJspvaX/v9CtFZ6AiWrmDpO03cBSAKoD8AdH2w9JVEx95PhAYjQ1MT0Whw3Ss/j3y68mnRWehoLN0iEA742wH8LwArgH7QNK3z3ceeTYU6dguORgakJmOxnjX/vDO6a/0SXjgrPizdIhEO+PdAH/HWArBriViyY9njS1PhrhbB0chAtGQ8Hlz78pLY7o2/Cgf8XA1ThFi6RSQc8G8G8HvoB+NY1HBnpOOtRx5LBtt3CY5GBqClEsme9a8+EN257hfp6wVUhLg5oggpbs/5AK4FsAdAXLLYzTWzbrjKXDmAB8jSMWmpZCq4/tWHIttX3REO+Lk3uohxpFucXgXwJwBDANi1RDTZ8ebDTyQ6Wj4WnIuKkJZKJIIbXn80sn3Vj1i4xY8j3SKmuD2nA7gFwH4AYUiyVD3z+q9b64ZPFpuMioUaD4e6V734p/jeLd70ShgqcizdIqe4PV
MB/BuANgBBAKg689oLbINGnSY0GAmXCnW0dS1/6tFk1/6F4YA/KDoPZYalawCK2zMBwB0AutMPVJ522Uzb0AmzJenYt76h0pZo37On64Mnl6ix0H3hgD8sOg9ljqVrEIrb4wbwY+ibKDoAoML9Vbdz/OzLJLPFLjQcFVS0+ZPN3f5n/gua+mQ44E+KzkPZYekaiOL2uAD8CIAFwD4AsPQfVlPpmX+VqaKyJM7jpePTNE0LB5avDG14/WcA3uTGB2Ni6RqM4vb0B3ArgNEAdgFQJYvdXD3j6oss/YdNEpuO8kVNxCLB9a8uizat+Wk44F8nOg/1HkvXgBS3xwrgKgDnAWiBPuUA5+QLp1ecNHWuJMlcClhCEp17d3X7n3k1FWz/73DA3yQ6D/UNS9egFLdHAnA6gJsAhKDfhQL24ZOGOSfPvUK22PuJzEd9p6mpZGTbitXBDa+9AuCecMDfKjoT9R1L1+AUt2cEgB8AqATQDAAmZ61SOf3SCy21QyYIDUe9lop0t3av+McHidadfwXwHA8gLx0s3RKguD2V0Ee8k6FvHU4AgDL2rHHKmBmNstnqEBiPsqBpGuItWzZ1r3ruHS0Zvy8c8HMXYolh6ZYIxe0xATgfwOUAIgAOAodHvRdYaodMFJmPTkyNR7qD619bGd259jkAj4UD/m7RmSj3WLolRnF7hkEf9Y7EZ0a9FaNOG+UYN7NRtik1IvPRF2mqmoo1f7yuZ41vvZaI/QnAe+GAXxWdi/KDpVuCFLfHAmAOgPnQVzYcAADJYjdXTvv62daGk8/gCofikOw+uKN79Qtrk+3N6wE8FA74eX5yiWPpljDF7RkCYAGAcdA3U0QAwFI7tNox6bxZltqhkyTuIxZCjYbaQp+8syqyfdVOAE8CWMbdZeWBpVviFLdHhr607FoANug3wUwCgHXQqAGOCefMNlfXj2P3FoaWjEciO9asCG58fRfU1HsAng4H/B2ic1HhsHTLhOL2VAG4APqGChV6+aoAYBsyrsExfta55soBowRGLGlqIhaMNX/yUXDjG81aLLQNwF/DAf8W0bmo8Fi6ZUZxewYAmAdgJoA49GkHDQDsrikjHGNmnGNy1g4XGLGkqLFwe3TX+hXBTW/tRyrRBWApgJW8f1n5YumWKcXtGQzgYgAe6HO9B/CZ8q0YOXWaubphnCTLJoExDSsV7tob2b56ZXjr++3QtAiAlwC8FQ74I6KzkVgs3TKX3tE2H8AkAGHo63s1ADD16+9wjJ051Vo/+iuytaJKYEzDSHa3bg8HPlgTbVobhL41+3kA/nDAHxUcjYoES5cOneMwGsCFAE6FXroHkT5IB5IkKe6vuu3DT51uqhwwiisejqZGQ63xgzs2hAMf7k52tGjQ10c/B2AtVyTQ57F06SjpOd/Toa/zdQDoQfowHUBfblZx8ulTLf2HjTXZnQMExRROTcSCibbdG6M7Pvo41rI5Bf2M460AXgDwCTc30PGwdOmY0sdHToS+tfhk6MvMDiC9ww0ALHUjau0jTh1jqRs+1uSoGVbqI2AtlYwnO/duju7euDGyfXU3NNUO/dnA++lHEw8WpxNh6dIJpTdZnAVgNvQRXRL6jTIP3+7b5KxV7K6pbuugkWPNlQNGSbLZIiZt7miqmkqFO/ckO/buiO/ftiO6e1MIatIBfandRwDeA7CZJ4BRNli6lLH06HcU9NPMTgfghD7/24H0nYoBQLLYzPZhE4dZ6kYMM1cOHGZy1Aw1wn3cNE3T1HBXS7Jr3474/u07ors3tmiJqAP6NIsEYAuAtwBs4t13qbdYutQr6Z1uw6BPQcwAUJ/+pxCALqR3vR1iGXhSnXWAq8FcNajB5KxtMCmV9ZJJXBFraiqpRoMHU+Gu/ametv3Jzr0HYs2f7FVjITuAftD/mCQBfAxgDfSibROVl0oHS5f6LL36oQ76GQ9TAYwBYIU+OkxAvxgXQnop2iEmZ3/FXDWwyuTsX2VSqqrkin6Vst1ZJV
srqiRrRZVktjl7M0+spZJxLZWIaKlEVEsmIloyHlFjoe5UqLMt1XOwLdGxtz3Z0dwFTbNAH8U602+qQr8Y9hGATwHs4eoDyjWWLuVcehQ8EPpI+GToJTwk/c8S9PKNQp8TjkHfGffFH0TZJMt2p1UyWUxHHmYT0i8lk9kE2WxCKp5MRUNRNdIVSYW7IkglP79ywAZAAVABwAy9XGXofwh2AtgIYBuAXZyfpXxj6VJBpI+bHASgAfpUxCDoxTwAQBWOLl0ZejFq6Yd6nJcy9BI1AzB95t8OkdKPTug38NwNfQ3tAQD7AfRwtQEVGkuXhFPcHjP0edSqzzwU6CsljvUwp18moM8fd6cfEegj6Mhn/ndPOOCPgahIsHQppyRJmgtgMfSR58Oapi0SHImoqLB0KWckSTJBvxB1HvSn8SsBXK1pGm+uSJTGW7ZQLp0GYJumads1TYsDeAL6SWZElMbSpVwaAv1i1SF7cGTVAhGBpUtEVFAsXcqlZuhrcw8Zmv5vRJTG0qVcWgnALUnSSEmSrAC+Af2oQyJKM4sOQKVD07SkJEm3AXgF+pKxRzRN2yQ4FlFR4ZIxIqIC4vQCEVEBsXSJiAqIpUtEVEAsXSKiAmLpEhEVEEuXiKiAWLpERAXE0iUiKqD/Dw13tVN4Tzd8AAAAAElFTkSuQmCC\n"
},
"metadata": {}
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"groups = df_full['class'].value_counts()\n",
"sizes = [groups[0], groups[1]]\n",
"labels = list(map(lambda e: str(e), groups.index))\n",
"\n",
"fig1, ax1 = plt.subplots()\n",
"ax1.pie(sizes, labels=labels, autopct='%1.1f%%', shadow=True, startangle=90)\n",
"ax1.axis('equal')\n",
"\n",
"plt.show()"
]
},
{
"source": [
"## Import and init RuleKit"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from rulekit import RuleKit\n",
"from rulekit.classification import RuleClassifier\n",
"from rulekit.params import Measures\n",
"\n",
"\n",
"RuleKit.init()"
]
},
{
"source": [
"## Helper function for calculating metrics"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"import sklearn.tree as scikit\n",
"from sklearn.datasets import load_iris\n",
"import math\n",
"from sklearn.preprocessing import MultiLabelBinarizer\n",
"from sklearn import metrics\n",
"import pandas as pd\n",
"import numpy as np\n",
"from typing import Tuple\n",
"\n",
"\n",
"x = df_full.drop(['class'], axis=1)\n",
"y = df_full['class']\n",
"\n",
"def get_prediction_metrics(measure: str, y_pred, y_true, classification_metrics: dict) -> Tuple[pd.DataFrame, np.ndarray]:\n",
" confusion_matrix = metrics.confusion_matrix(y_true, y_pred)\n",
" tn, fp, fn, tp = confusion_matrix.ravel()\n",
" sensitivity = tp / (tp + fn)\n",
" specificity = tn / (tn + fp)\n",
" npv = tn / (tn + fn)\n",
" ppv = tp / (tp + fp)\n",
"\n",
" dictionary = {\n",
" 'Measure': measure,\n",
" 'Accuracy': metrics.accuracy_score(y_true, y_pred),\n",
" 'MAE': metrics.mean_absolute_error(y_true, y_pred),\n",
" 'Kappa': metrics.cohen_kappa_score(y_true, y_pred),\n",
" 'Balanced accuracy': metrics.balanced_accuracy_score(y_true, y_pred),\n",
" 'Logistic loss': metrics.log_loss(y_true, y_pred),\n",
" 'Precision': metrics.log_loss(y_true, y_pred),\n",
" 'Sensitivity': sensitivity,\n",
" 'Specificity': specificity,\n",
" 'NPV': npv,\n",
" 'PPV': ppv,\n",
" 'psep': ppv + npv - 1,\n",
" 'Fall-out': fp / (fp + tn),\n",
" \"Youden's J statistic\": sensitivity + specificity - 1,\n",
" 'Lift': (tp / (tp + fp)) / ((tp + fn) / (tp + tn + fp + fn)),\n",
" 'F-measure': 2 * tp / (2 * tp + fp + fn),\n",
" 'Fowlkes-Mallows index': metrics.fowlkes_mallows_score(y_true, y_pred),\n",
" 'False positive': fp,\n",
" 'False negative': fn,\n",
" 'True positive': tp,\n",
" 'True negative': tn,\n",
" 'Rules per example': classification_metrics['rules_per_example'],\n",
" 'Voting conflicts': classification_metrics['voting_conflicts'],\n",
" 'Negative voting conflicts': classification_metrics['negative_voting_conflicts'],\n",
" 'Geometric mean': math.sqrt(specificity * sensitivity),\n",
" 'Geometric mean': math.sqrt(specificity * sensitivity),\n",
" }\n",
" return pd.DataFrame.from_records([dictionary], index='Measure'), confusion_matrix\n",
"\n",
"def get_ruleset_stats(measure: str, model) -> pd.DataFrame:\n",
" return pd.DataFrame.from_records([{'Measure': measure, **model.stats.__dict__}], index='Measure')\n",
" "
]
},
{
"source": [
"## Rule induction on full dataset"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": " time_total_s time_growing_s time_pruning_s rules_count \\\nMeasure \nC2 2.878110 4.122812 0.673479 178 \nCorrelation 3.684995 6.220216 0.909509 58 \nRSS 3.444098 5.140447 0.778426 60 \n\n conditions_per_rule induced_conditions_per_rule \\\nMeasure \nC2 5.005618 14.387640 \nCorrelation 6.000000 54.517241 \nRSS 4.233333 46.266667 \n\n avg_rule_coverage avg_rule_precision avg_rule_quality \\\nMeasure \nC2 0.141539 0.916631 0.479177 \nCorrelation 0.396505 0.692544 0.188542 \nRSS 0.585900 0.852020 0.328324 \n\n pvalue FDR_pvalue FWER_pvalue fraction_significant \\\nMeasure \nC2 0.058208 0.063738 0.884413 0.769663 \nCorrelation 0.031233 0.032530 0.101139 0.879310 \nRSS 0.011781 0.012022 0.025579 0.950000 \n\n fraction_FDR_significant fraction_FWER_significant \nMeasure \nC2 0.752809 0.561798 \nCorrelation 0.879310 0.862069 \nRSS 0.933333 0.900000 ",
"text/html": "\n\n
\n \n \n | \n time_total_s | \n time_growing_s | \n time_pruning_s | \n rules_count | \n conditions_per_rule | \n induced_conditions_per_rule | \n avg_rule_coverage | \n avg_rule_precision | \n avg_rule_quality | \n pvalue | \n FDR_pvalue | \n FWER_pvalue | \n fraction_significant | \n fraction_FDR_significant | \n fraction_FWER_significant | \n
\n \n | Measure | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n
\n \n \n \n | C2 | \n 2.878110 | \n 4.122812 | \n 0.673479 | \n 178 | \n 5.005618 | \n 14.387640 | \n 0.141539 | \n 0.916631 | \n 0.479177 | \n 0.058208 | \n 0.063738 | \n 0.884413 | \n 0.769663 | \n 0.752809 | \n 0.561798 | \n
\n \n | Correlation | \n 3.684995 | \n 6.220216 | \n 0.909509 | \n 58 | \n 6.000000 | \n 54.517241 | \n 0.396505 | \n 0.692544 | \n 0.188542 | \n 0.031233 | \n 0.032530 | \n 0.101139 | \n 0.879310 | \n 0.879310 | \n 0.862069 | \n
\n \n | RSS | \n 3.444098 | \n 5.140447 | \n 0.778426 | \n 60 | \n 4.233333 | \n 46.266667 | \n 0.585900 | \n 0.852020 | \n 0.328324 | \n 0.011781 | \n 0.012022 | \n 0.025579 | \n 0.950000 | \n 0.933333 | \n 0.900000 | \n
\n \n
\n
"
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": " Accuracy MAE Kappa Balanced accuracy Logistic loss \\\nMeasure \nC2 0.968266 0.031734 0.682882 0.777962 1.096047 \nCorrelation 0.916796 0.083204 0.321259 0.660191 2.873809 \nRSS 0.922601 0.077399 0.241820 0.600414 2.673301 \n\n Precision Sensitivity Specificity NPV PPV ... \\\nMeasure ... \nC2 1.096047 0.558824 0.997100 0.969782 0.931373 ... \nCorrelation 2.873809 0.364706 0.955675 0.955280 0.366864 ... \nRSS 2.673301 0.229412 0.971417 0.947092 0.361111 ... \n\n F-measure Fowlkes-Mallows index False positive False negative \\\nMeasure \nC2 0.698529 0.966199 7 75 \nCorrelation 0.365782 0.913023 107 108 \nRSS 0.280576 0.920751 69 131 \n\n True positive True negative Rules per example \\\nMeasure \nC2 95 2407 25.193885 \nCorrelation 62 2307 22.997291 \nRSS 39 2345 35.154025 \n\n Voting conflicts Negative voting conflicts Geometric mean \nMeasure \nC2 837.0 57.0 0.746460 \nCorrelation 1603.0 157.0 0.590373 \nRSS 1844.0 94.0 0.472075 \n\n[3 rows x 24 columns]",
"text/html": "\n\n
\n \n \n | \n Accuracy | \n MAE | \n Kappa | \n Balanced accuracy | \n Logistic loss | \n Precision | \n Sensitivity | \n Specificity | \n NPV | \n PPV | \n ... | \n F-measure | \n Fowlkes-Mallows index | \n False positive | \n False negative | \n True positive | \n True negative | \n Rules per example | \n Voting conflicts | \n Negative voting conflicts | \n Geometric mean | \n
\n \n | Measure | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n
\n \n \n \n | C2 | \n 0.968266 | \n 0.031734 | \n 0.682882 | \n 0.777962 | \n 1.096047 | \n 1.096047 | \n 0.558824 | \n 0.997100 | \n 0.969782 | \n 0.931373 | \n ... | \n 0.698529 | \n 0.966199 | \n 7 | \n 75 | \n 95 | \n 2407 | \n 25.193885 | \n 837.0 | \n 57.0 | \n 0.746460 | \n
\n \n | Correlation | \n 0.916796 | \n 0.083204 | \n 0.321259 | \n 0.660191 | \n 2.873809 | \n 2.873809 | \n 0.364706 | \n 0.955675 | \n 0.955280 | \n 0.366864 | \n ... | \n 0.365782 | \n 0.913023 | \n 107 | \n 108 | \n 62 | \n 2307 | \n 22.997291 | \n 1603.0 | \n 157.0 | \n 0.590373 | \n
\n \n | RSS | \n 0.922601 | \n 0.077399 | \n 0.241820 | \n 0.600414 | \n 2.673301 | \n 2.673301 | \n 0.229412 | \n 0.971417 | \n 0.947092 | \n 0.361111 | \n ... | \n 0.280576 | \n 0.920751 | \n 69 | \n 131 | \n 39 | \n 2345 | \n 35.154025 | \n 1844.0 | \n 94.0 | \n 0.472075 | \n
\n \n
\n
3 rows × 24 columns
\n
"
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Confusion matrix - C2\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": " 0 1\n0 2407 7\n1 75 95",
"text/html": "\n\n
\n \n \n | \n 0 | \n 1 | \n
\n \n \n \n | 0 | \n 2407 | \n 7 | \n
\n \n | 1 | \n 75 | \n 95 | \n
\n \n
\n
"
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Confusion matrix - Correlation\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": " 0 1\n0 2307 107\n1 108 62",
"text/html": "\n\n
\n \n \n | \n 0 | \n 1 | \n
\n \n \n \n | 0 | \n 2307 | \n 107 | \n
\n \n | 1 | \n 108 | \n 62 | \n
\n \n
\n
"
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Confusion matrix - RSS\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": " 0 1\n0 2345 69\n1 131 39",
"text/html": "\n\n
\n \n \n | \n 0 | \n 1 | \n
\n \n \n \n | 0 | \n 2345 | \n 69 | \n
\n \n | 1 | \n 131 | \n 39 | \n
\n \n
\n
"
},
"metadata": {}
}
],
"source": [
"from rulekit.params import Measures\n",
"from IPython.display import display\n",
"\n",
"# C2\n",
"clf = RuleClassifier(\n",
" induction_measure=Measures.C2,\n",
" pruning_measure=Measures.C2,\n",
" voting_measure=Measures.C2,\n",
")\n",
"clf.fit(x, y)\n",
"c2_ruleset = clf.model\n",
"prediction, classification_metrics = clf.predict(x, return_metrics=True)\n",
"\n",
"prediction_metric, c2_confusion_matrix = get_prediction_metrics('C2', prediction, y, classification_metrics)\n",
"model_stats = get_ruleset_stats('C2', clf.model)\n",
"\n",
"# Correlation\n",
"clf = RuleClassifier(\n",
" induction_measure=Measures.Correlation,\n",
" pruning_measure=Measures.Correlation,\n",
" voting_measure=Measures.Correlation,\n",
")\n",
"clf.fit(x, y)\n",
"corr_ruleset = clf.model\n",
"prediction, classification_metrics = clf.predict(x, return_metrics=True)\n",
"\n",
"tmp, corr_confusion_matrix = get_prediction_metrics('Correlation', prediction, y, classification_metrics)\n",
"prediction_metric = pd.concat([prediction_metric, tmp])\n",
"model_stats = pd.concat([model_stats, get_ruleset_stats('Correlation', clf.model)])\n",
"\n",
"# RSS\n",
"clf = RuleClassifier(\n",
" induction_measure=Measures.RSS,\n",
" pruning_measure=Measures.RSS,\n",
" voting_measure=Measures.RSS,\n",
")\n",
"clf.fit(x, y)\n",
"rss_ruleset = clf.model\n",
"prediction, classification_metrics = clf.predict(x, return_metrics=True)\n",
"tmp, rss_confusion_matrix = get_prediction_metrics('RSS', prediction, y, classification_metrics)\n",
"prediction_metric = pd.concat([prediction_metric, tmp])\n",
"model_stats = pd.concat([model_stats, get_ruleset_stats('RSS', clf.model)])\n",
"\n",
"display(model_stats)\n",
"display(prediction_metric)\n",
"\n",
"print('Confusion matrix - C2')\n",
"display(pd.DataFrame(c2_confusion_matrix))\n",
"\n",
"print('Confusion matrix - Correlation')\n",
"display(pd.DataFrame(corr_confusion_matrix))\n",
"\n",
"print('Confusion matrix - RSS')\n",
"display(pd.DataFrame(rss_confusion_matrix))"
]
},
{
"source": [
"### C2 Measure generated rules"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"IF gimpuls = (-inf, 32.50) THEN label = {0}\nIF gimpuls = (-inf, 54.50) AND senergy = (-inf, 3700) THEN label = {0}\nIF gimpuls = (-inf, 54.50) AND genergy = <1865, inf) THEN label = {0}\nIF goenergy = <-84.50, inf) AND goimpuls = (-inf, -0.50) AND genergy = (-inf, 13675) AND nbumps = (-inf, 0.50) THEN label = {0}\nIF goenergy = <-84.50, inf) AND genergy = (-inf, 17640) AND nbumps = (-inf, 0.50) THEN label = {0}\nIF genergy = <1635, 13675) AND goimpuls = (-inf, -0.50) AND nbumps = (-inf, 0.50) THEN label = {0}\nIF goenergy = <-84.50, inf) AND gimpuls = (-inf, 772.50) AND genergy = (-inf, 17640) AND senergy = (-inf, 650) THEN label = {0}\nIF goenergy = <-84.50, inf) AND maxenergy = (-inf, 950) AND gimpuls = (-inf, 772.50) AND genergy = (-inf, 17640) THEN label = {0}\nIF goenergy = <-84.50, inf) AND goimpuls = (-inf, -5.50) AND genergy = (-inf, 13675) AND senergy = (-inf, 2200) AND nbumps2 = (-inf, 0.50) THEN label = {0}\nIF goenergy = <-84.50, inf) AND gimpuls = (-inf, 772.50) AND genergy = (-inf, 17640) AND senergy = (-inf, 2200) THEN label = {0}\nIF goenergy = <-84.50, inf) AND maxenergy = (-inf, 3500) AND genergy = (-inf, 17640) AND nbumps2 = (-inf, 0.50) THEN label = {0}\nIF goenergy = <-84.50, inf) AND maxenergy = (-inf, 3500) AND gimpuls = (-inf, 772.50) AND genergy = (-inf, 17640) THEN label = {0}\nIF goenergy = <-84.50, inf) AND gimpuls = (-inf, 772.50) AND genergy = (-inf, 17640) AND nbumps3 = (-inf, 0.50) AND senergy = (-inf, 25000) THEN label = {0}\nIF goenergy = <-84.50, inf) AND gimpuls = (-inf, 772.50) AND genergy = (-inf, 17640) AND nbumps3 = (-inf, 0.50) THEN label = {0}\nIF gimpuls = (-inf, 772.50) AND genergy = <1865, 17640) AND senergy = (-inf, 4400) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF gimpuls = (-inf, 217) AND genergy = <1865, inf) AND goimpuls = (-inf, -5.50) AND nbumps4 = (-inf, 0.50) AND nbumps2 = (-inf, 0.50) THEN label = {0}\nIF goenergy = (-inf, 68) AND genergy = <1865, 17640) AND senergy = (-inf, 25000) AND nbumps = (-inf, 
1.50) THEN label = {0}\nIF ghazard = {c} THEN label = {0}\nIF goenergy = <-84.50, inf) AND gimpuls = (-inf, 536) AND genergy = (-inf, 18585) AND nbumps = (-inf, 0.50) THEN label = {0}\nIF goenergy = <-84.50, inf) AND genergy = (-inf, 18585) AND nbumps = (-inf, 0.50) THEN label = {0}\nIF maxenergy = (-inf, 950) AND gimpuls = (-inf, 536) AND genergy = (-inf, 18585) THEN label = {0}\nIF gimpuls = (-inf, 536) AND genergy = <1865, 18585) AND nbumps3 = (-inf, 1.50) AND senergy = (-inf, 27100) THEN label = {0}\nIF goenergy = <297.50, inf) THEN label = {0}\nIF senergy = <115450, inf) THEN label = {0}\nIF genergy = <1789250, inf) THEN label = {0}\nIF gimpuls = (-inf, 786) AND genergy = <1865, 18810) AND nbumps3 = (-inf, 1.50) AND senergy = (-inf, 27100) THEN label = {0}\nIF goenergy = <-84.50, inf) AND genergy = (-inf, 51290) AND goimpuls = (-inf, -0.50) AND shift = {N} AND nbumps = (-inf, 0.50) THEN label = {0}\nIF goenergy = <-84.50, inf) AND gimpuls = (-inf, 184.50) AND goimpuls = (-inf, 27.50) AND nbumps = (-inf, 0.50) THEN label = {0}\nIF goenergy = <-84.50, inf) AND genergy = (-inf, 51290) AND shift = {N} AND nbumps = (-inf, 0.50) THEN label = {0}\nIF goenergy = <-73.50, inf) AND goimpuls = (-inf, -0.50) AND shift = {N} AND nbumps = (-inf, 0.50) THEN label = {0}\nIF goenergy = <-73.50, inf) AND goimpuls = (-inf, 96.50) AND shift = {N} AND nbumps = (-inf, 0.50) THEN label = {0}\nIF goenergy = <-55.50, inf) AND goimpuls = (-inf, 96.50) AND shift = {N} AND senergy = (-inf, 2150) THEN label = {0}\nIF goimpuls = <-70.50, 96.50) AND genergy = <4640, inf) AND shift = {N} AND nbumps2 = (-inf, 0.50) THEN label = {0}\nIF gimpuls = <135, inf) AND goimpuls = (-inf, 230.50) AND genergy = <9110, inf) AND shift = {N} AND senergy = (-inf, 2150) THEN label = {0}\nIF genergy = <9110, inf) AND shift = {N} AND senergy = <2400, 9500) AND nbumps3 = (-inf, 1.50) THEN label = {0}\nIF goenergy = <-84.50, inf) AND gimpuls = (-inf, 395) AND genergy = (-inf, 19310) AND goimpuls = (-inf, -0.50) 
AND nbumps = (-inf, 0.50) THEN label = {0}\nIF goenergy = <-84.50, inf) AND gimpuls = (-inf, 786) AND genergy = (-inf, 19310) AND senergy = (-inf, 650) THEN label = {0}\nIF goenergy = <-54.50, inf) AND genergy = <10915, 19310) AND goimpuls = <-50.50, 230.50) AND nbumps2 = (-inf, 1.50) AND nbumps = <0.50, inf) THEN label = {0}\nIF goenergy = <-84.50, inf) AND gimpuls = (-inf, 786) AND genergy = (-inf, 19510) AND senergy = (-inf, 650) THEN label = {0}\nIF goenergy = <-84.50, inf) AND gimpuls = (-inf, 392.50) AND genergy = (-inf, 20525) AND goimpuls = (-inf, -0.50) AND nbumps = (-inf, 0.50) THEN label = {0}\nIF goenergy = <-84.50, 118) AND genergy = (-inf, 20525) AND senergy = (-inf, 550) THEN label = {0}\nIF goenergy = <-84.50, inf) AND gimpuls = (-inf, 319.50) AND goimpuls = (-inf, -0.50) AND seismoacoustic = {a} AND nbumps = (-inf, 0.50) THEN label = {0}\nIF goenergy = <-84.50, inf) AND gimpuls = (-inf, 319.50) AND goimpuls = (-inf, -0.50) AND nbumps = (-inf, 0.50) THEN label = {0}\nIF goenergy = <-84.50, inf) AND gimpuls = (-inf, 362.50) AND goimpuls = (-inf, -0.50) AND nbumps = (-inf, 0.50) THEN label = {0}\nIF goenergy = <-84.50, inf) AND gimpuls = (-inf, 319.50) AND goimpuls = (-inf, -0.50) AND senergy = (-inf, 550) THEN label = {0}\nIF goenergy = <-84.50, inf) AND gimpuls = (-inf, 362.50) AND goimpuls = (-inf, -0.50) AND senergy = (-inf, 550) THEN label = {0}\nIF goenergy = <-84.50, 118) AND gimpuls = (-inf, 362.50) AND goimpuls = (-inf, 96.50) AND nbumps = (-inf, 0.50) THEN label = {0}\nIF goenergy = <-84.50, 118) AND gimpuls = (-inf, 362.50) AND goimpuls = (-inf, 96.50) AND senergy = (-inf, 550) THEN label = {0}\nIF goenergy = <-84.50, 118) AND gimpuls = (-inf, 380.50) AND goimpuls = (-inf, 96.50) AND nbumps = (-inf, 0.50) THEN label = {0}\nIF goenergy = <-84.50, 120.50) AND gimpuls = (-inf, 395.50) AND maxenergy = (-inf, 350) AND goimpuls = (-inf, 96.50) AND senergy = (-inf, 550) THEN label = {0}\nIF goenergy = <-84.50, 120.50) AND gimpuls = (-inf, 449.50) 
AND maxenergy = (-inf, 350) AND genergy = (-inf, 32875) THEN label = {0}\nIF goenergy = <-84.50, 120.50) AND gimpuls = (-inf, 449.50) AND maxenergy = (-inf, 350) AND goimpuls = (-inf, 96.50) AND senergy = (-inf, 550) THEN label = {0}\nIF goenergy = <-84.50, inf) AND gimpuls = (-inf, 449.50) AND goimpuls = (-inf, 96.50) AND senergy = (-inf, 550) THEN label = {0}\nIF goenergy = <-84.50, inf) AND gimpuls = (-inf, 537.50) AND genergy = (-inf, 25125) AND goimpuls = (-inf, 27.50) AND nbumps = (-inf, 0.50) THEN label = {0}\nIF goenergy = <-84.50, 114.50) AND gimpuls = (-inf, 537.50) AND maxenergy = (-inf, 350) AND genergy = (-inf, 31790) THEN label = {0}\nIF goenergy = <-84.50, 114.50) AND genergy = (-inf, 31790) AND senergy = (-inf, 550) THEN label = {0}\nIF goenergy = <116.50, inf) AND gimpuls = (-inf, 788.50) AND genergy = <20930, 31790) THEN label = {0}\nIF goenergy = <-84.50, 114.50) AND genergy = (-inf, 32770) AND senergy = (-inf, 550) THEN label = {0}\nIF goenergy = <-84.50, 87.50) AND gimpuls = (-inf, 1342.50) AND goimpuls = (-inf, 96) AND senergy = (-inf, 550) THEN label = {0}\nIF goenergy = <-84.50, 87.50) AND gimpuls = (-inf, 1732) AND goimpuls = (-inf, 96) AND senergy = (-inf, 550) THEN label = {0}\nIF goenergy = <-84.50, 87.50) AND gimpuls = (-inf, 2168) AND goimpuls = (-inf, 96) AND senergy = (-inf, 550) THEN label = {0}\nIF goenergy = <-84.50, 87.50) AND genergy = (-inf, 1674705) AND goimpuls = (-inf, 96) AND senergy = (-inf, 550) THEN label = {0}\nIF ghazard = {a} AND goenergy = <57, inf) AND gimpuls = (-inf, 514.50) AND goimpuls = <-1.50, 96.50) AND senergy = (-inf, 550) THEN label = {0}\nIF goenergy = (-inf, 104.50) AND gimpuls = <523, 1342.50) AND goimpuls = <17.50, inf) AND genergy = <46870, inf) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF goenergy = <29.50, 104.50) AND gimpuls = <522, 2168) AND senergy = (-inf, 250) THEN label = {0}\nIF goenergy = <-19, inf) AND goimpuls = <4.50, 312) AND genergy = <4455, 34260) AND nbumps = (-inf, 0.50) THEN label 
= {0}\nIF goenergy = <8.50, inf) AND gimpuls = <523, 1342.50) AND goimpuls = (-inf, 96.50) AND senergy = (-inf, 250) THEN label = {0}\nIF genergy = <36470, 42165) AND goimpuls = <5.50, inf) AND senergy = (-inf, 550) THEN label = {0}\nIF goenergy = <119.50, inf) AND gimpuls = <516, 1210) AND goimpuls = (-inf, 118.50) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF gimpuls = <144.50, 1210) AND genergy = <42430, inf) AND goimpuls = <59.50, inf) AND senergy = (-inf, 250) THEN label = {0}\nIF gimpuls = <813.50, 1427.50) AND goimpuls = <104.50, inf) AND senergy = (-inf, 350) THEN label = {0}\nIF gimpuls = (-inf, 319) AND genergy = <1865, 19670) AND goimpuls = (-inf, -6.50) AND senergy = (-inf, 9600) THEN label = {0}\nIF goenergy = <-84.50, inf) AND gimpuls = (-inf, 362.50) AND goimpuls = (-inf, -0.50) AND senergy = (-inf, 650) THEN label = {0}\nIF goenergy = <-44.50, inf) AND gimpuls = <324.50, inf) AND genergy = (-inf, 32770) AND goimpuls = (-inf, 105.50) AND nbumps = <0.50, 1.50) THEN label = {0}\nIF goenergy = <-73.50, 14.50) AND gimpuls = (-inf, 1342.50) AND genergy = <36280, inf) AND senergy = (-inf, 650) THEN label = {0}\nIF goimpuls = <-6.50, inf) AND genergy = <49585, inf) AND senergy = (-inf, 650) AND nbumps = <0.50, inf) THEN label = {0}\nIF goenergy = <-54.50, inf) AND genergy = (-inf, 64725) AND senergy = <650, 750) THEN label = {0}\nIF goenergy = <-33.50, inf) AND maxenergy = (-inf, 950) AND gimpuls = (-inf, 537.50) AND genergy = (-inf, 25125) AND goimpuls = <-41.50, -0.50) THEN label = {0}\nIF goenergy = <-84.50, 114.50) AND gimpuls = (-inf, 587.50) AND genergy = (-inf, 27275) AND nbumps3 = (-inf, 0.50) AND senergy = (-inf, 25250) THEN label = {0}\nIF goenergy = (-inf, 114.50) AND genergy = <1865, 28515) AND senergy = (-inf, 7500) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF goenergy = (-inf, -20.50) AND gimpuls = (-inf, 537.50) AND genergy = <20610, 28515) AND nbumps2 = (-inf, 0.50) THEN label = {0}\nIF genergy = (-inf, 28515) AND nbumps = <5.50, inf) 
THEN label = {0}\nIF maxenergy = (-inf, 3500) AND genergy = <20270, 28515) AND goimpuls = (-inf, -8.50) AND nbumps2 = <0.50, 1.50) THEN label = {0}\nIF genergy = <3260, 28515) AND senergy = <8500, inf) AND nbumps = (-inf, 2.50) THEN label = {0}\nIF goenergy = <-36.50, inf) AND genergy = (-inf, 28515) AND senergy = <5050, inf) THEN label = {0}\nIF ghazard = {a} AND goenergy = <-53.50, 40.50) AND genergy = <20560, 29105) AND nbumps2 = <0.50, inf) THEN label = {0}\nIF goenergy = (-inf, 14.50) AND maxenergy = (-inf, 550) AND gimpuls = (-inf, 1252.50) AND nbumps = (-inf, 2.50) THEN label = {0}\nIF goenergy = <-40.50, 28.50) AND gimpuls = (-inf, 2168) AND genergy = <40210, inf) AND senergy = (-inf, 850) AND seismic = {a} THEN label = {0}\nIF goenergy = (-inf, 104.50) AND gimpuls = (-inf, 362.50) AND genergy = <1865, inf) AND goimpuls = (-inf, 66.50) AND senergy = (-inf, 7500) AND nbumps2 = (-inf, 0.50) THEN label = {0}\nIF goenergy = (-inf, 88.50) AND gimpuls = (-inf, 1210) AND goimpuls = (-inf, 96) AND genergy = <1865, inf) AND senergy = (-inf, 7500) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF goenergy = <17.50, inf) AND gimpuls = (-inf, 1210) AND goimpuls = (-inf, 66.50) AND nbumps2 = (-inf, 0.50) AND nbumps = <0.50, inf) THEN label = {0}\nIF gimpuls = (-inf, 1210) AND genergy = <7815, inf) AND senergy = <1500, 7500) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF goenergy = (-inf, 88.50) AND gimpuls = (-inf, 1252.50) AND goimpuls = (-inf, 96) AND genergy = <1865, inf) AND senergy = (-inf, 7500) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF gimpuls = (-inf, 1342.50) AND goimpuls = <-54.50, inf) AND genergy = <7870, inf) AND senergy = <1500, inf) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF goenergy = <-40.50, 31.50) AND gimpuls = (-inf, 1485) AND genergy = <44960, inf) AND senergy = (-inf, 5500) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF gimpuls = <1441.50, inf) AND genergy = (-inf, 117575) THEN label = {0}\nIF goenergy = (-inf, 87.50) AND gimpuls = (-inf, 
1752) AND goimpuls = (-inf, 96) AND nbumps3 = (-inf, 0.50) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF goimpuls = <-40.50, inf) AND genergy = <422215, inf) AND seismoacoustic = {a} AND senergy = <2500, inf) AND nbumps2 = (-inf, 0.50) THEN label = {0}\nIF genergy = <29945, 31245) THEN label = {0}\nIF goenergy = <-33.50, inf) AND genergy = <29155, 31615) AND goimpuls = (-inf, 105.50) AND nbumps3 = (-inf, 1.50) THEN label = {0}\nIF genergy = <31805, 32680) THEN label = {0}\nIF goenergy = (-inf, 158.50) AND maxenergy = (-inf, 650) AND gimpuls = (-inf, 1210) AND goimpuls = (-inf, 96.50) THEN label = {0}\nIF genergy = <32925, 34315) THEN label = {0}\nIF maxenergy = (-inf, 750) AND genergy = <35480, 45240) AND nbumps = <0.50, inf) THEN label = {0}\nIF ghazard = {a} AND goenergy = <-27.50, inf) AND maxenergy = (-inf, 750) AND gimpuls = (-inf, 2056) AND genergy = (-inf, 715465) AND senergy = <850, inf) THEN label = {0}\nIF goenergy = <-84.50, inf) AND gimpuls = (-inf, 305.50) AND goimpuls = (-inf, 17.50) AND senergy = (-inf, 2300) THEN label = {0}\nIF goenergy = <-84.50, inf) AND gimpuls = (-inf, 305.50) AND maxenergy = (-inf, 3500) AND goimpuls = (-inf, -5.50) THEN label = {0}\nIF gimpuls = (-inf, 305.50) AND genergy = <29195, inf) AND goimpuls = (-inf, 96) AND senergy = (-inf, 9850) THEN label = {0}\nIF senergy = <71000, inf) AND nbumps2 = (-inf, 0.50) THEN label = {0}\nIF goenergy = (-inf, 88.50) AND gimpuls = (-inf, 1141.50) AND maxenergy = (-inf, 7500) AND genergy = <1865, inf) AND goimpuls = (-inf, 96) AND nbumps3 = (-inf, 2.50) AND nbumps2 = (-inf, 0.50) THEN label = {0}\nIF goenergy = <-72.50, inf) AND gimpuls = (-inf, 1372) AND genergy = <55365, inf) AND senergy = <1500, inf) AND nbumps2 = (-inf, 0.50) THEN label = {0}\nIF goenergy = (-inf, 87.50) AND genergy = (-inf, 1733075) AND nbumps3 = (-inf, 1.50) AND nbumps2 = (-inf, 0.50) THEN label = {0}\nIF goenergy = <-32.50, inf) AND gimpuls = (-inf, 2681) AND genergy = <173815, 1026530) AND nbumps3 = (-inf, 2.50) 
AND nbumps2 = (-inf, 0.50) THEN label = {0}\nIF ghazard = {a} AND goenergy = <0.50, 87.50) AND maxenergy = <550, 850) THEN label = {0}\nIF ghazard = {a} AND goenergy = <-29.50, inf) AND gimpuls = <259.50, inf) AND maxenergy = <550, inf) AND genergy = (-inf, 39305) AND goimpuls = <-39.50, inf) AND senergy = (-inf, 4400) AND nbumps3 = (-inf, 2.50) THEN label = {0}\nIF goenergy = <-18.50, 105.50) AND genergy = <9110, 39695) AND goimpuls = <-41.50, inf) AND nbumps3 = (-inf, 2.50) THEN label = {0}\nIF gimpuls = (-inf, 361.50) AND maxenergy = (-inf, 3500) AND senergy = <3250, inf) THEN label = {0}\nIF goenergy = <-37.50, inf) AND gimpuls = (-inf, 361.50) AND maxenergy = (-inf, 35000) AND genergy = <38315, inf) AND senergy = <950, inf) THEN label = {0}\nIF goenergy = <-18.50, inf) AND gimpuls = <334.50, 804.50) AND maxenergy = <550, inf) AND genergy = (-inf, 44750) AND senergy = (-inf, 25150) AND nbumps2 = <0.50, 3.50) THEN label = {0}\nIF senergy = <1250, 1550) AND nbumps2 = (-inf, 1.50) THEN label = {0}\nIF genergy = <44780, 45255) THEN label = {0}\nIF goenergy = (-inf, 158.50) AND senergy = <1150, 1650) AND nbumps2 = (-inf, 2.50) THEN label = {0}\nIF genergy = <46690, 48545) THEN label = {0}\nIF goenergy = (-inf, 68) AND gimpuls = (-inf, 769.50) AND genergy = <43280, 49095) AND nbumps3 = (-inf, 3.50) THEN label = {0}\nIF goenergy = (-inf, 95.50) AND gimpuls = (-inf, 514.50) AND goimpuls = <-7, 96.50) AND genergy = <40245, inf) AND nbumps3 = (-inf, 2.50) THEN label = {0}\nIF goenergy = <-73.50, inf) AND gimpuls = (-inf, 514.50) AND maxenergy = <550, 8500) AND genergy = <49265, 108000) THEN label = {0}\nIF goenergy = <-53.50, inf) AND gimpuls = (-inf, 1836) AND nbumps3 = (-inf, 0.50) AND nbumps4 = <0.50, inf) AND nbumps2 = (-inf, 1.50) THEN label = {0}\nIF maxenergy = (-inf, 1500) AND gimpuls = <673.50, 1210) AND senergy = <1700, inf) THEN label = {0}\nIF goenergy = <-84.50, inf) AND gimpuls = (-inf, 1245.50) AND genergy = <49585, 58435) AND goimpuls = (-inf, 96.50) AND 
nbumps3 = (-inf, 1.50) AND nbumps2 = (-inf, 2.50) THEN label = {0}\nIF ghazard = {a} AND goenergy = (-inf, 68.50) AND gimpuls = <526, 606) AND genergy = <17700, inf) AND senergy = (-inf, 9550) AND nbumps2 = (-inf, 1.50) THEN label = {0}\nIF goenergy = <-43.50, 87.50) AND senergy = (-inf, 3150) AND nbumps3 = (-inf, 1.50) AND nbumps2 = (-inf, 1.50) AND nbumps = <1.50, inf) THEN label = {0}\nIF goenergy = (-inf, 120.50) AND gimpuls = (-inf, 1029.50) AND genergy = <58515, 61125) AND nbumps2 = (-inf, 2.50) THEN label = {0}\nIF gimpuls = <393.50, 725.50) AND genergy = <81935, inf) AND nbumps3 = (-inf, 2.50) THEN label = {0}\nIF goenergy = <11.50, 68.50) AND maxenergy = (-inf, 2500) AND gimpuls = <556, inf) AND genergy = (-inf, 1482055) AND nbumps2 = <0.50, inf) THEN label = {0}\nIF goenergy = (-inf, 28.50) AND gimpuls = <354, 791.50) AND genergy = <81505, 366505) AND nbumps3 = (-inf, 2.50) THEN label = {0}\nIF goenergy = <-31.50, 104.50) AND gimpuls = <325, 2068.50) AND goimpuls = (-inf, 50.50) AND senergy = (-inf, 5750) AND nbumps3 = (-inf, 1.50) AND nbumps2 = <0.50, 1.50) THEN label = {0}\nIF goenergy = <-9.50, inf) AND gimpuls = <938.50, 2902.50) AND maxenergy = (-inf, 3500) AND genergy = <80845, 508210) AND nbumps = <0.50, inf) THEN label = {0}\nIF senergy = <5050, 5750) THEN label = {0}\nIF gimpuls = <887.50, 977) AND goimpuls = (-inf, -6.50) AND senergy = (-inf, 85450) THEN label = {0}\nIF ghazard = {a} AND goenergy = <-38.50, inf) AND gimpuls = <813.50, 1151) AND maxenergy = <3500, inf) AND goimpuls = (-inf, 89.50) AND nbumps2 = (-inf, 2.50) THEN label = {0}\nIF goenergy = <-27.50, inf) AND genergy = <123990, 544010) AND senergy = (-inf, 17850) AND nbumps = <3.50, inf) THEN label = {0}\nIF goenergy = (-inf, 68.50) AND maxenergy = <7500, inf) AND genergy = (-inf, 189505) AND goimpuls = <32.50, inf) THEN label = {0}\nIF goenergy = <-29.50, inf) AND gimpuls = (-inf, 2078.50) AND goimpuls = (-inf, -5.50) AND genergy = <138665, inf) AND senergy = <3250, inf) AND 
nbumps2 = (-inf, 1.50) AND nbumps = <1.50, inf) THEN label = {0}\nIF goenergy = <-15.50, 53.50) AND gimpuls = (-inf, 2917) AND goimpuls = <-7.50, inf) AND nbumps3 = (-inf, 1.50) AND senergy = <7500, inf) THEN label = {0}\nIF goenergy = <-88.50, 87.50) AND genergy = (-inf, 1713980) AND goimpuls = (-inf, 89.50) AND senergy = (-inf, 18500) AND nbumps3 = (-inf, 4.50) AND nbumps2 = (-inf, 3.50) THEN label = {0}\nIF goenergy = <22.50, inf) AND gimpuls = <364, inf) AND genergy = (-inf, 144410) AND nbumps3 = <3.50, inf) THEN label = {1}\nIF gimpuls = <364, inf) AND goimpuls = (-inf, 21.50) AND nbumps3 = <3.50, inf) AND senergy = <10150, inf) THEN label = {1}\nIF goenergy = <-15, inf) AND goimpuls = (-inf, 44.50) AND senergy = <13850, inf) AND nbumps3 = (-inf, 3.50) AND nbumps = <5.50, inf) THEN label = {1}\nIF gimpuls = <2208.50, 2361.50) AND genergy = <493095, inf) AND nbumps2 = <0.50, inf) THEN label = {1}\nIF gimpuls = <3011, inf) AND genergy = (-inf, 1005720) AND nbumps2 = <0.50, inf) THEN label = {1}\nIF gimpuls = <1328, 1361.50) AND nbumps2 = <0.50, inf) THEN label = {1}\nIF goenergy = (-inf, -29.50) AND gimpuls = <1328, inf) AND goimpuls = <-29, -14.50) THEN label = {1}\nIF ghazard = {a} AND goenergy = <-10.50, inf) AND gimpuls = <1328, 1443.50) AND goimpuls = <-1, inf) AND nbumps2 = (-inf, 1.50) THEN label = {1}\nIF gimpuls = <1328, 2109) AND maxenergy = (-inf, 7500) AND goimpuls = (-inf, -5.50) AND genergy = (-inf, 642325) AND senergy = <850, 9350) AND seismoacoustic = {a} AND nbumps = (-inf, 3.50) THEN label = {1}\nIF gimpuls = <1394.50, 2004) AND goimpuls = <-25, 13) AND genergy = <393900, inf) AND senergy = (-inf, 38250) AND nbumps2 = <0.50, inf) AND nbumps = <1.50, 3.50) THEN label = {1}\nIF gimpuls = <1747.50, 3018) AND goimpuls = <-25, 20.50) AND nbumps3 = (-inf, 1.50) AND senergy = (-inf, 32750) THEN label = {1}\nIF goenergy = <-16.50, inf) AND gimpuls = <1831, 2945.50) AND genergy = <254130, 1133675) AND seismic = {b} AND senergy = <1600, 32750) THEN label 
= {1}\nIF maxenergy = (-inf, 25000) AND gimpuls = <364, inf) AND goimpuls = <1.50, inf) AND nbumps3 = <1.50, 4.50) AND senergy = <4300, inf) AND nbumps = <4.50, 6.50) THEN label = {1}\nIF gimpuls = <740.50, 887.50) AND goimpuls = (-inf, 9) AND nbumps = <2.50, inf) THEN label = {1}\nIF gimpuls = <764.50, 1288.50) AND genergy = <61240, 213225) AND goimpuls = <-22.50, 58.50) AND senergy = (-inf, 27350) AND nbumps3 = (-inf, 1.50) AND nbumps = <2.50, inf) THEN label = {1}\nIF gimpuls = <379, 484) AND goimpuls = (-inf, 12.50) AND senergy = (-inf, 10350) AND nbumps = <2.50, inf) THEN label = {1}\nIF goenergy = (-inf, -4.50) AND maxenergy = <3500, inf) AND goimpuls = <-50, inf) AND genergy = (-inf, 52070) AND senergy = <5750, 15200) AND nbumps = <2.50, 5.50) AND nbumps2 = (-inf, 2.50) THEN label = {1}\nIF goenergy = (-inf, 123.50) AND goimpuls = <-70.50, 32.50) AND seismoacoustic = {a} AND senergy = (-inf, 27350) AND nbumps = <2.50, 4.50) THEN label = {1}\nIF goenergy = <-30.50, inf) AND gimpuls = <1139.50, 1270.50) AND goimpuls = (-inf, 105) AND genergy = <54930, 220205) AND senergy = (-inf, 38250) AND nbumps3 = (-inf, 1.50) THEN label = {1}\nIF goenergy = <-51, inf) AND gimpuls = <754.50, 1048) AND goimpuls = (-inf, 62.50) AND genergy = (-inf, 99210) AND senergy = (-inf, 201650) AND nbumps = <1.50, 2.50) AND nbumps2 = (-inf, 1.50) THEN label = {1}\nIF goenergy = (-inf, 144) AND gimpuls = <361.50, 728.50) AND maxenergy = <450, inf) AND genergy = <32455, inf) AND goimpuls = <-12.50, 8.50) AND senergy = (-inf, 7600) AND nbumps2 = <0.50, inf) AND nbumps = (-inf, 2.50) THEN label = {1}\nIF ghazard = {a} AND gimpuls = <160, 256) AND maxenergy = (-inf, 4500) AND genergy = (-inf, 21865) AND nbumps = <1.50, inf) THEN label = {1}\nIF goenergy = (-inf, 106.50) AND gimpuls = <110, 649.50) AND genergy = (-inf, 46930) AND senergy = (-inf, 40500) AND nbumps = <1.50, 2.50) THEN label = {1}\nIF gimpuls = <110, inf) AND senergy = <550, inf) AND nbumps2 = <0.50, inf) THEN label = {1}\nIF 
goenergy = <-78.50, inf) AND gimpuls = <32.50, 237.50) AND maxenergy = <3500, inf) AND goimpuls = <-74.50, 68.50) AND nbumps3 = (-inf, 2.50) AND nbumps2 = (-inf, 2.50) AND nbumps = (-inf, 4.50) THEN label = {1}\nIF gimpuls = <767.50, 813.50) AND genergy = (-inf, 75455) AND goimpuls = <1, inf) AND senergy = (-inf, 1300) AND nbumps = (-inf, 1.50) THEN label = {1}\nIF ghazard = {a} AND goenergy = (-inf, 106.50) AND gimpuls = <131, 735) AND maxenergy = (-inf, 350) AND genergy = <48545, 66335) AND goimpuls = <-72, inf) THEN label = {1}\nIF ghazard = {a} AND goenergy = <5.50, inf) AND gimpuls = <396, 732.50) AND genergy = <40050, 50765) AND goimpuls = (-inf, 79.50) AND senergy = (-inf, 350) THEN label = {1}\nIF goenergy = <-37.50, 152.50) AND gimpuls = <571, 651) AND genergy = <20840, 36590) AND nbumps = (-inf, 0.50) THEN label = {1}\nIF ghazard = {a} AND goenergy = <-22, 33.50) AND gimpuls = <361.50, 525.50) AND genergy = <25145, 42200) AND goimpuls = <-27.50, 8.50) AND nbumps = (-inf, 0.50) THEN label = {1}\nIF goenergy = <-45.50, inf) AND gimpuls = <380.50, 542.50) AND genergy = <17635, 21260) AND shift = {W} AND nbumps = (-inf, 0.50) THEN label = {1}\nIF gimpuls = <240, 324.50) AND genergy = <18585, 25665) AND goimpuls = <-49.50, 37.50) AND shift = {W} AND senergy = (-inf, 3350) AND nbumps = (-inf, 2.50) THEN label = {1}\nIF ghazard = {a} AND goenergy = <-59.50, -10.50) AND gimpuls = <88, 269.50) AND maxenergy = (-inf, 4500) AND goimpuls = <-42.50, 4.50) AND genergy = <4565, 21365) THEN label = {1}\n"
]
}
],
"source": [
"for rule in c2_ruleset.rules:\n",
" print(rule)"
]
},
{
"source": [
"### Correlation Measure generated rules"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"IF gimpuls = (-inf, 1252.50) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF gimpuls = (-inf, 1342.50) AND goimpuls = (-inf, 96.50) AND senergy = (-inf, 550) THEN label = {0}\nIF gimpuls = (-inf, 1342.50) AND goimpuls = (-inf, 312) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF gimpuls = (-inf, 1410) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF gimpuls = (-inf, 1653.50) AND genergy = (-inf, 1006585) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF gimpuls = (-inf, 1684) AND goimpuls = (-inf, 312) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF gimpuls = (-inf, 1798) AND goimpuls = (-inf, 312) AND genergy = (-inf, 1006585) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF gimpuls = (-inf, 2733) AND nbumps2 = (-inf, 0.50) THEN label = {0}\nIF gimpuls = (-inf, 3146) AND genergy = (-inf, 1733075) AND goimpuls = (-inf, 312) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF goimpuls = (-inf, 312) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF senergy = (-inf, 2350) AND nbumps2 = (-inf, 1.50) THEN label = {0}\nIF gimpuls = (-inf, 1331) AND nbumps = (-inf, 2.50) THEN label = {0}\nIF gimpuls = (-inf, 1655.50) AND nbumps = (-inf, 2.50) AND nbumps2 = (-inf, 1.50) THEN label = {0}\nIF ghazard = {a} AND gimpuls = <334.50, 2892) AND genergy = (-inf, 318735) AND goimpuls = <31.50, inf) AND senergy = <350, inf) AND nbumps = (-inf, 2.50) THEN label = {0}\nIF gimpuls = (-inf, 1832.50) AND nbumps = (-inf, 2.50) THEN label = {0}\nIF gimpuls = (-inf, 3146) AND genergy = (-inf, 1713980) AND goimpuls = (-inf, 312) AND nbumps = (-inf, 2.50) AND nbumps2 = (-inf, 1.50) THEN label = {0}\nIF gimpuls = (-inf, 1253.50) AND nbumps3 = (-inf, 1.50) AND nbumps5 = (-inf, 0.50) AND nbumps2 = (-inf, 1.50) THEN label = {0}\nIF gimpuls = (-inf, 1253.50) AND nbumps3 = (-inf, 1.50) AND nbumps2 = (-inf, 2.50) THEN label = {0}\nIF goenergy = (-inf, 104.50) AND genergy = (-inf, 32675) AND senergy = (-inf, 2350) THEN label = {0}\nIF gimpuls = (-inf, 1253.50) AND nbumps3 = (-inf, 1.50) THEN label = {0}\nIF gimpuls = 
(-inf, 1150.50) AND nbumps3 = (-inf, 2.50) AND nbumps2 = (-inf, 1.50) THEN label = {0}\nIF maxenergy = (-inf, 4500) AND gimpuls = (-inf, 769.50) THEN label = {0}\nIF gimpuls = (-inf, 1253.50) AND nbumps3 = (-inf, 3.50) AND nbumps2 = (-inf, 1.50) THEN label = {0}\nIF goenergy = (-inf, 123.50) AND gimpuls = (-inf, 1028.50) AND maxenergy = <1500, inf) AND genergy = <31805, 373295) AND goimpuls = <-54.50, inf) AND senergy = (-inf, 14350) AND seismic = {a} AND nbumps2 = (-inf, 2.50) THEN label = {0}\nIF gimpuls = (-inf, 1253.50) AND nbumps3 = (-inf, 2.50) AND nbumps2 = (-inf, 2.50) THEN label = {0}\nIF genergy = (-inf, 378500) AND nbumps3 = (-inf, 2.50) AND nbumps = (-inf, 5.50) THEN label = {0}\nIF goenergy = (-inf, 68.50) AND gimpuls = (-inf, 901) AND genergy = <21765, inf) AND nbumps3 = (-inf, 3.50) AND nbumps2 = <1.50, 3.50) AND nbumps = <3.50, inf) THEN label = {0}\nIF gimpuls = (-inf, 1150.50) AND senergy = (-inf, 20650) THEN label = {0}\nIF gimpuls = (-inf, 1378) AND maxenergy = (-inf, 75000) AND goimpuls = (-inf, 312) AND nbumps4 = (-inf, 2.50) AND nbumps = (-inf, 8.50) THEN label = {0}\nIF goenergy = <-4.50, inf) AND gimpuls = (-inf, 2185.50) AND genergy = <135285, 1505475) AND senergy = (-inf, 5750) AND nbumps2 = <0.50, inf) THEN label = {0}\nIF goenergy = <-0.50, 104.50) AND maxenergy = (-inf, 5500) AND goimpuls = <20.50, inf) AND genergy = <101710, inf) AND nbumps = <1.50, inf) THEN label = {0}\nIF goenergy = <-29.50, inf) AND goimpuls = (-inf, 6.50) AND genergy = <392530, inf) AND senergy = <7250, inf) AND nbumps2 = (-inf, 1.50) THEN label = {0}\nIF gimpuls = (-inf, 3881.50) AND nbumps = (-inf, 4.50) AND nbumps2 = (-inf, 2.50) THEN label = {0}\nIF maxenergy = <1500, inf) AND gimpuls = <994.50, 1959) AND goimpuls = <-34, 95) AND genergy = (-inf, 662435) AND senergy = (-inf, 36050) AND nbumps3 = <0.50, 4.50) AND nbumps2 = <0.50, 2.50) THEN label = {1}\nIF goenergy = (-inf, 96) AND maxenergy = <1500, inf) AND gimpuls = <712, 2257.50) AND genergy = <61250, 
662435) AND goimpuls = (-inf, 95) AND nbumps3 = <0.50, inf) AND senergy = (-inf, 27350) AND nbumps2 = <0.50, inf) AND nbumps = (-inf, 6.50) THEN label = {1}\nIF goenergy = (-inf, 96) AND maxenergy = <1500, inf) AND gimpuls = <538.50, inf) AND goimpuls = <-34, 109) AND genergy = <61250, 826925) AND senergy = (-inf, 36050) AND nbumps3 = (-inf, 4.50) AND nbumps2 = <0.50, inf) AND nbumps = (-inf, 6.50) THEN label = {1}\nIF goenergy = (-inf, 186) AND maxenergy = <1500, inf) AND gimpuls = <538.50, inf) AND genergy = <58310, 934630) AND goimpuls = <-55, inf) AND senergy = (-inf, 40650) AND nbumps2 = <0.50, inf) THEN label = {1}\nIF ghazard = {a} AND gimpuls = <521.50, inf) AND genergy = <58310, 799855) AND goimpuls = <-23.50, 64.50) AND senergy = <850, 36050) AND nbumps = <1.50, 3.50) AND nbumps2 = <0.50, inf) THEN label = {1}\nIF goenergy = (-inf, 84) AND gimpuls = <894.50, inf) AND genergy = <66235, 1161025) AND goimpuls = <-46, 77.50) AND senergy = <650, inf) AND nbumps3 = (-inf, 2.50) AND nbumps = <1.50, 5.50) AND nbumps2 = <0.50, 3.50) THEN label = {1}\nIF goenergy = <-34.50, 96) AND gimpuls = <521.50, 1548.50) AND maxenergy = (-inf, 7500) AND genergy = <34360, 207270) AND goimpuls = <-22.50, inf) AND nbumps = <1.50, inf) THEN label = {1}\nIF goenergy = (-inf, 135.50) AND gimpuls = <378, inf) AND genergy = <32635, 622815) AND goimpuls = (-inf, 10.50) AND senergy = (-inf, 36050) AND nbumps = <1.50, inf) THEN label = {1}\nIF goenergy = (-inf, 106.50) AND gimpuls = <306, 542) AND genergy = <19245, 81890) AND senergy = <750, 12050) AND nbumps = <1.50, 3.50) THEN label = {1}\nIF ghazard = {a} AND goenergy = (-inf, -1.50) AND gimpuls = <153.50, 289) AND genergy = (-inf, 37085) AND senergy = (-inf, 40500) AND nbumps3 = (-inf, 3.50) AND nbumps = <1.50, inf) AND nbumps2 = <0.50, inf) THEN label = {1}\nIF ghazard = {a} AND goenergy = <-65.50, 27) AND gimpuls = <98.50, 346) AND goimpuls = <-70.50, 8.50) AND genergy = (-inf, 64310) AND senergy = <2350, inf) AND nbumps3 = (-inf, 
3.50) AND nbumps2 = <0.50, inf) THEN label = {1}\nIF ghazard = {a} AND goenergy = <-50.50, inf) AND gimpuls = <1328.50, inf) AND genergy = (-inf, 1062020) AND goimpuls = <-33.50, 39.50) AND senergy = <850, 38250) AND nbumps = (-inf, 7.50) THEN label = {1}\nIF goenergy = (-inf, 56.50) AND gimpuls = <1253.50, inf) AND maxenergy = (-inf, 65000) AND genergy = <52565, 716085) AND goimpuls = <-60.50, 73) AND senergy = <350, inf) AND nbumps3 = (-inf, 2.50) AND nbumps4 = (-inf, 1.50) AND nbumps2 = (-inf, 2.50) AND nbumps = (-inf, 4.50) THEN label = {1}\nIF gimpuls = <1342, 3508) AND maxenergy = (-inf, 7500) AND genergy = <77100, inf) AND goimpuls = (-inf, 68.50) AND shift = {W} AND senergy = (-inf, 13350) AND nbumps2 = (-inf, 3.50) THEN label = {1}\nIF ghazard = {a} AND goenergy = <-59.50, 45.50) AND gimpuls = <110, 762) AND genergy = <12145, 134125) AND goimpuls = <-53.50, inf) AND senergy = <550, 950) THEN label = {1}\nIF goenergy = (-inf, 128.50) AND genergy = <10495, inf) AND shift = {W} AND senergy = (-inf, 36050) AND nbumps3 = <0.50, inf) AND nbumps2 = (-inf, 4.50) AND nbumps = (-inf, 6.50) THEN label = {1}\nIF goenergy = <-78.50, inf) AND gimpuls = <32.50, inf) AND maxenergy = <250, inf) AND goimpuls = <-74.50, inf) AND senergy = <350, inf) THEN label = {1}\nIF goenergy = (-inf, 176.50) AND gimpuls = <449.50, inf) AND genergy = <49095, inf) THEN label = {1}\nIF ghazard = {a} AND goenergy = <68, 124.50) AND gimpuls = <725.50, 1445.50) AND maxenergy = (-inf, 2500) AND genergy = (-inf, 127635) AND goimpuls = <16, inf) AND senergy = (-inf, 4700) AND nbumps2 = (-inf, 1.50) THEN label = {1}\nIF ghazard = {a} AND goenergy = <15.50, 160) AND gimpuls = <133.50, 732.50) AND maxenergy = (-inf, 5500) AND genergy = <40050, 52010) AND nbumps3 = (-inf, 0.50) AND nbumps2 = (-inf, 1.50) THEN label = {1}\nIF ghazard = {a} AND goenergy = (-inf, 152.50) AND gimpuls = <361.50, 653.50) AND maxenergy = (-inf, 7500) AND genergy = <32680, 36470) AND nbumps3 = (-inf, 0.50) THEN label = 
{1}\nIF goenergy = <-37.50, 124.50) AND gimpuls = <537.50, 621) AND genergy = <17635, 28105) AND shift = {W} AND nbumps = (-inf, 0.50) THEN label = {1}\nIF ghazard = {a} AND goenergy = <-37.50, 181) AND gimpuls = <240, 470.50) AND genergy = <20485, 27430) AND goimpuls = <-43, inf) AND shift = {W} AND senergy = (-inf, 450) THEN label = {1}\nIF goenergy = <-55.50, 297.50) AND gimpuls = <217.50, 796) AND genergy = <13725, 49585) AND goimpuls = <-42.50, inf) AND shift = {W} AND senergy = (-inf, 1050) AND nbumps2 = (-inf, 0.50) THEN label = {1}\nIF goenergy = (-inf, -1.50) AND gimpuls = <54.50, 195.50) AND genergy = <1510, 12565) AND goimpuls = <-72.50, 28.50) AND shift = {N} AND seismoacoustic = {a} AND seismic = {a} AND senergy = (-inf, 3200) THEN label = {1}\n"
]
}
],
"source": [
"for rule in corr_ruleset.rules:\n",
" print(rule)"
]
},
{
"source": [
"### RSS Measure generated rules"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"IF genergy = (-inf, 126350) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF gimpuls = (-inf, 1210) AND goimpuls = (-inf, 233.50) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF gimpuls = (-inf, 1342.50) AND goimpuls = (-inf, 233.50) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF gimpuls = (-inf, 1410) AND goimpuls = (-inf, 233.50) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF gimpuls = (-inf, 1485) AND goimpuls = (-inf, 96.50) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF gimpuls = (-inf, 1653.50) AND goimpuls = (-inf, 96.50) AND genergy = (-inf, 1006585) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF gimpuls = (-inf, 1752) AND goimpuls = (-inf, 96.50) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF gimpuls = (-inf, 1822) AND goimpuls = (-inf, 96.50) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF goenergy = (-inf, 104.50) AND gimpuls = (-inf, 2168) AND goimpuls = (-inf, 96.50) AND senergy = (-inf, 550) THEN label = {0}\nIF gimpuls = (-inf, 2733) AND genergy = (-inf, 1026530) AND goimpuls = (-inf, 312) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF gimpuls = (-inf, 2733) AND goimpuls = (-inf, 312) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF genergy = (-inf, 1733075) AND goimpuls = (-inf, 312) AND nbumps = (-inf, 1.50) THEN label = {0}\nIF gimpuls = (-inf, 1342.50) AND goimpuls = (-inf, 96.50) AND senergy = (-inf, 650) THEN label = {0}\nIF maxenergy = (-inf, 550) AND goimpuls = (-inf, 312) THEN label = {0}\nIF goenergy = (-inf, 104.50) AND maxenergy = (-inf, 650) AND gimpuls = (-inf, 1210) AND senergy = (-inf, 1550) THEN label = {0}\nIF maxenergy = (-inf, 650) AND gimpuls = (-inf, 1732) AND goimpuls = (-inf, 233.50) THEN label = {0}\nIF gimpuls = (-inf, 1141.50) AND goimpuls = (-inf, 312) AND nbumps3 = (-inf, 2.50) AND nbumps2 = (-inf, 0.50) THEN label = {0}\nIF gimpuls = (-inf, 1141.50) AND goimpuls = (-inf, 312) AND nbumps3 = (-inf, 3.50) AND nbumps2 = (-inf, 0.50) THEN label = {0}\nIF gimpuls = (-inf, 1372) AND nbumps2 = (-inf, 0.50) THEN label = {0}\nIF 
goenergy = (-inf, 104.50) AND gimpuls = (-inf, 1655.50) AND genergy = (-inf, 1006585) AND goimpuls = (-inf, 96) AND nbumps3 = (-inf, 2.50) AND nbumps2 = (-inf, 0.50) THEN label = {0}\nIF goenergy = (-inf, 104.50) AND gimpuls = (-inf, 1760.50) AND goimpuls = (-inf, 96) AND nbumps3 = (-inf, 3.50) AND nbumps2 = (-inf, 0.50) THEN label = {0}\nIF gimpuls = (-inf, 2892) AND goimpuls = (-inf, 312) AND nbumps3 = (-inf, 2.50) AND nbumps2 = (-inf, 0.50) THEN label = {0}\nIF nbumps2 = (-inf, 0.50) THEN label = {0}\nIF gimpuls = (-inf, 1210) AND goimpuls = (-inf, 96.50) AND nbumps = (-inf, 2.50) THEN label = {0}\nIF maxenergy = (-inf, 750) AND gimpuls = (-inf, 1732) AND goimpuls = (-inf, 96.50) AND genergy = (-inf, 703425) THEN label = {0}\nIF goenergy = (-inf, 104.50) AND maxenergy = (-inf, 850) AND gimpuls = (-inf, 2888) AND goimpuls = (-inf, 96) THEN label = {0}\nIF genergy = (-inf, 31245) AND nbumps3 = (-inf, 0.50) THEN label = {0}\nIF goenergy = (-inf, 105.50) AND genergy = (-inf, 31245) AND senergy = (-inf, 4400) AND nbumps2 = (-inf, 2.50) THEN label = {0}\nIF goenergy = (-inf, 105.50) AND gimpuls = (-inf, 664.50) AND senergy = (-inf, 27100) AND nbumps = (-inf, 3.50) AND nbumps2 = (-inf, 1.50) THEN label = {0}\nIF genergy = (-inf, 31245) AND goimpuls = (-inf, 233.50) AND senergy = (-inf, 24700) AND nbumps2 = (-inf, 1.50) THEN label = {0}\nIF gimpuls = (-inf, 380.50) AND goimpuls = (-inf, 105.50) AND nbumps4 = (-inf, 0.50) AND nbumps = (-inf, 5.50) THEN label = {0}\nIF genergy = (-inf, 31245) AND goimpuls = (-inf, 105.50) AND senergy = (-inf, 27650) THEN label = {0}\nIF gimpuls = (-inf, 664.50) AND goimpuls = (-inf, 105.50) AND nbumps3 = (-inf, 3.50) AND nbumps4 = (-inf, 2.50) AND nbumps2 = (-inf, 4) THEN label = {0}\nIF goenergy = (-inf, 105.50) AND maxenergy = (-inf, 7500) AND genergy = (-inf, 44750) AND senergy = (-inf, 13700) THEN label = {0}\nIF gimpuls = (-inf, 1414) AND genergy = (-inf, 48545) AND goimpuls = (-inf, 233.50) THEN label = {0}\nIF goenergy = (-inf, 
104.50) AND goimpuls = (-inf, 96) AND senergy = (-inf, 1950) AND nbumps2 = (-inf, 2.50) THEN label = {0}\nIF gimpuls = (-inf, 1836) AND goimpuls = (-inf, 233.50) AND nbumps3 = (-inf, 0.50) AND nbumps5 = (-inf, 0.50) THEN label = {0}\nIF goenergy = (-inf, 104.50) AND maxenergy = (-inf, 1500) AND genergy = (-inf, 531845) THEN label = {0}\nIF goenergy = (-inf, 104.50) AND genergy = (-inf, 61125) AND goimpuls = (-inf, 96.50) AND nbumps = (-inf, 6.50) THEN label = {0}\nIF goenergy = (-inf, 94.50) AND gimpuls = (-inf, 698) AND genergy = <45830, 105885) AND goimpuls = <-41.50, inf) AND senergy = <3950, 29200) THEN label = {0}\nIF gimpuls = (-inf, 2068.50) AND goimpuls = (-inf, 233.50) AND senergy = (-inf, 4400) AND nbumps = (-inf, 2.50) AND nbumps2 = (-inf, 1.50) THEN label = {0}\nIF goimpuls = (-inf, 96.50) AND nbumps3 = (-inf, 1.50) AND nbumps = (-inf, 2.50) AND nbumps2 = (-inf, 1.50) THEN label = {0}\nIF gimpuls = (-inf, 1139.50) AND nbumps3 = (-inf, 1.50) AND nbumps5 = (-inf, 0.50) THEN label = {0}\nIF goenergy = (-inf, 104.50) AND gimpuls = (-inf, 1139.50) AND genergy = (-inf, 366505) AND nbumps3 = (-inf, 2.50) AND nbumps5 = (-inf, 0.50) AND nbumps = (-inf, 4.50) AND nbumps2 = (-inf, 2.50) THEN label = {0}\nIF goenergy = (-inf, 87.50) AND gimpuls = (-inf, 1655) AND genergy = (-inf, 1505475) AND nbumps = (-inf, 4.50) THEN label = {0}\nIF gimpuls = (-inf, 2185.50) AND genergy = (-inf, 1505475) AND goimpuls = (-inf, 96) AND senergy = (-inf, 5750) AND nbumps2 = (-inf, 2.50) THEN label = {0}\nIF goenergy = (-inf, 87.50) AND gimpuls = (-inf, 1328) AND senergy = (-inf, 85450) AND nbumps2 = (-inf, 3.50) THEN label = {0}\nIF goenergy = (-inf, 87.50) AND maxenergy = (-inf, 4500) AND goimpuls = (-inf, 96) AND senergy = (-inf, 12000) THEN label = {0}\nIF gimpuls = <1148.50, inf) AND genergy = (-inf, 189505) AND goimpuls = <-17.50, 107) AND senergy = <8950, inf) THEN label = {0}\nIF goenergy = <-88.50, inf) AND gimpuls = (-inf, 2917) AND goimpuls = (-inf, 312) AND nbumps3 = 
(-inf, 1.50) AND nbumps2 = (-inf, 2.50) THEN label = {0}\nIF goenergy = (-inf, 104.50) AND goimpuls = (-inf, 96.50) AND seismic = {a} AND nbumps3 = (-inf, 3.50) AND senergy = (-inf, 20650) THEN label = {0}\nIF gimpuls = <521.50, inf) AND genergy = <57680, inf) THEN label = {1}\nIF goenergy = (-inf, 123) AND senergy = <550, inf) THEN label = {1}\nIF ghazard = {a} AND goenergy = <68.50, 105.50) AND gimpuls = <483, inf) AND genergy = <46530, 51605) AND nbumps = (-inf, 1.50) THEN label = {1}\nIF ghazard = {a} AND goenergy = <7, 58) AND gimpuls = <396, 836) AND genergy = <34315, 43280) AND goimpuls = <-21.50, 28.50) AND nbumps = (-inf, 0.50) THEN label = {1}\nIF ghazard = {a} AND goenergy = (-inf, 160) AND gimpuls = <362.50, 732.50) AND maxenergy = (-inf, 850) AND genergy = <32680, 66275) AND senergy = (-inf, 1350) THEN label = {1}\nIF goenergy = <14.50, 297.50) AND gimpuls = <133.50, 797) AND maxenergy = (-inf, 1500) AND genergy = <27275, 52010) AND nbumps3 = (-inf, 0.50) THEN label = {1}\nIF goenergy = <-37.50, 122) AND gimpuls = <537.50, 796) AND genergy = <16805, 29510) AND goimpuls = <-36.50, inf) AND senergy = (-inf, 250) THEN label = {1}\nIF ghazard = {a} AND goenergy = <-37.50, inf) AND gimpuls = <240, 473.50) AND genergy = <20485, 25310) AND goimpuls = <-43, inf) AND shift = {W} AND senergy = (-inf, 450) THEN label = {1}\nIF goenergy = <-55.50, 124.50) AND gimpuls = <194.50, inf) AND genergy = <9060, inf) AND goimpuls = <-60.50, inf) AND nbumps2 = (-inf, 4.50) THEN label = {1}\n"
]
}
],
"source": [
"for rule in rss_ruleset.rules:\n",
" print(rule)"
]
},
{
"source": [
"## Stratified K-Folds cross-validation"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"from IPython.display import display\n",
"from sklearn.model_selection import StratifiedKFold\n",
"\n",
"# Number of folds used by the stratified cross-validation below.\n",
"N_SPLITS = 10\n",
"\n",
"# Build the splitter from the constant so the fold count is defined in one place only.\n",
"skf = StratifiedKFold(n_splits=N_SPLITS)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Per-fold results are collected in lists and concatenated once after the loop,\n",
"# instead of growing a DataFrame with pd.concat on every iteration.\n",
"c2_fold_ruleset_stats = []\n",
"c2_fold_prediction_metrics = []\n",
"c2_confusion_matrix = np.zeros((2, 2))\n",
"\n",
"for train_index, test_index in skf.split(x, y):\n",
"    x_train, x_test = x.iloc[train_index], x.iloc[test_index]\n",
"    y_train, y_test = y.iloc[train_index], y.iloc[test_index]\n",
"\n",
"    # Train a classifier that uses the C2 measure for induction, pruning and voting.\n",
"    clf = RuleClassifier(\n",
"        induction_measure=Measures.C2,\n",
"        pruning_measure=Measures.C2,\n",
"        voting_measure=Measures.C2,\n",
"    )\n",
"    clf.fit(x_train, y_train)\n",
"    c2_ruleset = clf.model\n",
"    prediction, classification_metrics = clf.predict(x_test, return_metrics=True)\n",
"    fold_metrics, confusion_matrix = get_prediction_metrics('C2', prediction, y_test, classification_metrics)\n",
"\n",
"    c2_fold_prediction_metrics.append(fold_metrics)\n",
"    c2_fold_ruleset_stats.append(get_ruleset_stats('C2', c2_ruleset))\n",
"    # Sum the confusion matrices of all folds; the sum is averaged after the loop.\n",
"    c2_confusion_matrix += confusion_matrix\n",
"\n",
"c2_prediction_metrics = pd.concat(c2_fold_prediction_metrics)\n",
"c2_ruleset_stats = pd.concat(c2_fold_ruleset_stats)\n",
"\n",
"# Divide by the splitter's own fold count so that the average does not depend on\n",
"# whatever value the global N_SPLITS holds when this cell is (re-)run.\n",
"c2_confusion_matrix /= skf.get_n_splits()"
]
},
{
"source": [
"Rules characteristics (average)"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": "time_total_s 2.165153\ntime_growing_s 2.956999\ntime_pruning_s 0.562653\nrules_count 166.400000\nconditions_per_rule 4.951229\ninduced_conditions_per_rule 13.649359\navg_rule_coverage 0.168386\navg_rule_precision 0.919191\navg_rule_quality 0.486175\npvalue 0.045394\nFDR_pvalue 0.049264\nFWER_pvalue 0.585424\nfraction_significant 0.808201\nfraction_FDR_significant 0.789743\nfraction_FWER_significant 0.639193\ndtype: float64"
},
"metadata": {}
}
],
"source": [
"display(c2_ruleset_stats.mean())"
]
},
{
"source": [
"Rules evaluation (average)"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": "Accuracy 0.899071\nMAE 0.100929\nKappa 0.047367\nBalanced accuracy 0.535887\nLogistic loss 3.486010\nPrecision 3.486010\nSensitivity 0.117647\nSpecificity 0.954127\nNPV 0.939956\nPPV 0.197777\npsep 0.140239\nFall-out 0.045873\nYouden's J statistic 0.071774\nLift 3.013186\nF-measure 0.073023\nFowlkes-Mallows index 0.901979\nFalse positive 11.100000\nFalse negative 15.000000\nTrue positive 2.000000\nTrue negative 230.300000\nRules per example 23.878503\nVoting conflicts 103.400000\nNegative voting conflicts 7.400000\nGeometric mean 0.180079\ndtype: float64"
},
"metadata": {}
}
],
"source": [
"display(c2_prediction_metrics.mean())"
]
},
{
"source": [
"Confusion matrix (average)"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": " 0 1\n0 230.3 11.1\n1 15.0 2.0",
"text/html": "\n\n
\n \n \n | \n 0 | \n 1 | \n
\n \n \n \n | 0 | \n 230.3 | \n 11.1 | \n
\n \n | 1 | \n 15.0 | \n 2.0 | \n
\n \n
\n
"
},
"metadata": {}
}
],
"source": [
"display(pd.DataFrame(c2_confusion_matrix))"
]
},
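{
"source": [
"## Hyperparameters tuning\n",
"\n",
"This step takes a while: the grid below contains 6 × 4 × 4 × 4 = 384 parameter combinations, and each of them is evaluated with 3-fold cross-validation."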
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Best BAC: 0.522949 using {'induction_measure': , 'min_rule_covered': 7, 'pruning_measure': , 'voting_measure': }\n"
]
}
],
"source": [
"from sklearn.datasets import make_blobs\n",
"from sklearn.model_selection import StratifiedKFold\n",
"from sklearn.model_selection import GridSearchCV\n",
"from sklearn.linear_model import LogisticRegression\n",
"from rulekit.params import Measures\n",
"# define dataset\n",
"import numpy as np\n",
"\n",
"N_SPLITS = 3\n",
"\n",
"# define models and parameters\n",
"model = RuleClassifier()\n",
"min_rule_covered = range(3, 15, 2)\n",
"measures_choice = [Measures.C2, Measures.RSS, Measures.WeightedLaplace, Measures.Correlation]\n",
"# define grid search\n",
"grid = {\n",
" 'min_rule_covered': min_rule_covered, \n",
" 'induction_measure': measures_choice, \n",
" 'pruning_measure': measures_choice, \n",
" 'voting_measure': measures_choice\n",
"}\n",
"cv = StratifiedKFold(n_splits=N_SPLITS)\n",
"grid_search = GridSearchCV(estimator=model, param_grid=grid, cv=cv, scoring='balanced_accuracy')\n",
"grid_result = grid_search.fit(x, y)\n",
"# summarize results\n",
"\n",
"print(\"Best BAC: %f using %s\" % (grid_result.best_score_, grid_result.best_params_))"
]
},
{
"source": [
"## Building a model with the tuned hyperparameters\n",
"\n",
"### Split the dataset into training and test sets (80%/20%)"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"from IPython.display import display\n",
"from rulekit.params import Measures\n",
"\n",
"# Fixed seed so that the train/test split is the same on every run.\n",
"SPLIT_SEED = 42\n",
"\n",
"# 80%/20% split; stratifying on the label keeps the class proportions of `y`\n",
"# in both parts, in line with the stratified folds used earlier in the notebook.\n",
"x_train, x_test, y_train, y_test = train_test_split(\n",
"    x, y, test_size=0.2, shuffle=True, random_state=SPLIT_SEED, stratify=y\n",
")\n",
"\n",
"# Classifier configured with the tuned hyperparameters.\n",
"clf = RuleClassifier(\n",
"    min_rule_covered=7,\n",
"    induction_measure=Measures.WeightedLaplace,\n",
"    pruning_measure=Measures.Correlation,\n",
"    voting_measure=Measures.WeightedLaplace,\n",
")\n",
"clf.fit(x_train, y_train)\n",
"ruleset = clf.model\n",
"ruleset_stats = get_ruleset_stats('Best', ruleset)"
]
},
{
"source": [
"Rules characteristics"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": "time_total_s 0.420254\ntime_growing_s 0.559084\ntime_pruning_s 0.106422\nrules_count 38.000000\nconditions_per_rule 4.289474\ninduced_conditions_per_rule 13.921053\navg_rule_coverage 0.329832\navg_rule_precision 0.722841\navg_rule_quality 2.962379\npvalue 0.015023\nFDR_pvalue 0.015570\nFWER_pvalue 0.034304\nfraction_significant 0.894737\nfraction_FDR_significant 0.894737\nfraction_FWER_significant 0.868421\ndtype: float64"
},
"metadata": {}
}
],
"source": [
"display(ruleset_stats.mean())"
]
},
{
"source": [
"### Validate model on test dataset"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": "Accuracy 0.876209\nMAE 0.123791\nKappa 0.303411\nBalanced accuracy 0.746612\nLogistic loss 4.275674\nPrecision 4.275674\nSensitivity 0.600000\nSpecificity 0.893224\nNPV 0.973154\nPPV 0.257143\npsep 0.230297\nFall-out 0.106776\nYouden's J statistic 0.493224\nLift 4.431429\nF-measure 0.360000\nFowlkes-Mallows index 0.871226\nFalse positive 52.000000\nFalse negative 12.000000\nTrue positive 18.000000\nTrue negative 435.000000\nRules per example 12.618956\nVoting conflicts 207.000000\nNegative voting conflicts 69.000000\nGeometric mean 0.732075\ndtype: float64"
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": " 0 1\n0 435 52\n1 12 18",
"text/html": "\n\n
\n \n \n | \n 0 | \n 1 | \n
\n \n \n \n | 0 | \n 435 | \n 52 | \n
\n \n | 1 | \n 12 | \n 18 | \n
\n \n
\n
"
},
"metadata": {}
}
],
"source": [
"prediction, classification_metrics = clf.predict(x_test, return_metrics=True)\n",
"prediction_metrics, confusion_matrix = get_prediction_metrics('Best', prediction, y_test, classification_metrics)\n",
"\n",
"display(prediction_metrics.mean())\n",
"display(pd.DataFrame(confusion_matrix))"
]
}
]
}