{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Classification" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This notebook presents an example of using the package to solve a classification problem on the `seismic-bumps` dataset. You can download the dataset [here](https://raw.githubusercontent.com/adaa-polsl/RuleKit/master/data/seismic-bumps/seismic-bumps.arff).\n", "\n", "This tutorial covers topics such as: \n", "- training a model \n", "- changing model hyperparameters \n", "- hyperparameter tuning \n", "- calculating metrics for a model \n", "- getting RuleKit's built-in ruleset statistics " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Summary of the dataset" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
genergygimpulsgoenergygoimpulsnbumpsnbumps2nbumps3nbumps4nbumps5nbumps6nbumps7nbumps89senergymaxenergyclass
count2.584000e+032584.0000002584.0000002584.0000002584.0000002584.0000002584.0000002584.0000002584.0000002584.02584.02584.02584.0000002584.0000002584.000000
mean9.024252e+04538.57933412.3757744.5089010.8595200.3935760.3928020.0677240.0046440.00.00.04975.2708984278.8506190.065789
std2.292005e+05562.65253680.31905163.1665561.3646160.7837720.7697100.2790590.0680010.00.00.020450.83322219357.4548820.247962
min1.000000e+022.000000-96.000000-96.0000000.0000000.0000000.0000000.0000000.0000000.00.00.00.0000000.0000000.000000
25%1.166000e+04190.000000-37.000000-36.0000000.0000000.0000000.0000000.0000000.0000000.00.00.00.0000000.0000000.000000
50%2.548500e+04379.000000-6.000000-6.0000000.0000000.0000000.0000000.0000000.0000000.00.00.00.0000000.0000000.000000
75%5.283250e+04669.00000038.00000030.2500001.0000001.0000001.0000000.0000000.0000000.00.00.02600.0000002000.0000000.000000
max2.595650e+064518.0000001245.000000838.0000009.0000008.0000007.0000003.0000001.0000000.00.00.0402000.000000400000.0000001.000000
\n", "
" ], "text/plain": [ " genergy gimpuls goenergy goimpuls nbumps \\\n", "count 2.584000e+03 2584.000000 2584.000000 2584.000000 2584.000000 \n", "mean 9.024252e+04 538.579334 12.375774 4.508901 0.859520 \n", "std 2.292005e+05 562.652536 80.319051 63.166556 1.364616 \n", "min 1.000000e+02 2.000000 -96.000000 -96.000000 0.000000 \n", "25% 1.166000e+04 190.000000 -37.000000 -36.000000 0.000000 \n", "50% 2.548500e+04 379.000000 -6.000000 -6.000000 0.000000 \n", "75% 5.283250e+04 669.000000 38.000000 30.250000 1.000000 \n", "max 2.595650e+06 4518.000000 1245.000000 838.000000 9.000000 \n", "\n", " nbumps2 nbumps3 nbumps4 nbumps5 nbumps6 nbumps7 \\\n", "count 2584.000000 2584.000000 2584.000000 2584.000000 2584.0 2584.0 \n", "mean 0.393576 0.392802 0.067724 0.004644 0.0 0.0 \n", "std 0.783772 0.769710 0.279059 0.068001 0.0 0.0 \n", "min 0.000000 0.000000 0.000000 0.000000 0.0 0.0 \n", "25% 0.000000 0.000000 0.000000 0.000000 0.0 0.0 \n", "50% 0.000000 0.000000 0.000000 0.000000 0.0 0.0 \n", "75% 1.000000 1.000000 0.000000 0.000000 0.0 0.0 \n", "max 8.000000 7.000000 3.000000 1.000000 0.0 0.0 \n", "\n", " nbumps89 senergy maxenergy class \n", "count 2584.0 2584.000000 2584.000000 2584.000000 \n", "mean 0.0 4975.270898 4278.850619 0.065789 \n", "std 0.0 20450.833222 19357.454882 0.247962 \n", "min 0.0 0.000000 0.000000 0.000000 \n", "25% 0.0 0.000000 0.000000 0.000000 \n", "50% 0.0 0.000000 0.000000 0.000000 \n", "75% 0.0 2600.000000 2000.000000 0.000000 \n", "max 0.0 402000.000000 400000.000000 1.000000 " ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from scipy.io import arff\n", "import pandas as pd\n", "\n", "df_full = pd.DataFrame(arff.loadarff('./seismic-bumps.arff')[0])\n", "df_full['class'] = df_full['class'].astype(int)\n", "df_full.describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Decision class distribution" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { 
"data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAgMAAAGLCAYAAABa0JF/AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABE1UlEQVR4nO3dd3xc1YE2/ufe6UVt1Ltsucu9G4PBBWxjShqQhBACm2R3k81mQ3bfN/tu/2V3s5stIbRAIBASIJRgOhiDwQ033Lst2apWb9Prvff3h4yxLMmW7Zk5U57v56OP7JmR9EiyNY/OOfccSdM0DURERJS2ZNEBiIiISCyWASIiojTHMkBERJTmWAaIiIjSHMsAERFRmmMZICIiSnMsA0RERGmOZYCIiCjNsQwQERGlOZYBIiKiNMcyQEQJbfPmzbj11ltRUlICSZLw+uuvi45ElHJYBogooXm9XsyYMQOPPvqo6ChEKUsvOgAR0cWsXr0aq1evFh2DKKVxZICIiCjNsQwQERGlOZYBIiKiNMcyQERElOZYBoiIiNIcryYgooTm8XhQV1d37u/19fXYv38/HA4HKioqBCYjSh2Spmma6BBERCPZuHEjli5dOuT2e++9F7/97W/jH4goBbEMEBERpTmuGSAiIkpzLANERERpjmWAiIgozbEMEBERpTmWASIiojTHMkBERJTmWAaIiIjSHHcgJEoBgbCCLncQXZ4gus++7nIH0X32db8vjIiqIaJqUFQVEUWDop590TRsMv4VIMlnX3SAzgAYbYDRDpjsgCkDMGYM/Pmz26y5QEYJkFEEZBQDeqPoLwMRXSGWAaIE1+8L4WibC829voEnfHcQ3Z7QoCd7dzBydR/E3HCVKSXA6hgoB5nFAwUhsxRwVAN54wdejLar/BhEFCssA0QJQtM0NPT4cKzNhWNtLhxtHXjd6gyIjjYKGuDrGXjpODTM/dJAOcgbD+RN+Px1wRTAnh/3tEQ0GLcjJhLAH1JwvN2Fo+c98Z9od8MbUoTkaTB/XcjHBTBQEopnAiUzgZJZQOmcgVEGIooblgGiOKjtcGPTyS7sa+7HsTYXGrq9UBPof57QMjAcRzVQNg8onwdULQHyJ4hORJTSWAaIYsATjGBrbTc2nezC5pNdONPvFx3pohKuDFwoowQYez0w9oaBl4wi0YmIUgrLAFGUHGl1YtPJLmw80YV9TX0IK8nzXyvhy8CF8iaeLQbXA2OuH7i6gYiuGMsA0RVy+sLYXDvw5L+ltgud7qDoSFcs6crA+XQmoHopMPlWYOLNXG9AdAVYBoguw8kON9471I6NJztxsMUJJZEm/q9CUpeB80k6oPIaYPJtwORbgMwS0YmIkgLLANEldHuCeGN/K9bubcGRVpfoODGRMmVgEGngyoSaLwDT7wLsBaIDESUslgGiYQQjCj442oG1e89g88kuRFJkBGAkqVkGziPrgXE3ArPuBiasGthhkYjOYRkgOs/hM078YVcT3jrQClfgKnf1SyIpXwbOZ80Dpt8JzLwbKJoqOg1RQmAZoLTnC0Xw1oFWvLCzCQdanKLjCJFWZeB8xTOAWfcAM746cP4CUZpiGaC0dbTVhRd2NeKNfa1Xv7d/kkvbMvAZUyYw8+vA/O8CudWi0xDFHcsApZ2NJzrx6Md1+LShT3SUhJH2ZeAcCR/MfhS2mpW4pjpPdBiiuOFBRZQ2PjregYc21GF/c7/oKJSgVEsuHthlh3vbTkwrzcJ3l4zFzdOKoZMl0dGIYoojA5TyPjjagYc/qsXBNF0PMBocGRiws/zbuKt22aDbyh0W/MniMbhrXgUsRp2gZESxxTJAKUnTNLx/ZKAEpOreANHEMgBoOiNWS4/huMc67P3ZVgPuXVSF7y4ZC5uJg6qUWlgGKKVomob3DrfjoQ21ON7uFh0nabAMAI1lt+H6uq9e8nF5dhP+asV4fG1+BacPKGWwDFBKUFUN7xxq
w8Mf1eJkh0d0nKTDMgDcr/8ZPvJUjvrx4wrs+MmqSVgxpTCGqYjig2WAkpqqanjrYCse/qgOdZ0sAVcq3cvAEaUSa8I/u6K3XTQ2F3+3ZjKmlmZFORVR/LAMUNLa29SHv3/tMI62cU3A1Ur3MvD3+D6eCyy+4reXJOD2GSX4m1WTUJptiWIyovhgGaCk4/SF8R/rjuHFT5vBf73Rkc5loEfKxTz/g1Bx9VcKmPQyvrW4Ct9fOg6ZZp5/QMmDS2Ipqbyyuxn/8d5x9HhDoqNQingVy6NSBAAgGFHxxKbTeGV3C36wbBy+sbASBp0clfdNFEscGaCkcLLDjb977RB3DYyRdB0ZCMCERcGH0afZY/L+x+TZ8H9XTcSqqcUxef9E0cKRAUpo/pCCX26oxVNbTqf8McIUfx/J18SsCABAfbcXf/bcXiybVID/+NI0FGSaY/axiK4GRwYoYX1wtAP//OZhnOkPiI6S8tJxZECFhDXh/8IxpSQuHy/basC/3FaD22eWxuXjEV0OjgxQwmnp8+Gf3zyCD491io5CKWy/PC1uRQAA+n1h/PDF/Vh/pAM//cJUOGzGuH1sokthGaCEEVZUPLWlHr/ccBKBsCo6DqW4JyOrhXzcdw61YWd9L372pWm4kRsWUYLgNAElhIZuL77/wl6eIyBIuk0TNMtluM73c9Ex8KXZpfjn22p4GSIJx2teSLi3D7ZizUObWQQobl5QbxIdAQCwdu8ZrPzFZmyp7RIdhdIcRwZImEBYwT++fhAv72kVHSXtpdPIgEvKwDz/IwgisX4bv3tBBf5uzWRYjZy9pfjjyAAJcarTjVX/+xGLAMXdO9INCVcEAOD5nU1Y9eAW7KrvFR2F0hDLAMXdiztOY/WDm9DQx10EKb7C0OOhwCrRMUbU1OvDV3+9Hf/2zlFEFC6ipfjheBTFTSCs4IHnd+Ld430AeA48xd82eR7a1BzRMS5K1YAnt9TjSKsLj909G9lWXoJIsceRAYqL2nYnVvz8g7NFgEiMR0JrREcYtW2nenDbI5+gtsMtOgqlAZYBirnnt9VhzUNb0OJWREehNHZcnoBPI2NFx7gsTb0+fPGxbdhwrEN0FEpxLAMUM4Gwgj97Zhv+7s0TCKmcFiCxnlESd63AxXiCEXznd7vx2MY60VEohXHNAMVEc48HX3t8K0cDKCF0yfl4yTdfdIwrpmrAz9edwIl2N/7zy9NhNkTnyGWiz3BkgKJuR20bVv9iI4sAJYxXtBVIhR93b+xvxV1PbEeHi4d3UXQl//8OSijPbTyEe57eDU+E0wKUGPySBb8KLBcdI2oOtDhx2yNbcaC5X3QUSiEsAxQViqLgH5/fiH9Y14iwxn9WlDg+kBbDrVlFx4iqDlcQdz6xHa/vOyM6CqUI/tSmq+b2eHHPg2/hd4e80Lh/ACUQFTIeCt4sOkZMBCMq/uql/fjZe8egqtxVnq4OywBdldauHnzlF+9hW1fibe9KtEc3A3VKkegYMfXEptP47u/3IBjhGh26ciwDdMVO1Lfgzoc+wgmvRXQUomE9HkrNUYELfXisA3/y293wh1gI6MqwDNAV2XnoBL7x5Da0hFNrLpZSR4NciQ3hGtEx4mZrXTe++fROeIIR0VEoCbEM0GXRNA3vbdmN7zx/EF2qTXQcohE9r94kOkLcfdrQh7uf3AGnLyw6CiUZlgEaNVVV8fy7m/DjtxvgAkcEKHE5pSw8G7hWdAwhDrQ48dUnd6DHExQdhZIIywCNSiQSwRN/XI+fbuqBT+IaAUpsb0hLEUL6Lmo91ubCnU9sRyc3J6JRYhmgSwqFwnji5ffw4G4vgrJZdByiiwrDgEf8yXkOQTSd6vLiq0/uQJebIwR0aSwDdFH+QAC/evFNPLrPxyJASWGzvACdWqboGAnhdJcXX+eUAY0CywCNyO3x4rHn1uLJg0H4dHbRcYguSYOER0JrRMdIKLWdHtz91E70eUOio1ACYxmgYfU5XXjs93/E744r
8OizRMchGpWj8iTsi1SKjpFwjre78Y3f7ORVBjQilgEawu3x4qkXXsPLtSqceofoOESj9htltegICetIqwv3PL0TrgALAQ3FMkCD+AMB/PaVN/HH2iB6jAWi4xCNWrtchLXBuaJjJLSDLU7c/8yn3LqYhmAZoHNCoTCeW/sOXjzYhw5jqeg4RJflZW2F6AhJYXdjH37y6iHRMSjBsAwQgIEjiF9+ez2e39WCMybOuVJy8UlWPB5YJjpG0nht3xk8+nGd6BiUQFgGCJqm4Y31G/Hs5hNoMFeLjkN02dZJS+DTeOnr5fjv9Sfw3qE20TEoQbAMpDlN07Bu4zb8Zv1e1JknAJBERyK6LAp0eCjAhYOXS9OAB14+gEMtTtFRKAGwDKS5Lbv24sl3tuGYaRI0FgFKQrvkWWhQ80XHSEr+sIJv/+5TdHDb4rTHMpDGdu0/jKfWrsch/Xio/KdASerx8M2iIyS1DlcQ3352N/whXmGQzvgMkKYOHa/FMy+/if0Yg3AaH+hCye2UPAabwpNEx0h6h8448cDL+6FpmugoJAjLQBqqa2jC0y+9joOhArikDNFxiK7Y79WVoiOkjPcOt+O/158QHYMEYRlIM82t7XjyhbU42ifhjL5YdByiK9Yn5eD3gcWiY6SURz8+hbV7W0THIAFYBtJIn9OFp/7wGk60OXHaMlF0HEpwZ1wqvrHWj9yfu2H5Nxem/cqD3a0Xn1cORjT83YYAKh90w/SvLlQ96MbT+z4/IOeDUxFMeNiDzJ+5cM9rfoSUz4elnQENEx72oLFfHVW+17AcCnRX9snRiH6y9hB2N/SKjkFxphcdgOIjEongxTfW4WhdIxodCxBW2ANpZH1+DYuf9mLpGD3eu9uKfKuE2l4VOeaLX3Fy5x/96PBo+M1tFoxzyGhzq1DPPt+rmoavr/Xjb681YmW1Hl95xY9f7wnjL+YbAQA/+TCAP5trQGX2pf9tBmHCo4Ebr/rzpKFCERV/+vs9eP37i1HusIqOQ3HCMpAm1m/ejq2f7kNX3gz0hY2i41CC+89PgijPkvHM7ZZzt43JufiT9Lq6CDY1RHD6hxlwWAZKQ9V5T+zdPg3dPg3fm2eEWS/htgl6HOsaGGnY1hzBp60KHrl5dBsHbZQXokfjepdY6fGG8P0X9mLtn18DvY6/OKQDfpfTwMFjJ/H6+x/DnVGFhnCm6DiUBN48EcHcYh3ueMWHgv9yY9YTHjy5J3SJtwljbokOP/8kiNL/dWPCwx789foA/OGBoYF8q4Riu4T1pyLwhTVsaVIwvVCHsKLhz98J4IlbLNDJl97rQoOEh4NrovJ50sgOtjjx0IZa0TEoTlgGUlxnTy9eeP09dIf0OKLy8CEandN9Kn61O4TxDhnvf8OKP59rxF+uC+DZ/SMXgtN9KrY2KTjcqeK1u6x4cJUJfzwaxvfeHdjQRpIkvHyHBT/dHETNYx7MKpJx/ywD/mNrCEur9DDrgcVPezHxEQ8e2TXyxzko1+CwUhb1z5mGenTjKexr6hMdg+KA0wQpLBgM4fm176KuuR21OQuhKNxhkEZH1YC5JTr8+/KBYftZxToc7lTx+J4w7p05/DSTqgGSBDz/JQuyzBIAHf53JfCVl/147GYzLAYJ11bo8el37Ofe5mSPgt8dDGPfn9qw5BkvfrjAiNXj9Zj6mBdLKnWYXjh0geBTEW49HC+KquGBlw/gnb+8FlYjny5SGUcGUpSmaXjrw03Ytf8Q2nNnwa3wPzKNXnGGhCn5g388TM6T0eQceaV/cYaM0gzpbBH4/G00AC2u4d/uT98O4H9uMkHVgH3tKu6oMaDAJuP6Kh02NQy9cqFVLsZboVlX9knRFanv9uLf3jkmOgbFGMtAitp98Cje/WgrnNkT0BK2iY5DSWZxuQ4negY/gZ/sUVGZNfKPjMXlOrS6NXhC2qC3kSWgLHPo2/1mbwgOi4TbJhqgnP1QYeXz18owu+H9QbvpCj4bulrP72zCxyc6
RcegGGIZSEEtbR148Y334IIFx5RC0XEoCf1ooQk7WhT8+5Yg6npVvHAojF/vDeH78z6fIvjbDwP45mv+c3//+jQDcq0S7nvDj6NdCjY3RvA3HwRx/0wDLIbBU1SdXhX/uiWIh1cPTEPkWCRMzpPx4I4QtjdHsKE+gsXlg0ezPJIdT/mXxvCzpov5P388iD7vxReRUvJiGUgxPn8Az7/2Ds50dKHBMpEnEdIVmVeqw2t3WfCHw2FMfcyDn24O4sGVZtw9/fNzLNo82qBpA7tRwgf3WNEf0DD3117cvdaPWyfo8dDqoZcL/nBdAD9eZEJJxuc/gn77BQtePBLGLX/w42+uMWFe6eD1Au9KS+AHL4sVpcsdxN+uPSQ6BsWIpPFkipShaRqef+1dvP3hZkSKarDblys6EiWJBvPXRUe4qAh0WBp6EM0q/02L9t93zMBX5vBqjlTDkYEUsnXXPqzfvB05hSU44M8RHYcoanbIc1kEEsS/vHkELX0+0TEoylgGUkTTmTa88s56mIwGHNFKEdb4raXU8Vj4ZtER6Cx3MIIfv3wAqspB5VTCZ4wUEIlE8Md3P0RnTx8Uxxg0BSyXfiOiJFErV2NbeLzoGHSenfW9eGrradExKIpYBlLAJ7v3Y++hoygrK8MOV7boOERR9Vt1legINIz/Xn8SJzvcomNQlLAMJLmunj68uX4TzCYzjoQL4VN5pCuljh4pF38ILBQdg4YRiqj4l7eOiI5BUcIykMQ0TcOb6zeipb0D5vwKHPfxuFFKLa9iOVSw4CaqT+p68OHRDtExKApYBpLYviPHsfXTfSgtKsInrhzuKUApJQAzHgvcKDoGXcK/v3sMYWXkbaopObAMJCmP14fX130ERVXRpCtGX8Rw6TciSiIb5GvQr3Er7UR3utuL329vFB2DrhLLQJJ6f9M2nDjViNySSuxzZ4iOQxRVKiQ8HFwjOgaN0i831KLfx62KkxnLQBKqa2jCB5u3oyDPgZ2eXCicHqAUs1+ejuNKsegYNEpOfxgPflgrOgZdBZaBJBMKhbH2vY/g8njhtZWgNWQSHYko6n4dWS06Al2m53Y04lSXR3QMukIsA0lm88492H/0OCrLS7HHkyk6DlHUNctlWBeaLjoGXaaIquHf3zkmOgZdIZaBJNLe1YO3N2yG3WpDi+qAk4sGKQW9oN4kOgJdoQ3HO7Gltkt0DLoCLANJQlVVvL5uA9o6u1FcVIC9HrvoSERR55Iy8HTgetEx6Cr869vHoPDcgqTDMpAk9hw6hu17D6KytBjH/RnwKnrRkYii7m1pKYLgiFcyO9HhxoufNomOQZeJZSAJBIMhvPfxVmgaYLZl4ABHBSgFhaHHw4GVomNQFPzig5NwB8KiY9BlYBlIArv2H8aJUw2oKC3GYa8NAZ4/QCnoE3k+2tQc0TEoCro9ITzycZ3oGHQZWAYSnM8fwPubt8FoNEAymHCIowKUoh4JcZOhVPLbTxrQ6Q6IjkGjxDKQ4Lbt3o9TDc0oLynCEa8NYY3fMko9x+UJ2B0ZIzoGRVEwouLprQ2iY9Ao8ZklgbncHqzfvB02mxXQGXGEowKUop5RVomOQDHw/I5GuLh2ICmwDCSwLbv2oulMO8qKC3HUa0OQowKUgjrlfLwUnC86BsWAOxjBczt4iFEy4LNLgurtd+LDrTuRnZUBTdLjsJent1FqekW7EfxRlLqe3tqAQFgRHYMugf8DE9S23fvR1tGF4oJ8HPNZeQUBpSS/ZMHjgWWiY1AMdXuCeGVPi+gYdAksAwmo3+XGhk92ITsrE5os8woCSlnrpWvh1qyiY1CMPbn5NHclTHAsAwlo+54DaOvoRnFBHk77LfBzVIBSkAIZDwVvFh2D4qCp14d1h9tFx6CLYBlIMC63Bxu27kRmhh06nQ7HuVaAUtRe3QycUgpFx6A4+e22etER6CJYBhLM9r0Hcaa9EyVF+egN69EZNoqORBQTj4c4KpBOPm3ow+EzTtExaAQs
AwnE4/Vhw9adsNtt0Ot0OO7jXCqlpga5EhvCNaJjUJw9/QlHBxIVy0AC+fTAETS3tqO0MB8RDTjFMkAp6jn1JtERSIC3D7Shyx0UHYOGwTKQIBRFwZZde2E2maDX61Hvt3CTIUpJTikLvw1cJzoGCRBSVLywk8cbJyI+2ySIk6cbcbqpBYUFuQDAKQJKWW9IyxCBXnQMEuS5nY0IRVTRMegCLAMJYs/BYwiGQrBbregL69ERMomORBR1IRjxiH+l6BgkUJc7iA+PdYiOQRdgGUgAfU4Xdh04hLycgbPcT3BUgFLUFnk+OrVM0TFIsDf3t4qOQBdgGUgAB46eRGdPH/Ic2VA0oJZlgFKQBgmPhNaIjkEJ4OMTnfAEI6Jj0HlYBgRTFAWffLofZqMROp2OCwcpZR2RJ2FfpFJ0DEoAwYiK9Ue4I2Ei4bOOYHUNzahrbEJRQR4ALhyk1PUbZbXoCJRA3jrAqYJEwjIg2O6DRxAIBGG3WdEf0aGdCwcpBbXLRXgtOFd0DEogW+u60ecNiY5BZ7EMCOR0e7Br/2Hk5mQDAE7wHAJKUS9pN4qOQAkmrGh4j4cXJQyWAYH2HzmOzu5e5OfmQNOAOr9FdCSiqPNKVjwRWCo6BiUgThUkDpYBQVRVxbbdB2A8u3CwK2zgUcWUkt6XlsCnmUXHoAS0s74Hna6A6BgElgFh6hqaUVvfhOKzCwebAvxhSalHgQ6/DPB0QhqeqgFvH2wTHYPAMiDM7oNH4PMHYLcNXD3QHOTCQUo9u+RZaFTzRMegBPbWQU4VJAKWAQFc5xYOZgEAvIqMnrBRcCqi6Hs8zFEBurh9Tf1o7vWJjpH2WAYEOFnfhK6ePuTnDmw/3MwpAkpBp+Qx2BSeJDoGJQFOFYjHMiDAiVP1UDUNev3AyW1NnCKgFPQ7dZXoCJQk3uRVBcKxDMRZOBzBwaO1yMqwAwAUDWhlGaAU0yvl4LnANaJjUJI41ubCqS6P6BhpjWUgzhpaWtHZ0wtH9sB6gdagCRGeRUAp5nUshwJeKkujt7W2W3SEtMZnoTirrW9CIBiExTwwGsCrCCjVBGHCI4GbRMegJLOrvld0hLTGMhBHmqZh/5HjMJtMkCQJABcPUurZKC9Cr2YXHYOSzE6WAaFYBuKos6cXTa3tcJy9pLAvrIdb0QtORRQ9GiQ8FFwjOgYloW5PEHWdXDcgCstAHNXVN8Hp9pxbPMhdBynVHJRrcEQpFR2DkhSnCsRhGYijo7WnIUsSZHngy871ApRqnoqsFh2BktjO+h7REdIWy0Cc+PwBHDl5CjnZmQCAoCqhI8RdByl1nJFL8FZolugYlMQ4MiAOy0CcnGpsRndvPxxZA+sFOkJGaJAEpyKKnhdVXkFAV6fNGUBTD7cmFoFlIE5q65ugKAqMRgMAoDtsEJyIKHo8kh1PBW4QHYNSwA5OFQjBMhAHqqpi3+Hj504oBIBuThFQCnlHuh5+8N80Xb2dpzlVIALLQBy0tHWgrbPr3K6DAEcGKHVEoMPDAS4cpOjgIkIxWAbioK6hGV6f/9zIgE+R4VO5VSulhh3yXLSoDtExKEW09PnR2u8XHSPtsAzEQdOZdsiyfG7XwR6OClAKeTTETYYoujg6EH8sAzGmaRpONTbBZrWcu41TBJQqTsrjsD0yTnQMSjFcNxB/LAMx1u9yo7vPOXjxIMsApYhn1ZWiI1AK2tXAMhBvLAMx1tbZDY/XN6gMcJqAUkG3lIc/BBaKjkEpqKHbi1BEFR0jrbAMxFh7ZzciEQVGw0ABCCgyPDyciFLAq1gOFVwIS9GnakBjj1d0jLTCMhBjZ9o7IZ230SCnCCgVBGDGrwIrRMegFHa6m2UgnlgGYkjTNNTWN8HG9QKUYjbI16Bfs4mOQSnsdBfLQDyxDMSQ0+1Bd18/7FaWAUodKiQ8HOTlhBRb9d0e0RHSCstADLV3dsPj9cJu+/yyQi4epGS3T56O40qx6BiU4uo5TRBXLAMx1NbZjXBY
gck4sGd7QJXg5uJBSnJPRrj1MMUepwnii2UghgYWD36+erCfowKU5JrlMqwLTRcdg9JAjzcEpz8sOkbaYBmIkYGdB5thtZrP3eZReBkWJbfnuckQxRGnCuKHZSBGXB4vOrt7By0eZBmgZOaSMvFMYInoGJRGTndxEWG8sAzESPvZnQcz7CwDlBrelm5AEJzqovjhyED8sAzESHtXN0KRyLmdBwGWAUpeYejxcIBTBBRf3HgoflgGYqTP6QY0bdACQpYBSlafyPPRpuaIjkFphlcUxA/LQIx09/ZBpxv85O9lGaAk9UiImwxR/DX2eKFpmugYaYFlIEY6u3thMhnP/T2oSghr/HJT8jkmT8TuyBjRMSgN+UIK2l0B0THSAp+dYkBVVfT0O2E2fl4GfBwVoCT1jLJKdARKY+1OloF4YBmIAbfXB78/MGhkwK/yS03Jp1POx8vBeaJjUBrr58ZDccFnqBhwutwIhkIsA5T0XtZuBH9MkEj9vpDoCGmB/8tjwOn2IBgMDZom8HOagJKMT7LgicAy0TEozfX7ODIQDywDMeD2eKFq2qCrCTgyQMnmA+lauDXrpR9IFEN9LANxwWeoGPD4/ENuC7AMUBJRIOOh4M2iYxDByWmCuOAzVAz4fH5ceGUspwkomezRzcQppVB0DCKODMQJy0AMON0eyOftPAhwZICSyxMhjgpQYuDVBPHBZ6gY6HO6Bp1JAACqoCxEl6tBrsSG8BTRMYgAAL5gRHSEtMAyEAP9LjcMBv2g27ihJiWL36s8kIgShz+siI6QFlgGokxVVbg8XhguGBnQNGmEtyBKHP1SNp4NXCs6BtE5/hDLQDywDESZzx9AKBSGQc+RAUo+b0hLEYH+0g8kihMfy0BcsAxEWSAYQkRRoNcPvnqAawYo0YVgxKN+ThFQYuE0QXywDESZpmnQNA3SBVcTcJqAEt1meQE6tUzRMYgG4TRBfLAMRJmqqcBwZUBQHqLR0CDhYV5OSAkopKiIKBxbjTWWgSjTtIGXC8cBWAYokR2RJ+FApFJ0DKJhRVT+BI01loEoU1UVGjhNQMnlN8pq0RGIRmTS86kq1vgVjrKBNQPD3B7/KESj9lpwrugIRMMy6uUhv1xR9LEMRJmmacAwIwOc8SIiunxmjgrEBb/KUXZuZOCCIsuRASKiy2c28JC3eGAZiLJzlxaCawaIiK4Wy0B8sAxEmXp21euFU1wcGSAiunxmA5+m4oFf5SgbcdMhQXmIiJKZSc+RgXhgGYgyVRu4tPDCRQN6iXWAiOhycWQgPvhVjrLPFhBeOE1glnk9ARHR5eKagfhgGYgyVR3YgvDCBYQsA0REl4/TBPHBMhBlmqYNuwMhywAR0eXjNEF88KscZdpw2w+CZYCI6EpwmiA+WAaizGI2Qa/TIxyJDLqdZYCI6PJxZCA++FWOMovFDINBjwjLABHRVTNzzUBcsAxEmdVshkHPkQEiomiwGFkG4oFlIMqsFpYBIqJoKcw0i46QFlgGokyWZdjtVoTDF5QBnSIoERFR8ip3WEVHSAssAzGQZbcPGRmwcGSAiOiyleVYREdICywDMZCVaR8yMmBiGSAiuiySxDIQLywDMZCVkQFFGTwtoJcAg8RCQEQ0Wvl2E3cgjBOWgRiwWodf8MLRASKi0eN6gfhhGYgBq9k89KQicN0AEdHlKOcUQdywDMSA1WIGhtmW2MorCoiIRq0shyMD8cIyEANWixmSJEFVB48E5OgjI7wFERFdqNzBkYF4YRmIgZE2HsoxhAUlIiJKPuUcGYgbloEYsJgHzie48PJCB0cGiIhGjdME8cMyEANWixn6YUYGsvQRyBj+iGMiIvqcTpZQks2tiOOFZSAGMu02WC1m+APBQbfLEpDN0QEioksqyjRDr+NTVLzwKx0DOp0OFSVF8Pn8Q+7jugEiokvjzoPxxTIQIxUlRQiFhz7x84oCIqJL44ZD8cUyECP5uQ4AgHbBfgMOjgwQEV3SpKIM0RHS
CstAjBTkOWDQ64eMDnBkgIjo0mZV5IiOkFZYBmIkPzcHVqsF3gvWDdh1Cg8sIiK6CKNOxtTSTNEx0grLQIxkZ2YgOzMDPn9g0O2SxNEBIqKLqSnN5GmFccYyECOSJKGytHjIyADAKwqIiC5mVjmnCOKNZSCGSosLoChDDyfiyAAR0chmV2aLjpB2WAZiqCDXAQ28ooCI6HLM5uLBuGMZiKGCXAfMJuOQnQgHygC3JSYiulBRphkl2dxwKN5YBmIoPzcHNosFPv/gdQNmWeNUARHRMGZVZIuOkJZYBmLIZrUgNycL3guuKACAElNwmLcgIkpvnCIQg2UghiRJQlVZKfzDlIFiY0hAIiKixMbFg2KwDMRYcWEeVGXoJkPFpiAkrhsgIjrHqJNRU5IlOkZaYhmIseKCfMg6GeHI4DUCJlmDg+sGiIjOmVySCbOBmw2JwDIQY5VlxcjKsMPp8gy5r5jrBoiIzpnNxYPCsAzEWHZmBspLiuB0uYfcxzJARPQ5Hk4kDstAHNRMrEYgOPSJv9gYgsx1A0REkCXgmupc0THSFstAHFSWFkOn0w05ztgoayjiVQVERJhRno08u0l0jLTFMhAHlWUlyM7MgNM9dN1AmXnoZYdEROlmxeRC0RHSGstAHGRl2FFZVgync+i6gXKuGyAiwo1TWAZEYhmIkynjqxEMDZ0SyDFEYNfxEkMiSl8VDismFGaIjpHWWAbiZGxFKYwGw7ALCcs4OkBEaWz55ALREdIey0CcjKkoRa4jG739riH3caqAiNLZjVwvIBzLQJyYTSZMnVA97H4DJaYg9NLQLYuJiFJdlsWAeWMcomOkPZaBOJo0bgxUTYOqDn7iN8gaqnhVARGloZU1hTDo+FQkGr8DcVRdVT6wNfEwlxiOt/oEJCIiEmvN9BLREQgsA3FVkOtARUkR+oZZN1BiDMEmKwJSERGJ4bAZsZi7DiYEloE4kiQJ06dMgD8QgKZpF9wHjOPoABGlkZU1hdBziiAh8LsQZxPHVsFiNsPr8w+5j1MFRJRO1kzjFEGiYBmIs+rKMoypKEVHd8+Q+7L1CvINPKuAiFJfnt2IRZwiSBgsA3EmyzIWzp4Ovz845KoCgKMDRJQeVk0tgk6WRMegs1gGBJg+eTxysjLQ5xy6kLDa4oeOxxoTUYq7e0Gl6Ah0HpYBAQrzcjF14jh0dvcOuc8ka6jgngNElMLmVzkwuThTdAw6D8uAIPNm1AAAwuGhhxRxqoCIUtm911SJjkAXYBkQpGbiOBTkOdDVM3R0oMwUhJl7DhBRCirOMmNlDc8iSDQsA4LYrBYsmDl12HUDsgSMswy99JCIKNndvaCCewskIH5HBJoxZSJMRiO8fu45QESpz6iX8bX5FaJj0DBYBgQaP6YCFWXF6OwaOlWQa4ggl3sOEFEKuWVaMXLtJtExaBgsAwLp9Xosmj0dHp9vyPbEADDV5hWQiogoNrhwMHGxDAg2bdJ4ZGdmoN/lHnJftcWPDN3Qqw2IiJLNzPJszCjPFh2DRsAyIFhpUQEmVY8Zds8BWQKm24ced0xElGy+xVGBhMYyIJgkSZg/swaKqiCiDL2ccILVBysvMySiJJZnN+HmacWiY9BFsAwkgGmTxqMg14GOrqGHF+kkYBpHB4goiX19fjmMej7dJDJ+dxJAZoYd1y+ci75+57CHF02y+rgJERElJYNOwt0LeQ5BomMZSBCL581EniMHncPsSGiQNdTwygIiSkIra4pQmGkWHYMugWUgQRTkOnDd/Fno6ukb9jLDKTYvDNLQUQMiokQlS8APlo0XHYNGgWUggVy3YA4c2Vno6u0bcp9J1jCFowNElES+OKsME4syRMegUWAZSCAlhfm4Zs4MdHb1jLgJkY6jA0SUBIx6GQ/cNEF0DBolloEEc/3COcjKsKO3f+gBRhadikk8s4CIksA3F1aiNNsiOgaNEstAgikvKcL8mdPQ3tk17OjAdLsHMobeTkSUKDLMevzFsnGi
Y9BlYBlIMJIk4fpFc2CzWuF0Dd1fwKZTMY6jA0SUwP7s+mpkW42iY9BlYBlIQNWV5Zg9bTJaO7uGvX8GRweIKEEVZppw/+IxomPQZWIZSECSJGHporkwG41weYZeQZClV7jvABElpB8unwCLUSc6Bl0mloEENWncGEyfPB5n2jqGvX9WhptnFhBRQhmbb8Nd88pFx6ArwDKQoCRJwrLF86HX6eDxDV0jYJQ1zM8cesUBEZEo/2flROhkSXQMugIsAwmsZkI1aiaOw5nW4UcHxln9KDIG45yKiGiomeXZWDWVJxMmK5aBBKbT6bD82gWQdTLcw6wdAIBrspyQuJiQiAT729WTREegq8AykOBm1UzEvBlT0Ximbdh9BxyGCBcTEpFQSyfmY8HYXNEx6CqwDCQ4WZZx+003wJGViY6unmEfMzvDDQsXExKRAAadhJ+sniw6Bl0lloEkUF5ShJuuX4Tu3n6EI5Eh93MxIRGJ8r0bxvEwohTAMpAkVly7EOOqytHY0jbs/eOtfhRyMSERxdHk4kxuO5wiWAaShN1mxW03Xg9FUeDxDr8dMRcTElG86GQJ//WV6TDo+DSSCvhdTCJzZ9Rg3owaNLa0DruYMNcQwWQuJiSiOPjeDdWYWpolOgZFCctAEpFlGV9YuRTZmRno6B5+MeHcDDfMXExIRDE0Lt+KHywbLzoGRRHLQJKpKC3GjUsWobunDxEuJiSiONNJwC/umg2jnk8fqYTfzSR043ULMbaybMTFhBOsfpSbAnFORUTp4LtLxmJaGacHUo2kDTf5TAlvx96DePTZl1BaVAC7zTrkfp8iY21XPgIqTw8TTQ360L/lOfhqt0P1OWEsGIucFd+FqXgCAKB/6/PwHtsCxd0FSdbDWDQO2Uu+CVPJxFG9f+eOV9C/6VlkzLkNjhXfPXd774Yn4T28AZLBjOzr74W9Zum5+7zHt8J7eAMKvvJP0f1kKaWNzbXgvR9dD5OeP1dSDUcGktS8GTWYO33KiIsJrToVS7L74x+MhuhZ9zACDfuRd8uPUXz/IzCPmYWOF/8eEXc3AMDgKIXjxj9D8f2PovDun0OfVYiOl/4Bis95yfcdbDsJ9/51MORXDbrdV7cT3mObUHDnT5Fzw33oXffwufenBr3o3/w7OG7686h/rpS6ZAn436/OZhFIUSwDSUqn0+H2mwYWE3Z29w77mApzEJOsvLpAJDUchO/EJ8heeh/M5VNhyClB9rV3w5BTDPe+9wAAtik3wFI1E4bsIhjzK5Gz7NvQQj6EOusv/r5DfnS/9d/IXfUDyGb7oPvCPc0wl0+DqXg8bFOuh2S0IuIcOPCq7+NnkDHrZugzC2LzSVNK+vZ1YzCzPFt0DIoRloEkVlVeghXXLURXTx9CofCwj1mY6UKWfvj7KA5UBdBUSDrDoJslvQnBliNDHq4pYbj3r4NkssFYMOai77r3g1/BUj0PlqqZQ+4z5o9BqL0OSsCDYHsdtEgQ+pwSBFqOINRxChlzbr2qT4vSS5XDgh/fNLppK0pOetEB6OqsumExjtaexvG6ekwaNwaSNPgscb2sYWl2P97szoMKnjMeb7LJClPJJDi3vQhDbjl0tmx4j21GsPU49DmfH/fqq9uF7jd/Di0chM6eg8K7fgqddeRFWt6jmxBqP4Xie38x7P2WsXNgq7kB7c/+CJLeiLw1P4JsMKH3/ceQu+ZHcO97F+69b0NnyYRj5V/AmF8Z9c+dUoME4H+/OovTAymOCwhTQF1DE/7nid9BkmWUFOYP+5iDHht2ubgCWIRwXxt63vslgs2HAUmGsagahpxSBNvrUPqdxwEAaigAxdsL1eeC+8D7CDQdRPE9/wOdLXvI+4u4utD27I9QeNdPz40etL/wExgLxg5aQHih/q0vQA16YZ+2Ah0v/wNK7n8U/rpdcO99G8Xf+mVMPndKft+/oRp/s4rHE6c6loEU8d5HW/H7tW+jsrwENotlyP2aBnzQ60BT0CwgHQED
T/hqyAe93YGuN/4TWsiPgjv+edjHnvn1d2CfdiOyFt055D7fye3oeu3fAOm8WT5NBSABkoSKv34Nkjz4t7hwTzM6X/3/UPyth+A5+AGCLUeR/4WfQA0F0PyLr6D8r16GbBp6VQqlt/mVWXjxTxdDljmqmOo4TZAiVly3ECdON2DH3kOYMmEsZHnwchBJApbk9OH1rnx4FH7bRZCNZshGM5SAB/76vci54b6RH6xp0JTh13qYK2eg+P5HBt3W8+4vYcgtQ+aCLw8pApqmoef9R5Gz7NuQjRZAU6GpZzes+uy1pl7x50WpKdeiw+PfnM8ikCb4rJAiDAY97rptFZpb29HY0ooxFWVDHmOWNSzL6cPbXD8QV/7TewAAekcpIn1t6Nv4NAyOMtinrYAaCsC5/SVYxy2Azu6A4nfBvfdtRNw9sE689tz76Hjx/8EyfhEy59wK2WSF8YJLCSWDCbI5Y8jtAOA58D50lkxYxy0AAJhKJ6N/6wsInjkO/+k9MORWDLkagdKbTtLw5L3z4LAZRUehOGEZSCHFBXn4ypob8fhzr6DP6UJOVuaQxxQYw5iX6cJOrh+IGzXoQ//mZxFxd0NnzoB14jXIXvJNSDo9oKkI97ag6/UNUPwu6CyZMBaNR9Hd/zloUV+4rx0m/+VvM614++Dc/jKKvvFf524zlUxE5vwvovOP/wLZmoW8NT+KyudJqePHy8didlWu6BgUR1wzkGJUVcVza9/Bux9twYSxVTAaDcM+7oPeHDQGhq4tIKL0dl2FBb//3jLRMSjOuM9AipFlGV9avRxTJ41HXUPTsLsTAsCS7H7uP0BEg5TYgF9/e4noGCQAy0AKstusuOdLa5DnyEbjmeEPMzLJGlY5emHhccdEhIEtzF/402thMXL2OB2xDKSoyrIS3HnLSoRCYfQ5h59rztArWOnohUHiSnKidKaDiofumoaqAq4lSlcsAyls8byZuPG6hTjT1oFgKDTsY/KMYSzL6YMELh0hSld/eW0xVkyvEh2DBGIZSGGSJOFLq5dj+uQJqKtvhqoOPwJQbg5icdalT8gjotSzcowJP7xlrugYJBjLQIqzWS2458u3oLQoH3X1Iy8onGTzYZbdHed0RCTS+AwFj9x/vegYlABYBtJAeUkR7rvrC8iw29DQ0jri4+ZkujHe4otjMiISpcAYwvPfuwEGw/CXH1N6YRlIEzUTqnHPl28BNKCto2vEx12X3Y9SUyCOyYgo3rLlIH7/7UUoyBm6MRmlJ5aBNLJg1jTcccuNcHm86Okbfo2ALAHLc/rg4B4ERCnJDj8e/9pUTKwoEh2FEgjLQBqRJAk3LVmEW1csQXtXN9we77CPM8oaVub2wKaLxDkhEcWSRfXjP1ZXYuG0CaKjUIJhGUgzsizji6uWY9k189DY0gp/IDjs42w6FascvTByDwKilGBS/fibRVlYs4RXDtBQLANpyGDQ4+4vrsH8WdNQ19CEcHj4EYAcQwQ3Onqh4x4EREnNqAbwnSkS7rllKSSJJ5bSUCwDacpqMeNbd9yGmgnVOHG6AYoy/AhAsSmEGx290HOEgCgpGdQg7pug4i++dgsMBm41TMNjGUhjjuws/MlXv4iqshLU1jeOuAdBmTnIKQOiJKRXQ/hGdQQ/vPtWmE0m0XEogbEMpLnSogLcf9cXkJuThdONLSMWgiJTCDfn9sDMg42IkoJeDeOrVQH89T23wmoxi45DCY5lgDBhbCW++ZXbYDTocaa9c8TH5RnDWJPbAysLAVFC02kRfLnci//7zdtgs1pEx6EkwDJAAIA50ybjq7evRiAYROtFNiXKMURwS143MnjZIVFCkrUIbi924/9963Zk2G2i41CSYBmgc25YNBdf/8LNCASCaG5tH/FxmXoFt+R1I5sbExElFJ0Wxi0FTvzDfbchK8MuOg4lEUkbaZKY0pKmadiyay+ee/UdKJqKqrKSES9FCigy3ut1oCdsjHNKIrqQSfXj1iIX/va+LyM3
J0t0HEoyLAM0rB17D+LZV95EIBTC2IqyEQtBSJXwfq8DHSGuVCYSxR5x4tZiD370ra+gINchOg4lIZYBGtHew8fwzEtvoN/txoQxlSMWgogq4YO+HJwJcsUyUbw5wp1YXeTHD751F4ryc0XHoSTFMkAXdfhEHX7z4mvo6unDhLGVkOXhl5koGvBRXw4aA1y5TBQvpaEm3FCk4HvfvBOlRQWi41ASYxmgSzp5uhFP/WEtzrR3YsLYKuh0wxcCVQM+cWbhhI8rmIliSYKGKv9JXFtmwnfv/jLKS3gCIV0dlgEalYbmVvz6+T+ivvkMJlZXQa8feVvT414rtjuzoIB7oBNFmwEKqr2HsbymBN+643bk5+aIjkQpgGWARq2lrQO/fv6POHm6EROrx1x0n/OukAEb+nLgUbgXOlG0WBDCBO8hrFlYg7u/eDM3FKKoYRmgy9LR3YNfP/8qDp+ow4SxlTAZR76sMKDI+KgvB6280oDoqmXBi8mBY/jyikX4wsplPHSIooplgC5bT58TT/3hVew7dBxjq8ovuu+5qgG73Rk46MmIY0Ki1JKn9mKm1ICv3XoTli2ez2OIKepYBuiKON0ePPPS69ix7xCKC/KQm5N90cfX+83Y3J+NsMZNL4lGS4KG0lAz5mR4cO8dt2D21MmiI1GKYhmgKxYMhvDG+o/x7kdbodfrUVlWfNHfWPojOnzY60B/xBDHlETJySpHUOU+gtnlmbj/ri9gbGWZ6EiUwlgG6Kpomobtew7gxTfXobvXifFjKi46lxlWJWzuz0Y99yMgGlG5wY3CngOYN3U8vnXn7SjM42ZCFFssAxQV9U1n8LtX38Kx2tOoKi+F3Wa96OMPemz41JUJjZcfEp1jkFRMN7TD2FOH6xfOwd1fXMOTBykuWAYoapxuD/7w+rvYsmsfcrIzUZSfd9HHtwaN+KgvBwFVF6eERIkr3xDC5EgdNF8/1iy7Fl9avQJGI6fUKD5YBiiqIpEI1m38BG+s34hwJIKxFWUjbmEMAD5FxifOLG5jTGlLgoZpFifs3YeRlWHHl1Yvx/LF8y/6/4Yo2lgGKCb2HTmO59e+izPtHRg3puKi+xEAwCmfBdtcmQhylIDSiF0XwWx9C0JdjaiZOA5fu30VxlVViI5FaYhlgGLmTHsnnlv7NvYdPo6y4iJkZ118rwGOElA6GWv2odR9FHpNwYrrFuD2m5Zecq0NUaywDFBMeX1+vPruh/hg83bYbFaUFhVccsOUOp8F251ZCHJPAkpBBknFHEsntPYTqCwtxh233IS506dwIyESimWAYk5VVWzcvhuvvPMBXG4PxlaWXXLawKfI2O7M4iWIlFJKTQGMC51GxNOLRXNm4M5bb0JBrkN0LCKWAYqf2vomvPL2ehw6Xos8Rw4K8hyX/G2oOWDCJ84sHnhESc2ui2COtRfhtpPIzsrAF1ctw9JFcy96+idRPLEMUFz5AwG8v2kb1n38Cdxe36hGCSKqhL0eOw557NyXgJKKDA3T7B5URFrQ1dmFqZMGFglWV5aLjkY0CMsACXHydCNefns9Dh+vQ37u6EYJesN6bO3PRmf44uWBKBGUmgJYYO9Hb1sDZEnGTUsW4tYbb+Cxw5SQWAZIGH8ggHUbt2Hdxk/g8fowpqIUZtPFjzvWNOCkz4q97gx4eRkiJaAMXQTzM11wRLrRdKYNY8pLccctN2H21ElcJEgJi2WAhDt5uhGvvvshDh6vRVaGfVRXHCgacNRrwwGPnTsYUkIwSipmZrgx3tCP5jOtkCUJi+bOwJdWLUd+bo7oeEQXxTJACSEYDGHTzj14+8NN6OzuQ0VZMTJHsSd7WJVw2GvDIY8dIV6KSAJI0DDJ6sPsDBf6ujrQ53RhYnUVbrvpBsyq4WgAJQeWAUoorR1deP39j7Fj70HIsoyqsuJRrbgOqBIOeuw46rUhwlJAcVJuCmBBpgtyoB9NZ9qRn5uNVTcsxtJr5sNqMYuORzRqLAOUcFRVxe4D
R/D6+x/jVFMLCnIdo1pgCAzsT7DfnYHjPitUXnlAMaGh3BTEDLsHOZIHjS1tkAAsmjMdt6y4HmXFhaIDEl02lgFKWE63B+s3bcPH23ejt68fhQV5yMvJHlUpcEd02OvOQJ3fwssRKSokaKi2+DHd7kGWHEJLewc8Xh8mVlfhluVLMHvqJB4uREmLZYASXmtHFz7augtbP92LfpcHJUX5yMnKHFUp6AvrscedgYaAGWApoCugl1RMtPowzeaFTRdBZ3cvunp6UVpUiNVLF+Pa+bMueRUMUaJjGaCk0djSig+37MSOfQfh9flRWlSArMyLH370me6QAQc8djQGzJw+oFExywqm2LyYYvPCLGvod7nR0tqBnKwMLFs8H8uvXQBHdpbomERRwTJASUXTNJxqbMb7m7Zjz8GjCASDqCgtHvVpbz5FxgmfFcd9Vni5xTENw66LYJrdg4kWP3SSCpfbg9aOLpiMRiyYNRU3L7sO5SVFomMSRRXLACUlTdNwrK4e72/chv1HT0CJRFBeVgybZXS7u6ka0Bww45jPipagCZxCIIc+jOl2D8Za/JCgoafPiY6ubtisFkyfPAHLFs9HzYRqXipIKYllgJKaqqo4eKwW72/6BIdPnIIkSagoLbqsOVxXRIdjXhtO+i0IcgOjtCJBQ6kpiBqbF+XmIFRVRWdPL7p6+pCTlYn5M2pw3YI5GFdVzhJAKY1lgFJCJBLB3sPH8f6mbThxqh56nR4lRQWXda13RAPq/RYc89p4/kGKyzeEUG3xo9rih0WnIqIoaO/sRp/ThXxHDq6bPwuL583iZYKUNlgGKKWEQmHsPngE67fswOnGFoTDERTkO5CbnXVZl331hPU45rWhzm/hJkYpIlMXQbXFj3FWH7L0CoCBfy9nOjrh8/lRUliAGxbNxcLZ07l9MKUdlgFKSZFIBMfq6rFj70HsPXwcff1O2O02FBfkXfLI5POFVAmn/RY0BsxoDZqgcG1BUjHLCsZa/Bhn8aPAGD53uz8QQEtbJyKRCCrLirHsmvmYP3MqMjPsAtMSicMyQCmvvasHew8dw9Zde9Hc2g4NQGF+LrIzMy5rHjisSmgJmtAUMKMpaOL6ggSll1RUmgOotvhRZgpCPvst1jQNLrcHbZ3dkCQJ48dUYPni+Zg9bTIsZm4dTOmNZYDSRjAYwqETddi2ez8On6iDy+1BdlYmCvNzYRjF+QfnUzWgI2REY8CMpoAZLl6mKJQOGopNQVRb/KgyB2CQP/+x5vX70dndC6/Xjwy7FROrx2DpNfMwfdJ4GAz8vhEBLAOUhjRNQ0tbBz49cATbdu9Ha2cX9Do9igrykGGzXtGq8b6w/lwx6AwbwEsVY0uChnxDGCWmIEpMQRQYQ9Cf9yUPhkLo6umD0+WGyWRERUkxFsyahmmTxqG8pIhXBhBdgGWA0prX58eBoyewddc+nDjdAK8vgKxMO3Jzsq/41DmfIqMpYEZjwIy2kJELEKNAggaHIYwSYwglpiCKjKFBv/0DQERR0NPbj56+fuhkGcWF+Zg3owbTJ09AdWXZqE6/JEpXLANEGNiv4HRTC/YeOoY9h46ho7sHwWAIVosZjpxsZNptV/TbpKoBvWEDOsIGdIaM6AgZ4eGUwqhk68/+5m8ModgUhEke+qNKVVX0OV3o7umDoqrIc2Rj9tQpmFkzEZPGVfHMAKJRYhkgukA4HEF98xmcON2AvYeOo6WtHW6PD0ajAbk5WcjOzIROd+W/7fsUGR0hIzpDRnSHDegJGxBK89EDvaQiRx9BriGM4rNP/ladOuxjNU2Dy+NFV3cvgqEQsjMzMGVCNeZOn4KaCdW8IoDoCrAMEF2Eqqpo7ejCiVMNOHisFrX1jeh3uSFJEnKyM+HIzoLRYLjqj+OK6NATNpwrBz1hA/wpeLWCBA0ZOgU5hjAc+ggchjAchjAydQpGGnjRNA1enx/9Ljdcbg80TUOGzYYxFaVYMGsaaiZWozAvN76fCFGKYRkg
ugzdvf2orW/EkZOncPhEHbp7+6GoCjLtdjhysmA1m6O2OC2gSvAqOngU3bCvvYoOWoIuVLTKCjL1EWTpI8jUKcg6++cMfWTQQr/haJoGXyAAp9MNp9sDVVVhtViQ58hGzYRqVFeWobKsBKVFBVwISBQlLANEV8jr86O2vgknTtVj35ET6OzuhT8YhAQJdpsVmRk2ZNis0Oli8xu+qgF+VYZnmKLgU3RQNAkKAFWToJ7/53OvL/1EqpNUmCQNJln9/EXSYDz/7/LnjzHKKqyyOmRx38VomoZAMASna+DJPxKJwGI2w5GdhSnjx2DcmApUlZWgpDCfiwCJYoRlgCgKwuEImlvb0dLegcaWNhw/VY/u3n54vD6oqgqzyQS7zQq7zQqrJXqjB1dL0TC4KJwtC/qzT+66GMRUVRU+fwBenx9OtwfhcAQmowE5WRmYWD0GE8ZWoqqsBGXFhTAar34KhogujWWAKAZUVUVXbx9a2jpxpr0TJ083oLm1HR6vD75AEBIAo8EAm80Ku9UCm9VyWWcnJANN0xAMheD1+eH1+eHzB6AqKiQJsFgGPudxlWWYOG4MqspKLvu0SSKKHpYBojgJBIPo6OpBe1cP2jq70dB8Bk1n2uD2+uD1+c89TpJlmIwGmIxGmIxGmE0Dr/V6XcKMKHxG0zSEIxH4A0H4AwH4A0EEAkFomgYNgMlogM1igSM7C2PKS1BSVID8XAcK8xzId+TAZOLpkESJgGWASKBwOIKO7h509vTC5fbC6fagr9+Fzp5edPf2wecPIBgKIRgKIRIZOGlP0wCDQf95YTAZodPpIEsSpPNeZPnsa0mCJMnn/j4cTdOgqirCEQWRSAQR5ezriILwub8P3KaqGmRZgnY2jF6vh8VsgtViRn6uA2VFBchz5CA3Jwu5OVkoyHUg4wr3aSCi+GAZIEpQn11S53R74PZ44XJ74Dz7uqu3D109fejrd8IfDEFVFKiadvZJ/exrTYOmqef+/tkLJGnYpYOyLEOv00Gv18OgH3it1+lgs1pgt1uRYbUiw26D3WaF2WSCxTzwkp2ZAUdOFnIyM7nXP1GSYhkgSmKKosDt9SEUDkNRVCiKAlVVB/6sKmdfD9x+/p/Vz25TVUiQYDZ//uT+2RO92WSC2WRMubUMRDQUywAREVGaY+UnIiJKcywDRDHw6KOPoqqqCmazGQsWLMCuXbtERyIiGhHLAFGUvfTSS3jggQfwT//0T9i7dy9mzJiBlStXorOzU3Q0IqJhcc0AUZQtWLAA8+bNwyOPPAJgYAOi8vJy/OAHP8BPfvITwemIiIbiyABRFIVCIezZswcrVqw4d5ssy1ixYgW2b98uMBkR0chYBoiiqLu7G4qioLCwcNDthYWFaG9vF5SKiOjiWAaIiIjSHMsAURTl5eVBp9Oho6Nj0O0dHR0oKioSlIqI6OJYBoiiyGg0Ys6cOdiwYcO521RVxYYNG7Bo0SKByYiIRsaNxImi7IEHHsC9996LuXPnYv78+XjwwQfh9Xpx3333iY5GRDQslgGiKLvrrrvQ1dWFf/zHf0R7eztmzpyJdevWDVlUSESUKLjPABERUZrjmgEiIqI0xzJARESU5lgGiIiI0hzLABERUZpjGSAiIkpzLANERERpjmWAiIgozbEMEBERpTmWASIiojTHMkBERJTmWAaIiIjS3P8P/oG915vUYyYAAAAASUVORK5CYII=", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import matplotlib.pyplot as plt\n", "\n", "groups = df_full['class'].value_counts()\n", "sizes = [groups[0], groups[1]]\n", "labels = list(map(lambda e: str(e), groups.index))\n", "\n", "fig1, ax1 = plt.subplots()\n", "ax1.pie(sizes, labels=labels, autopct='%1.1f%%', shadow=True, startangle=90)\n", "ax1.axis('equal')\n", "\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Import RuleKit" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from rulekit.classification import RuleClassifier\n", "from rulekit.params import Measures" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Helper function for calculating metrics" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "import sklearn.tree as scikit\n", "from sklearn.datasets import load_iris\n", "import math\n", "from sklearn.preprocessing import MultiLabelBinarizer\n", "from sklearn import metrics\n", "import pandas as pd\n", "import numpy as np\n", "\n", "\n", "x = df_full.drop(['class'], axis=1)\n", "y = df_full['class']\n", "\n", "def get_prediction_metrics(measure: str, y_pred, y_true, classification_metrics: dict) -> tuple[pd.DataFrame, np.ndarray]:\n", " confusion_matrix = metrics.confusion_matrix(y_true, y_pred)\n", " tn, fp, fn, tp = confusion_matrix.ravel()\n", " sensitivity = tp / (tp + fn)\n", " specificity = tn / (tn + fp)\n", " npv = tn / (tn + fn)\n", " ppv = tp / (tp + fp)\n", "\n", " dictionary = {\n", " 'Measure': measure,\n", " 'Accuracy': metrics.accuracy_score(y_true, y_pred),\n", " 'MAE': metrics.mean_absolute_error(y_true, y_pred),\n", " 'Kappa': metrics.cohen_kappa_score(y_true, y_pred),\n", " 'Balanced accuracy': metrics.balanced_accuracy_score(y_true, y_pred),\n", " 'Logistic loss': metrics.log_loss(y_true, y_pred),\n", " 'Precision': metrics.log_loss(y_true, y_pred),\n", " 'Sensitivity': 
sensitivity,\n", " 'Specificity': specificity,\n", " 'NPV': npv,\n", " 'PPV': ppv,\n", " 'psep': ppv + npv - 1,\n", " 'Fall-out': fp / (fp + tn),\n", " \"Youden's J statistic\": sensitivity + specificity - 1,\n", " 'Lift': (tp / (tp + fp)) / ((tp + fn) / (tp + tn + fp + fn)),\n", " 'F-measure': 2 * tp / (2 * tp + fp + fn),\n", " 'Fowlkes-Mallows index': metrics.fowlkes_mallows_score(y_true, y_pred),\n", " 'False positive': fp,\n", " 'False negative': fn,\n", " 'True positive': tp,\n", " 'True negative': tn,\n", " 'Rules per example': classification_metrics['rules_per_example'],\n", " 'Voting conflicts': classification_metrics['voting_conflicts'],\n", " 'Geometric mean': math.sqrt(specificity * sensitivity),\n", " 'Geometric mean': math.sqrt(specificity * sensitivity),\n", " }\n", " return pd.DataFrame.from_records([dictionary], index='Measure'), confusion_matrix\n", "\n", "def get_ruleset_stats(measure: str, model) -> pd.DataFrame:\n", " return pd.DataFrame.from_records([{'Measure': measure, **model.stats.__dict__}], index='Measure')\n", " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Rule induction on full dataset" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
time_total_stime_growing_stime_pruning_srules_countconditions_per_ruleinduced_conditions_per_ruleavg_rule_coverageavg_rule_precisionavg_rule_qualitypvalueFDR_pvalueFWER_pvaluefraction_significantfraction_FDR_significantfraction_FWER_significant
Measure
C22.0482911.7687580.1849681804.16666714.2833330.1400090.9175570.4794620.0592100.0649410.9234940.7666670.7500000.555556
Correlation1.9304911.5085170.403818594.94915354.1186440.3945140.6975420.1863130.0307650.0320320.1002310.8813560.8813560.864407
RSS2.2816671.8470200.401133624.01612946.2096770.5979850.8405030.3268380.0064310.0065570.0138790.9677420.9516130.919355
\n", "
" ], "text/plain": [ " time_total_s time_growing_s time_pruning_s rules_count \\\n", "Measure \n", "C2 2.048291 1.768758 0.184968 180 \n", "Correlation 1.930491 1.508517 0.403818 59 \n", "RSS 2.281667 1.847020 0.401133 62 \n", "\n", " conditions_per_rule induced_conditions_per_rule \\\n", "Measure \n", "C2 4.166667 14.283333 \n", "Correlation 4.949153 54.118644 \n", "RSS 4.016129 46.209677 \n", "\n", " avg_rule_coverage avg_rule_precision avg_rule_quality \\\n", "Measure \n", "C2 0.140009 0.917557 0.479462 \n", "Correlation 0.394514 0.697542 0.186313 \n", "RSS 0.597985 0.840503 0.326838 \n", "\n", " pvalue FDR_pvalue FWER_pvalue fraction_significant \\\n", "Measure \n", "C2 0.059210 0.064941 0.923494 0.766667 \n", "Correlation 0.030765 0.032032 0.100231 0.881356 \n", "RSS 0.006431 0.006557 0.013879 0.967742 \n", "\n", " fraction_FDR_significant fraction_FWER_significant \n", "Measure \n", "C2 0.750000 0.555556 \n", "Correlation 0.881356 0.864407 \n", "RSS 0.951613 0.919355 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AccuracyMAEKappaBalanced accuracyLogistic lossPrecisionSensitivitySpecificityNPVPPV...LiftF-measureFowlkes-Mallows indexFalse positiveFalse negativeTrue positiveTrue negativeRules per exampleVoting conflictsGeometric mean
Measure
C20.9690400.0309600.6885250.7783761.0693141.0693140.5588240.9979290.9698070.950000...14.4400000.7037040.96705157595240925.201625841.00.746770
Correlation0.9167960.0832040.3249610.6629252.8738092.8738090.3705880.9552610.9556570.368421...5.6000000.3695010.91295610810763230623.2763161850.00.594986
RSS0.9260840.0739160.2276260.5886082.5529992.5529990.2000000.9772160.9454910.382022...5.8067420.2625480.9248025513634235937.0750772082.00.442090
\n", "

3 rows × 23 columns

\n", "
" ], "text/plain": [ " Accuracy MAE Kappa Balanced accuracy Logistic loss \\\n", "Measure \n", "C2 0.969040 0.030960 0.688525 0.778376 1.069314 \n", "Correlation 0.916796 0.083204 0.324961 0.662925 2.873809 \n", "RSS 0.926084 0.073916 0.227626 0.588608 2.552999 \n", "\n", " Precision Sensitivity Specificity NPV PPV ... \\\n", "Measure ... \n", "C2 1.069314 0.558824 0.997929 0.969807 0.950000 ... \n", "Correlation 2.873809 0.370588 0.955261 0.955657 0.368421 ... \n", "RSS 2.552999 0.200000 0.977216 0.945491 0.382022 ... \n", "\n", " Lift F-measure Fowlkes-Mallows index False positive \\\n", "Measure \n", "C2 14.440000 0.703704 0.967051 5 \n", "Correlation 5.600000 0.369501 0.912956 108 \n", "RSS 5.806742 0.262548 0.924802 55 \n", "\n", " False negative True positive True negative Rules per example \\\n", "Measure \n", "C2 75 95 2409 25.201625 \n", "Correlation 107 63 2306 23.276316 \n", "RSS 136 34 2359 37.075077 \n", "\n", " Voting conflicts Geometric mean \n", "Measure \n", "C2 841.0 0.746770 \n", "Correlation 1850.0 0.594986 \n", "RSS 2082.0 0.442090 \n", "\n", "[3 rows x 23 columns]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Confusion matrix - C2\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
024095
17595
\n", "
" ], "text/plain": [ " 0 1\n", "0 2409 5\n", "1 75 95" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Confusion matrix - Correlation\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
02306108
110763
\n", "
" ], "text/plain": [ " 0 1\n", "0 2306 108\n", "1 107 63" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Confusion matrix - RSS\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
0235955
113634
\n", "
" ], "text/plain": [ " 0 1\n", "0 2359 55\n", "1 136 34" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from IPython.display import display\n", "\n", "# C2\n", "clf = RuleClassifier(\n", " induction_measure=Measures.C2,\n", " pruning_measure=Measures.C2,\n", " voting_measure=Measures.C2,\n", ")\n", "clf.fit(x, y)\n", "c2_ruleset = clf.model\n", "prediction, classification_metrics = clf.predict(x, return_metrics=True)\n", "\n", "prediction_metric, c2_confusion_matrix = get_prediction_metrics('C2', prediction, y, classification_metrics)\n", "model_stats = get_ruleset_stats('C2', clf.model)\n", "\n", "# Correlation\n", "clf = RuleClassifier(\n", " induction_measure=Measures.Correlation,\n", " pruning_measure=Measures.Correlation,\n", " voting_measure=Measures.Correlation,\n", ")\n", "clf.fit(x, y)\n", "corr_ruleset = clf.model\n", "prediction, classification_metrics = clf.predict(x, return_metrics=True)\n", "\n", "tmp, corr_confusion_matrix = get_prediction_metrics('Correlation', prediction, y, classification_metrics)\n", "prediction_metric = pd.concat([prediction_metric, tmp])\n", "model_stats = pd.concat([model_stats, get_ruleset_stats('Correlation', clf.model)])\n", "\n", "# RSS\n", "clf = RuleClassifier(\n", " induction_measure=Measures.RSS,\n", " pruning_measure=Measures.RSS,\n", " voting_measure=Measures.RSS,\n", ")\n", "clf.fit(x, y)\n", "rss_ruleset = clf.model\n", "prediction, classification_metrics = clf.predict(x, return_metrics=True)\n", "tmp, rss_confusion_matrix = get_prediction_metrics('RSS', prediction, y, classification_metrics)\n", "prediction_metric = pd.concat([prediction_metric, tmp])\n", "model_stats = pd.concat([model_stats, get_ruleset_stats('RSS', clf.model)])\n", "\n", "display(model_stats)\n", "display(prediction_metric)\n", "\n", "print('Confusion matrix - C2')\n", "display(pd.DataFrame(c2_confusion_matrix))\n", "\n", "print('Confusion matrix - Correlation')\n", "display(pd.DataFrame(corr_confusion_matrix))\n", "\n", 
"print('Confusion matrix - RSS')\n", "display(pd.DataFrame(rss_confusion_matrix))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### C2 Measure generated rules" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "IF gimpuls = (-inf, 32.50) THEN class = {0}\n", "IF gimpuls = (-inf, 54.50) AND senergy = (-inf, 3700) THEN class = {0}\n", "IF gimpuls = (-inf, 54.50) AND genergy = <1865, inf) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND goimpuls = (-inf, -0.50) AND genergy = (-inf, 13675) AND nbumps = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND genergy = (-inf, 17640) AND nbumps = (-inf, 0.50) THEN class = {0}\n", "IF genergy = <1635, 13675) AND goimpuls = (-inf, -0.50) AND nbumps = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND gimpuls = (-inf, 772.50) AND genergy = (-inf, 17640) AND senergy = (-inf, 650) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND maxenergy = (-inf, 950) AND gimpuls = (-inf, 772.50) AND genergy = (-inf, 17640) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND goimpuls = (-inf, -5.50) AND genergy = (-inf, 13675) AND senergy = (-inf, 2200) AND nbumps2 = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND gimpuls = (-inf, 772.50) AND genergy = (-inf, 17640) AND senergy = (-inf, 2200) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND maxenergy = (-inf, 3500) AND genergy = (-inf, 17640) AND nbumps2 = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND maxenergy = (-inf, 3500) AND gimpuls = (-inf, 772.50) AND genergy = (-inf, 17640) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND gimpuls = (-inf, 772.50) AND genergy = (-inf, 17640) AND nbumps3 = (-inf, 0.50) AND senergy = (-inf, 25000) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND gimpuls = (-inf, 772.50) AND genergy = (-inf, 17640) AND nbumps3 = (-inf, 0.50) THEN class = {0}\n", "IF gimpuls = 
(-inf, 772.50) AND genergy = <1865, 17640) AND senergy = (-inf, 4400) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = (-inf, 217) AND genergy = <1865, inf) AND goimpuls = (-inf, -5.50) AND nbumps4 = (-inf, 0.50) AND nbumps2 = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = (-inf, 68) AND genergy = <1865, 17640) AND senergy = (-inf, 25000) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF ghazard = {c} THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND gimpuls = (-inf, 536) AND genergy = (-inf, 18585) AND nbumps = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND genergy = (-inf, 18585) AND nbumps = (-inf, 0.50) THEN class = {0}\n", "IF maxenergy = (-inf, 950) AND gimpuls = (-inf, 536) AND genergy = (-inf, 18585) THEN class = {0}\n", "IF gimpuls = (-inf, 536) AND genergy = <1865, 18585) AND nbumps3 = (-inf, 1.50) AND senergy = (-inf, 27100) THEN class = {0}\n", "IF goenergy = <297.50, inf) THEN class = {0}\n", "IF senergy = <115450, inf) THEN class = {0}\n", "IF genergy = <1789250, inf) THEN class = {0}\n", "IF gimpuls = (-inf, 786) AND genergy = <1865, 18810) AND nbumps3 = (-inf, 1.50) AND senergy = (-inf, 27100) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND genergy = (-inf, 51290) AND goimpuls = (-inf, -0.50) AND shift = {N} AND nbumps = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND gimpuls = (-inf, 184.50) AND goimpuls = (-inf, 27.50) AND nbumps = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND genergy = (-inf, 51290) AND shift = {N} AND nbumps = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = <-73.50, inf) AND goimpuls = (-inf, -0.50) AND shift = {N} AND nbumps = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = <-73.50, inf) AND goimpuls = (-inf, 96.50) AND shift = {N} AND nbumps = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = <-55.50, inf) AND goimpuls = (-inf, 96.50) AND shift = {N} AND senergy = (-inf, 2150) THEN class = {0}\n", "IF goimpuls = <-70.50, 96.50) AND genergy = 
<4640, inf) AND shift = {N} AND nbumps2 = (-inf, 0.50) THEN class = {0}\n", "IF gimpuls = <135, inf) AND goimpuls = (-inf, 230.50) AND genergy = <9110, inf) AND shift = {N} AND senergy = (-inf, 2150) THEN class = {0}\n", "IF genergy = <9110, inf) AND shift = {N} AND senergy = <2400, 9500) AND nbumps3 = (-inf, 1.50) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND gimpuls = (-inf, 395) AND genergy = (-inf, 19310) AND goimpuls = (-inf, -0.50) AND nbumps = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND gimpuls = (-inf, 786) AND genergy = (-inf, 19310) AND senergy = (-inf, 650) THEN class = {0}\n", "IF goenergy = <-54.50, inf) AND genergy = <10915, 19310) AND goimpuls = <-50.50, 230.50) AND nbumps2 = (-inf, 1.50) AND nbumps = <0.50, inf) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND gimpuls = (-inf, 786) AND genergy = (-inf, 19510) AND senergy = (-inf, 650) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND gimpuls = (-inf, 392.50) AND genergy = (-inf, 20525) AND goimpuls = (-inf, -0.50) AND nbumps = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = <-84.50, 118) AND genergy = (-inf, 20525) AND senergy = (-inf, 550) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND gimpuls = (-inf, 319.50) AND goimpuls = (-inf, -0.50) AND seismoacoustic = {a} AND nbumps = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND gimpuls = (-inf, 319.50) AND goimpuls = (-inf, -0.50) AND nbumps = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND gimpuls = (-inf, 362.50) AND goimpuls = (-inf, -0.50) AND nbumps = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND gimpuls = (-inf, 319.50) AND goimpuls = (-inf, -0.50) AND senergy = (-inf, 550) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND gimpuls = (-inf, 362.50) AND goimpuls = (-inf, -0.50) AND senergy = (-inf, 550) THEN class = {0}\n", "IF goenergy = <-84.50, 118) AND gimpuls = (-inf, 362.50) AND goimpuls = (-inf, 96.50) AND nbumps = (-inf, 0.50) THEN 
class = {0}\n", "IF goenergy = <-84.50, 118) AND gimpuls = (-inf, 362.50) AND goimpuls = (-inf, 96.50) AND senergy = (-inf, 550) THEN class = {0}\n", "IF goenergy = <-84.50, 118) AND gimpuls = (-inf, 380.50) AND goimpuls = (-inf, 96.50) AND nbumps = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = <-84.50, 120.50) AND gimpuls = (-inf, 395.50) AND maxenergy = (-inf, 350) AND goimpuls = (-inf, 96.50) AND senergy = (-inf, 550) THEN class = {0}\n", "IF goenergy = <-84.50, 120.50) AND gimpuls = (-inf, 449.50) AND maxenergy = (-inf, 350) AND genergy = (-inf, 32875) THEN class = {0}\n", "IF goenergy = <-84.50, 120.50) AND gimpuls = (-inf, 449.50) AND maxenergy = (-inf, 350) AND goimpuls = (-inf, 96.50) AND senergy = (-inf, 550) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND gimpuls = (-inf, 449.50) AND goimpuls = (-inf, 96.50) AND senergy = (-inf, 550) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND gimpuls = (-inf, 537.50) AND genergy = (-inf, 25125) AND goimpuls = (-inf, 27.50) AND nbumps = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = <-84.50, 114.50) AND gimpuls = (-inf, 537.50) AND maxenergy = (-inf, 350) AND genergy = (-inf, 31790) THEN class = {0}\n", "IF goenergy = <-84.50, 114.50) AND genergy = (-inf, 31790) AND senergy = (-inf, 550) THEN class = {0}\n", "IF goenergy = <116.50, inf) AND gimpuls = (-inf, 788.50) AND genergy = <20930, 31790) THEN class = {0}\n", "IF goenergy = <-84.50, 114.50) AND genergy = (-inf, 32770) AND senergy = (-inf, 550) THEN class = {0}\n", "IF goenergy = <-84.50, 87.50) AND gimpuls = (-inf, 1342.50) AND goimpuls = (-inf, 96) AND senergy = (-inf, 550) THEN class = {0}\n", "IF goenergy = <-84.50, 87.50) AND gimpuls = (-inf, 1732) AND goimpuls = (-inf, 96) AND senergy = (-inf, 550) THEN class = {0}\n", "IF goenergy = <-84.50, 87.50) AND gimpuls = (-inf, 2168) AND goimpuls = (-inf, 96) AND senergy = (-inf, 550) THEN class = {0}\n", "IF goenergy = <-84.50, 87.50) AND genergy = (-inf, 1674705) AND goimpuls = (-inf, 96) AND 
senergy = (-inf, 550) THEN class = {0}\n", "IF ghazard = {a} AND goenergy = <57, inf) AND gimpuls = (-inf, 514.50) AND goimpuls = <-1.50, 96.50) AND senergy = (-inf, 550) THEN class = {0}\n", "IF goenergy = (-inf, 104.50) AND gimpuls = <523, 1342.50) AND goimpuls = <17.50, inf) AND genergy = <46870, inf) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF goenergy = <29.50, 104.50) AND gimpuls = <522, 2168) AND senergy = (-inf, 250) THEN class = {0}\n", "IF goenergy = <-19, inf) AND goimpuls = <4.50, 312) AND genergy = <4455, 34260) AND nbumps = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = <8.50, inf) AND gimpuls = <523, 1342.50) AND goimpuls = (-inf, 96.50) AND senergy = (-inf, 250) THEN class = {0}\n", "IF genergy = <36470, 42165) AND goimpuls = <5.50, inf) AND senergy = (-inf, 550) THEN class = {0}\n", "IF goenergy = <119.50, inf) AND gimpuls = <516, 1210) AND goimpuls = (-inf, 118.50) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = <144.50, 1210) AND genergy = <42430, inf) AND goimpuls = <59.50, inf) AND senergy = (-inf, 250) THEN class = {0}\n", "IF gimpuls = <813.50, 1427.50) AND goimpuls = <104.50, inf) AND senergy = (-inf, 350) THEN class = {0}\n", "IF gimpuls = (-inf, 319) AND genergy = <1865, 19670) AND goimpuls = (-inf, -6.50) AND senergy = (-inf, 9600) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND gimpuls = (-inf, 362.50) AND goimpuls = (-inf, -0.50) AND senergy = (-inf, 650) THEN class = {0}\n", "IF goenergy = <-44.50, inf) AND gimpuls = <324.50, inf) AND genergy = (-inf, 32770) AND goimpuls = (-inf, 105.50) AND nbumps = <0.50, 1.50) THEN class = {0}\n", "IF goenergy = <-73.50, 14.50) AND gimpuls = (-inf, 1342.50) AND genergy = <36280, inf) AND senergy = (-inf, 650) THEN class = {0}\n", "IF goimpuls = <-6.50, inf) AND genergy = <49585, inf) AND senergy = (-inf, 650) AND nbumps = <0.50, inf) THEN class = {0}\n", "IF goenergy = <-54.50, inf) AND genergy = (-inf, 64725) AND senergy = <650, 750) THEN class = {0}\n", "IF goenergy = 
<-33.50, inf) AND maxenergy = (-inf, 950) AND gimpuls = (-inf, 537.50) AND genergy = (-inf, 25125) AND goimpuls = <-41.50, -0.50) THEN class = {0}\n", "IF goenergy = <-84.50, 114.50) AND gimpuls = (-inf, 587.50) AND genergy = (-inf, 27275) AND nbumps3 = (-inf, 0.50) AND senergy = (-inf, 25250) THEN class = {0}\n", "IF goenergy = (-inf, 114.50) AND genergy = <1865, 28515) AND senergy = (-inf, 7500) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF goenergy = (-inf, -20.50) AND gimpuls = (-inf, 537.50) AND genergy = <20610, 28515) AND nbumps2 = (-inf, 0.50) THEN class = {0}\n", "IF genergy = (-inf, 28515) AND nbumps = <5.50, inf) THEN class = {0}\n", "IF maxenergy = (-inf, 3500) AND genergy = <20270, 28515) AND goimpuls = (-inf, -8.50) AND nbumps2 = <0.50, 1.50) THEN class = {0}\n", "IF genergy = <3260, 28515) AND senergy = <8500, inf) AND nbumps = (-inf, 2.50) THEN class = {0}\n", "IF goenergy = <-36.50, inf) AND genergy = (-inf, 28515) AND senergy = <5050, inf) THEN class = {0}\n", "IF ghazard = {a} AND goenergy = <-53.50, 40.50) AND genergy = <20560, 29105) AND nbumps2 = <0.50, inf) THEN class = {0}\n", "IF goenergy = (-inf, 14.50) AND maxenergy = (-inf, 550) AND gimpuls = (-inf, 1252.50) AND nbumps = (-inf, 2.50) THEN class = {0}\n", "IF goenergy = <-40.50, 28.50) AND gimpuls = (-inf, 2168) AND genergy = <40210, inf) AND senergy = (-inf, 850) AND seismic = {a} THEN class = {0}\n", "IF goenergy = (-inf, 104.50) AND gimpuls = (-inf, 362.50) AND genergy = <1865, inf) AND goimpuls = (-inf, 66.50) AND senergy = (-inf, 7500) AND nbumps2 = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = (-inf, 88.50) AND gimpuls = (-inf, 1210) AND goimpuls = (-inf, 96) AND genergy = <1865, inf) AND senergy = (-inf, 7500) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF goenergy = <17.50, inf) AND gimpuls = (-inf, 1210) AND goimpuls = (-inf, 66.50) AND nbumps2 = (-inf, 0.50) AND nbumps = <0.50, inf) THEN class = {0}\n", "IF gimpuls = (-inf, 1210) AND genergy = <7815, inf) AND senergy 
= <1500, 7500) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF goenergy = (-inf, 88.50) AND gimpuls = (-inf, 1252.50) AND goimpuls = (-inf, 96) AND genergy = <1865, inf) AND senergy = (-inf, 7500) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1342.50) AND goimpuls = <-54.50, inf) AND genergy = <7870, inf) AND senergy = <1500, inf) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF goenergy = <-40.50, 31.50) AND gimpuls = (-inf, 1485) AND genergy = <44960, inf) AND senergy = (-inf, 5500) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = <1441.50, inf) AND genergy = (-inf, 117575) THEN class = {0}\n", "IF goenergy = (-inf, 87.50) AND gimpuls = (-inf, 1752) AND goimpuls = (-inf, 96) AND nbumps3 = (-inf, 0.50) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF goimpuls = <-40.50, inf) AND genergy = <422215, inf) AND seismoacoustic = {a} AND senergy = <2500, inf) AND nbumps2 = (-inf, 0.50) THEN class = {0}\n", "IF genergy = <29945, 31245) THEN class = {0}\n", "IF goenergy = <-33.50, inf) AND genergy = <29155, 31615) AND goimpuls = (-inf, 105.50) AND nbumps3 = (-inf, 1.50) THEN class = {0}\n", "IF genergy = <31805, 32680) THEN class = {0}\n", "IF goenergy = (-inf, 158.50) AND maxenergy = (-inf, 650) AND gimpuls = (-inf, 1210) AND goimpuls = (-inf, 96.50) THEN class = {0}\n", "IF genergy = <32925, 34315) THEN class = {0}\n", "IF maxenergy = (-inf, 750) AND genergy = <35480, 45240) AND nbumps = <0.50, inf) THEN class = {0}\n", "IF ghazard = {a} AND goenergy = <-27.50, inf) AND maxenergy = (-inf, 750) AND gimpuls = (-inf, 2056) AND genergy = (-inf, 715465) AND senergy = <850, inf) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND gimpuls = (-inf, 305.50) AND goimpuls = (-inf, 17.50) AND senergy = (-inf, 2300) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND gimpuls = (-inf, 305.50) AND maxenergy = (-inf, 3500) AND goimpuls = (-inf, -5.50) THEN class = {0}\n", "IF gimpuls = (-inf, 305.50) AND genergy = <29195, inf) AND goimpuls = 
(-inf, 96) AND senergy = (-inf, 9850) THEN class = {0}\n", "IF senergy = <71000, inf) AND nbumps2 = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = (-inf, 88.50) AND gimpuls = (-inf, 1141.50) AND maxenergy = (-inf, 7500) AND genergy = <1865, inf) AND goimpuls = (-inf, 96) AND nbumps3 = (-inf, 2.50) AND nbumps2 = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = <-72.50, inf) AND gimpuls = (-inf, 1372) AND genergy = <55365, inf) AND senergy = <1500, inf) AND nbumps2 = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = (-inf, 87.50) AND genergy = (-inf, 1733075) AND nbumps3 = (-inf, 1.50) AND nbumps2 = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = <-32.50, inf) AND gimpuls = (-inf, 2681) AND genergy = <173815, 1026530) AND nbumps3 = (-inf, 2.50) AND nbumps2 = (-inf, 0.50) THEN class = {0}\n", "IF ghazard = {a} AND goenergy = <0.50, 87.50) AND maxenergy = <550, 850) THEN class = {0}\n", "IF ghazard = {a} AND goenergy = <-29.50, inf) AND gimpuls = <259.50, inf) AND maxenergy = <550, inf) AND genergy = (-inf, 39305) AND goimpuls = <-39.50, inf) AND senergy = (-inf, 4400) AND nbumps3 = (-inf, 2.50) THEN class = {0}\n", "IF goenergy = <-18.50, 105.50) AND genergy = <9110, 39695) AND goimpuls = <-41.50, inf) AND nbumps3 = (-inf, 2.50) THEN class = {0}\n", "IF gimpuls = (-inf, 361.50) AND maxenergy = (-inf, 3500) AND senergy = <3250, inf) THEN class = {0}\n", "IF goenergy = <-37.50, inf) AND gimpuls = (-inf, 361.50) AND maxenergy = (-inf, 35000) AND genergy = <38315, inf) AND senergy = <950, inf) THEN class = {0}\n", "IF goenergy = <-18.50, inf) AND gimpuls = <334.50, 804.50) AND maxenergy = <550, inf) AND genergy = (-inf, 44750) AND senergy = (-inf, 25150) AND nbumps2 = <0.50, 3.50) THEN class = {0}\n", "IF senergy = <1250, 1550) AND nbumps2 = (-inf, 1.50) THEN class = {0}\n", "IF genergy = <44780, 45255) THEN class = {0}\n", "IF goenergy = (-inf, 158.50) AND senergy = <1150, 1650) AND nbumps2 = (-inf, 2.50) THEN class = {0}\n", "IF genergy = <46690, 48545) THEN class = 
{0}\n", "IF goenergy = (-inf, 68) AND gimpuls = (-inf, 769.50) AND genergy = <43280, 49095) AND nbumps3 = (-inf, 3.50) THEN class = {0}\n", "IF goenergy = (-inf, 95.50) AND gimpuls = (-inf, 514.50) AND goimpuls = <-7, 96.50) AND genergy = <40245, inf) AND nbumps3 = (-inf, 2.50) THEN class = {0}\n", "IF goenergy = <-73.50, inf) AND gimpuls = (-inf, 514.50) AND maxenergy = <550, 8500) AND genergy = <49265, 108000) THEN class = {0}\n", "IF goenergy = <-53.50, inf) AND gimpuls = (-inf, 1836) AND nbumps3 = (-inf, 0.50) AND nbumps4 = <0.50, inf) AND nbumps2 = (-inf, 1.50) THEN class = {0}\n", "IF maxenergy = (-inf, 1500) AND gimpuls = <673.50, 1210) AND senergy = <1700, inf) THEN class = {0}\n", "IF goenergy = <-84.50, inf) AND gimpuls = (-inf, 1245.50) AND genergy = <49585, 58435) AND goimpuls = (-inf, 96.50) AND nbumps3 = (-inf, 1.50) AND nbumps2 = (-inf, 2.50) THEN class = {0}\n", "IF ghazard = {a} AND goenergy = (-inf, 68.50) AND gimpuls = <526, 606) AND genergy = <17700, inf) AND senergy = (-inf, 9550) AND nbumps2 = (-inf, 1.50) THEN class = {0}\n", "IF goenergy = <-43.50, 87.50) AND senergy = (-inf, 3150) AND nbumps3 = (-inf, 1.50) AND nbumps2 = (-inf, 1.50) AND nbumps = <1.50, inf) THEN class = {0}\n", "IF goenergy = (-inf, 120.50) AND gimpuls = (-inf, 1029.50) AND genergy = <58515, 61125) AND nbumps2 = (-inf, 2.50) THEN class = {0}\n", "IF gimpuls = <393.50, 725.50) AND genergy = <81935, inf) AND nbumps3 = (-inf, 2.50) THEN class = {0}\n", "IF goenergy = <11.50, 68.50) AND maxenergy = (-inf, 2500) AND gimpuls = <556, inf) AND genergy = (-inf, 1482055) AND nbumps2 = <0.50, inf) THEN class = {0}\n", "IF goenergy = (-inf, 28.50) AND gimpuls = <354, 791.50) AND genergy = <81505, 366505) AND nbumps3 = (-inf, 2.50) THEN class = {0}\n", "IF goenergy = <-31.50, 104.50) AND gimpuls = <325, 2068.50) AND goimpuls = (-inf, 50.50) AND senergy = (-inf, 5750) AND nbumps3 = (-inf, 1.50) AND nbumps2 = <0.50, 1.50) THEN class = {0}\n", "IF goenergy = <-9.50, inf) AND gimpuls = 
<938.50, 2902.50) AND maxenergy = (-inf, 3500) AND genergy = <80845, 508210) AND nbumps = <0.50, inf) THEN class = {0}\n", "IF senergy = <5050, 5750) THEN class = {0}\n", "IF gimpuls = <887.50, 977) AND goimpuls = (-inf, -6.50) AND senergy = (-inf, 85450) THEN class = {0}\n", "IF ghazard = {a} AND goenergy = <-38.50, inf) AND gimpuls = <813.50, 1151) AND maxenergy = <3500, inf) AND goimpuls = (-inf, 89.50) AND nbumps2 = (-inf, 2.50) THEN class = {0}\n", "IF goenergy = <-27.50, inf) AND genergy = <123990, 544010) AND senergy = (-inf, 17850) AND nbumps = <3.50, inf) THEN class = {0}\n", "IF goenergy = (-inf, 68.50) AND maxenergy = <7500, inf) AND genergy = (-inf, 189505) AND goimpuls = <32.50, inf) THEN class = {0}\n", "IF goenergy = <-29.50, inf) AND gimpuls = (-inf, 2078.50) AND goimpuls = (-inf, -5.50) AND genergy = <138665, inf) AND senergy = <3250, inf) AND nbumps2 = (-inf, 1.50) AND nbumps = <1.50, inf) THEN class = {0}\n", "IF goenergy = <-15.50, 53.50) AND gimpuls = (-inf, 2917) AND goimpuls = <-7.50, inf) AND nbumps3 = (-inf, 1.50) AND senergy = <7500, inf) THEN class = {0}\n", "IF goenergy = <-88.50, 87.50) AND genergy = (-inf, 1713980) AND goimpuls = (-inf, 89.50) AND senergy = (-inf, 18500) AND nbumps3 = (-inf, 4.50) AND nbumps2 = (-inf, 3.50) THEN class = {0}\n", "IF goenergy = <-29.50, -2.50) AND genergy = (-inf, 450275) AND senergy = <27300, inf) AND nbumps = (-inf, 5.50) THEN class = {0}\n", "IF goenergy = <22.50, inf) AND gimpuls = <364, inf) AND genergy = (-inf, 144410) AND nbumps3 = <3.50, inf) THEN class = {1}\n", "IF gimpuls = <364, inf) AND goimpuls = (-inf, 21.50) AND nbumps3 = <3.50, inf) AND senergy = <10150, inf) THEN class = {1}\n", "IF goenergy = <-15, inf) AND goimpuls = (-inf, 44.50) AND senergy = <13850, inf) AND nbumps3 = (-inf, 3.50) AND nbumps = <5.50, inf) THEN class = {1}\n", "IF gimpuls = <2208.50, 2361.50) AND genergy = <493095, inf) AND nbumps2 = <0.50, inf) THEN class = {1}\n", "IF gimpuls = <3011, inf) AND genergy = (-inf, 
1005720) AND nbumps2 = <0.50, inf) THEN class = {1}\n", "IF gimpuls = <1328, 1361.50) AND nbumps2 = <0.50, inf) THEN class = {1}\n", "IF goenergy = (-inf, -29.50) AND gimpuls = <1328, inf) AND goimpuls = <-29, -14.50) THEN class = {1}\n", "IF ghazard = {a} AND goenergy = <-10.50, inf) AND gimpuls = <1328, 1443.50) AND goimpuls = <-1, inf) AND nbumps2 = (-inf, 1.50) THEN class = {1}\n", "IF gimpuls = <1328, 2109) AND maxenergy = (-inf, 7500) AND goimpuls = (-inf, -5.50) AND genergy = (-inf, 642325) AND senergy = <850, 9350) AND seismoacoustic = {a} AND nbumps = (-inf, 3.50) THEN class = {1}\n", "IF gimpuls = <1394.50, 2004) AND goimpuls = <-25, 13) AND genergy = <393900, inf) AND senergy = (-inf, 38250) AND nbumps2 = <0.50, inf) AND nbumps = <1.50, 3.50) THEN class = {1}\n", "IF gimpuls = <1747.50, 3018) AND goimpuls = <-25, 20.50) AND nbumps3 = (-inf, 1.50) AND senergy = (-inf, 32750) THEN class = {1}\n", "IF goenergy = <-16.50, inf) AND gimpuls = <1831, 2945.50) AND genergy = <254130, 1133675) AND seismic = {b} AND senergy = <1600, 32750) THEN class = {1}\n", "IF maxenergy = (-inf, 25000) AND gimpuls = <364, inf) AND goimpuls = <1.50, inf) AND nbumps3 = <1.50, 4.50) AND senergy = <4300, inf) AND nbumps = <4.50, 6.50) THEN class = {1}\n", "IF gimpuls = <740.50, 887.50) AND goimpuls = (-inf, 9) AND nbumps = <2.50, inf) THEN class = {1}\n", "IF gimpuls = <764.50, 1288.50) AND genergy = <61240, 213225) AND goimpuls = <-22.50, 58.50) AND senergy = (-inf, 27350) AND nbumps3 = (-inf, 1.50) AND nbumps = <2.50, inf) THEN class = {1}\n", "IF gimpuls = <379, 484) AND goimpuls = (-inf, 12.50) AND senergy = (-inf, 10350) AND nbumps = <2.50, inf) THEN class = {1}\n", "IF goenergy = (-inf, -4.50) AND maxenergy = <3500, inf) AND goimpuls = <-50, inf) AND genergy = (-inf, 52070) AND senergy = <5750, 15200) AND nbumps = <2.50, 5.50) AND nbumps2 = (-inf, 2.50) THEN class = {1}\n", "IF goenergy = (-inf, 123.50) AND goimpuls = <-70.50, 32.50) AND seismoacoustic = {a} AND senergy = 
(-inf, 27350) AND nbumps = <2.50, 4.50) THEN class = {1}\n", "IF goenergy = <-30.50, inf) AND gimpuls = <1139.50, 1270.50) AND goimpuls = (-inf, 105) AND genergy = <54930, 220205) AND senergy = (-inf, 38250) AND nbumps3 = (-inf, 1.50) THEN class = {1}\n", "IF goenergy = <-51, inf) AND gimpuls = <754.50, 1048) AND goimpuls = (-inf, 62.50) AND genergy = (-inf, 99210) AND senergy = (-inf, 201650) AND nbumps = <1.50, 2.50) AND nbumps2 = (-inf, 1.50) THEN class = {1}\n", "IF goenergy = (-inf, 144) AND gimpuls = <361.50, 728.50) AND maxenergy = <450, inf) AND genergy = <32455, inf) AND goimpuls = <-12.50, 8.50) AND senergy = (-inf, 7600) AND nbumps2 = <0.50, inf) AND nbumps = (-inf, 2.50) THEN class = {1}\n", "IF ghazard = {a} AND gimpuls = <160, 256) AND maxenergy = (-inf, 4500) AND genergy = (-inf, 21865) AND nbumps = <1.50, inf) THEN class = {1}\n", "IF goenergy = (-inf, 106.50) AND gimpuls = <110, 649.50) AND genergy = (-inf, 46930) AND senergy = (-inf, 40500) AND nbumps = <1.50, 2.50) THEN class = {1}\n", "IF gimpuls = <110, inf) AND senergy = <550, inf) AND nbumps2 = <0.50, inf) THEN class = {1}\n", "IF goenergy = <-78.50, inf) AND gimpuls = <32.50, 237.50) AND maxenergy = <3500, inf) AND goimpuls = <-74.50, 68.50) AND nbumps3 = (-inf, 2.50) AND nbumps2 = (-inf, 2.50) AND nbumps = (-inf, 4.50) THEN class = {1}\n", "IF gimpuls = <767.50, 813.50) AND genergy = (-inf, 75455) AND goimpuls = <1, inf) AND senergy = (-inf, 1300) AND nbumps = (-inf, 1.50) THEN class = {1}\n", "IF ghazard = {a} AND goenergy = (-inf, 106.50) AND gimpuls = <131, 735) AND maxenergy = (-inf, 350) AND genergy = <48545, 66335) AND goimpuls = <-72, inf) THEN class = {1}\n", "IF ghazard = {a} AND goenergy = <5.50, inf) AND gimpuls = <396, 732.50) AND genergy = <40050, 50765) AND goimpuls = (-inf, 79.50) AND senergy = (-inf, 350) THEN class = {1}\n", "IF goenergy = <-37.50, 152.50) AND gimpuls = <571, 651) AND genergy = <20840, 36590) AND nbumps = (-inf, 0.50) THEN class = {1}\n", "IF ghazard = {a} 
AND goenergy = <-22, 33.50) AND gimpuls = <361.50, 525.50) AND genergy = <25145, 42200) AND goimpuls = <-27.50, 8.50) AND nbumps = (-inf, 0.50) THEN class = {1}\n", "IF goenergy = <-45.50, inf) AND gimpuls = <380.50, 542.50) AND genergy = <17635, 21260) AND shift = {W} AND nbumps = (-inf, 0.50) THEN class = {1}\n", "IF gimpuls = <240, 324.50) AND genergy = <18585, 25665) AND goimpuls = <-49.50, 37.50) AND shift = {W} AND senergy = (-inf, 3350) AND nbumps = (-inf, 2.50) THEN class = {1}\n", "IF ghazard = {a} AND goenergy = <-59.50, -10.50) AND gimpuls = <88, 269.50) AND maxenergy = (-inf, 4500) AND goimpuls = <-42.50, 4.50) AND genergy = <4565, 21365) THEN class = {1}\n", "IF gimpuls = <54.50, 60.50) AND genergy = <1510, 4735) AND goimpuls = (-inf, 33) THEN class = {1}\n" ] } ], "source": [ "for rule in c2_ruleset.rules:\n", " print(rule)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Correlation Measure generated rules" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "IF gimpuls = (-inf, 1252.50) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1342.50) AND goimpuls = (-inf, 96.50) AND senergy = (-inf, 550) THEN class = {0}\n", "IF gimpuls = (-inf, 1342.50) AND goimpuls = (-inf, 312) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1410) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1653.50) AND genergy = (-inf, 1006585) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1684) AND goimpuls = (-inf, 312) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1798) AND goimpuls = (-inf, 312) AND genergy = (-inf, 1006585) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = (-inf, 2733) AND nbumps2 = (-inf, 0.50) THEN class = {0}\n", "IF gimpuls = (-inf, 3146) AND genergy = (-inf, 1733075) AND goimpuls = (-inf, 312) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF 
goimpuls = (-inf, 312) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF senergy = (-inf, 2350) AND nbumps2 = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1331) AND nbumps = (-inf, 2.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1655.50) AND nbumps = (-inf, 2.50) AND nbumps2 = (-inf, 1.50) THEN class = {0}\n", "IF ghazard = {a} AND gimpuls = <334.50, 2892) AND genergy = (-inf, 318735) AND goimpuls = <31.50, inf) AND senergy = <350, inf) AND nbumps = (-inf, 2.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1832.50) AND nbumps = (-inf, 2.50) THEN class = {0}\n", "IF gimpuls = (-inf, 3146) AND genergy = (-inf, 1713980) AND goimpuls = (-inf, 312) AND nbumps = (-inf, 2.50) AND nbumps2 = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1253.50) AND nbumps3 = (-inf, 1.50) AND nbumps5 = (-inf, 0.50) AND nbumps2 = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1253.50) AND nbumps3 = (-inf, 1.50) AND nbumps2 = (-inf, 2.50) THEN class = {0}\n", "IF goenergy = (-inf, 104.50) AND genergy = (-inf, 32675) AND senergy = (-inf, 2350) THEN class = {0}\n", "IF gimpuls = (-inf, 1253.50) AND nbumps3 = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1150.50) AND nbumps3 = (-inf, 2.50) AND nbumps2 = (-inf, 1.50) THEN class = {0}\n", "IF maxenergy = (-inf, 4500) AND gimpuls = (-inf, 769.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1253.50) AND nbumps3 = (-inf, 3.50) AND nbumps2 = (-inf, 1.50) THEN class = {0}\n", "IF goenergy = (-inf, 123.50) AND gimpuls = (-inf, 1028.50) AND maxenergy = <1500, inf) AND genergy = <31805, 373295) AND goimpuls = <-54.50, inf) AND senergy = (-inf, 14350) AND seismic = {a} AND nbumps2 = (-inf, 2.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1253.50) AND nbumps3 = (-inf, 2.50) AND nbumps2 = (-inf, 2.50) THEN class = {0}\n", "IF genergy = (-inf, 378500) AND nbumps3 = (-inf, 2.50) AND nbumps = (-inf, 5.50) THEN class = {0}\n", "IF goenergy = (-inf, 68.50) AND gimpuls = (-inf, 901) AND genergy = <21765, inf) AND nbumps3 = (-inf, 
3.50) AND nbumps2 = <1.50, 3.50) AND nbumps = <3.50, inf) THEN class = {0}\n", "IF gimpuls = (-inf, 1150.50) AND senergy = (-inf, 20650) THEN class = {0}\n", "IF gimpuls = (-inf, 1378) AND maxenergy = (-inf, 75000) AND goimpuls = (-inf, 312) AND nbumps4 = (-inf, 2.50) AND nbumps = (-inf, 8.50) THEN class = {0}\n", "IF goenergy = <-4.50, inf) AND gimpuls = (-inf, 2185.50) AND genergy = <135285, 1505475) AND senergy = (-inf, 5750) AND nbumps2 = <0.50, inf) THEN class = {0}\n", "IF goenergy = <-0.50, 104.50) AND maxenergy = (-inf, 5500) AND goimpuls = <20.50, inf) AND genergy = <101710, inf) AND nbumps = <1.50, inf) THEN class = {0}\n", "IF goenergy = <-29.50, inf) AND goimpuls = (-inf, 6.50) AND genergy = <392530, inf) AND senergy = <7250, inf) AND nbumps2 = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = (-inf, 3881.50) AND nbumps = (-inf, 4.50) AND nbumps2 = (-inf, 2.50) THEN class = {0}\n", "IF goenergy = <-21.50, 81.50) AND gimpuls = <549.50, inf) AND genergy = (-inf, 537160) AND goimpuls = <-13.50, 89.50) AND nbumps2 = (-inf, 3.50) AND nbumps = <3.50, inf) THEN class = {0}\n", "IF maxenergy = <1500, inf) AND gimpuls = <994.50, 1959) AND goimpuls = <-34, 95) AND genergy = (-inf, 662435) AND senergy = (-inf, 36050) AND nbumps3 = <0.50, 4.50) AND nbumps2 = <0.50, 2.50) THEN class = {1}\n", "IF goenergy = (-inf, 96) AND maxenergy = <1500, inf) AND gimpuls = <712, 2257.50) AND genergy = <61250, 662435) AND goimpuls = (-inf, 95) AND nbumps3 = <0.50, inf) AND senergy = (-inf, 27350) AND nbumps2 = <0.50, inf) AND nbumps = (-inf, 6.50) THEN class = {1}\n", "IF goenergy = (-inf, 96) AND maxenergy = <1500, inf) AND gimpuls = <538.50, inf) AND goimpuls = <-34, 109) AND genergy = <61250, 826925) AND senergy = (-inf, 36050) AND nbumps3 = (-inf, 4.50) AND nbumps2 = <0.50, inf) AND nbumps = (-inf, 6.50) THEN class = {1}\n", "IF goenergy = (-inf, 186) AND maxenergy = <1500, inf) AND gimpuls = <538.50, inf) AND genergy = <58310, 934630) AND goimpuls = <-55, inf) AND senergy = 
(-inf, 40650) AND nbumps2 = <0.50, inf) THEN class = {1}\n", "IF ghazard = {a} AND gimpuls = <521.50, inf) AND genergy = <58310, 799855) AND goimpuls = <-23.50, 64.50) AND senergy = <850, 36050) AND nbumps = <1.50, 3.50) AND nbumps2 = <0.50, inf) THEN class = {1}\n", "IF goenergy = (-inf, 84) AND gimpuls = <894.50, inf) AND genergy = <66235, 1161025) AND goimpuls = <-46, 77.50) AND senergy = <650, inf) AND nbumps3 = (-inf, 2.50) AND nbumps = <1.50, 5.50) AND nbumps2 = <0.50, 3.50) THEN class = {1}\n", "IF goenergy = <-34.50, 96) AND gimpuls = <521.50, 1548.50) AND maxenergy = (-inf, 7500) AND genergy = <34360, 207270) AND goimpuls = <-22.50, inf) AND nbumps = <1.50, inf) THEN class = {1}\n", "IF goenergy = (-inf, 135.50) AND gimpuls = <378, inf) AND genergy = <32635, 622815) AND goimpuls = (-inf, 10.50) AND senergy = (-inf, 36050) AND nbumps = <1.50, inf) THEN class = {1}\n", "IF goenergy = (-inf, 106.50) AND gimpuls = <306, 542) AND genergy = <19245, 81890) AND senergy = <750, 12050) AND nbumps = <1.50, 3.50) THEN class = {1}\n", "IF ghazard = {a} AND goenergy = (-inf, -1.50) AND gimpuls = <153.50, 289) AND genergy = (-inf, 37085) AND senergy = (-inf, 40500) AND nbumps3 = (-inf, 3.50) AND nbumps = <1.50, inf) AND nbumps2 = <0.50, inf) THEN class = {1}\n", "IF ghazard = {a} AND goenergy = <-65.50, 27) AND gimpuls = <98.50, 346) AND goimpuls = <-70.50, 8.50) AND genergy = (-inf, 64310) AND senergy = <2350, inf) AND nbumps3 = (-inf, 3.50) AND nbumps2 = <0.50, inf) THEN class = {1}\n", "IF ghazard = {a} AND goenergy = <-50.50, inf) AND gimpuls = <1328.50, inf) AND genergy = (-inf, 1062020) AND goimpuls = <-33.50, 39.50) AND senergy = <850, 38250) AND nbumps = (-inf, 7.50) THEN class = {1}\n", "IF goenergy = (-inf, 56.50) AND gimpuls = <1253.50, inf) AND maxenergy = (-inf, 65000) AND genergy = <52565, 716085) AND goimpuls = <-60.50, 73) AND senergy = <350, inf) AND nbumps3 = (-inf, 2.50) AND nbumps4 = (-inf, 1.50) AND nbumps2 = (-inf, 2.50) AND nbumps = (-inf, 4.50) 
THEN class = {1}\n", "IF gimpuls = <1342, 3508) AND maxenergy = (-inf, 7500) AND genergy = <77100, inf) AND goimpuls = (-inf, 68.50) AND shift = {W} AND senergy = (-inf, 13350) AND nbumps2 = (-inf, 3.50) THEN class = {1}\n", "IF ghazard = {a} AND goenergy = <-59.50, 45.50) AND gimpuls = <110, 762) AND genergy = <12145, 134125) AND goimpuls = <-53.50, inf) AND senergy = <550, 950) THEN class = {1}\n", "IF goenergy = (-inf, 128.50) AND genergy = <10495, inf) AND shift = {W} AND senergy = (-inf, 36050) AND nbumps3 = <0.50, inf) AND nbumps2 = (-inf, 4.50) AND nbumps = (-inf, 6.50) THEN class = {1}\n", "IF goenergy = <-78.50, inf) AND gimpuls = <32.50, inf) AND maxenergy = <250, inf) AND goimpuls = <-74.50, inf) AND senergy = <350, inf) THEN class = {1}\n", "IF goenergy = (-inf, 176.50) AND gimpuls = <449.50, inf) AND genergy = <49095, inf) THEN class = {1}\n", "IF ghazard = {a} AND goenergy = <68, 124.50) AND gimpuls = <725.50, 1445.50) AND maxenergy = (-inf, 2500) AND genergy = (-inf, 127635) AND goimpuls = <16, inf) AND senergy = (-inf, 4700) AND nbumps2 = (-inf, 1.50) THEN class = {1}\n", "IF ghazard = {a} AND goenergy = <15.50, 160) AND gimpuls = <133.50, 732.50) AND maxenergy = (-inf, 5500) AND genergy = <40050, 52010) AND nbumps3 = (-inf, 0.50) AND nbumps2 = (-inf, 1.50) THEN class = {1}\n", "IF ghazard = {a} AND goenergy = (-inf, 152.50) AND gimpuls = <361.50, 653.50) AND maxenergy = (-inf, 7500) AND genergy = <32680, 36470) AND nbumps3 = (-inf, 0.50) THEN class = {1}\n", "IF goenergy = <-37.50, 124.50) AND gimpuls = <537.50, 621) AND genergy = <17635, 28105) AND shift = {W} AND nbumps = (-inf, 0.50) THEN class = {1}\n", "IF ghazard = {a} AND goenergy = <-37.50, 181) AND gimpuls = <240, 470.50) AND genergy = <20485, 27430) AND goimpuls = <-43, inf) AND shift = {W} AND senergy = (-inf, 450) THEN class = {1}\n", "IF goenergy = <-55.50, 297.50) AND gimpuls = <217.50, 796) AND genergy = <13725, 49585) AND goimpuls = <-42.50, inf) AND shift = {W} AND senergy = (-inf, 
1050) AND nbumps2 = (-inf, 0.50) THEN class = {1}\n", "IF goenergy = (-inf, 7.50) AND gimpuls = <54.50, 2085.50) AND genergy = <1510, 569300) AND goimpuls = <-72.50, 28.50) AND senergy = (-inf, 115450) AND seismoacoustic = {a} AND nbumps4 = (-inf, 1.50) AND nbumps2 = (-inf, 3.50) THEN class = {1}\n" ] } ], "source": [ "for rule in corr_ruleset.rules:\n", " print(rule)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### RSS Measure generated rules" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "IF genergy = (-inf, 126350) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1210) AND goimpuls = (-inf, 233.50) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1342.50) AND goimpuls = (-inf, 233.50) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1410) AND goimpuls = (-inf, 233.50) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1485) AND goimpuls = (-inf, 96.50) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1653.50) AND goimpuls = (-inf, 96.50) AND genergy = (-inf, 1006585) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1752) AND goimpuls = (-inf, 96.50) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1822) AND goimpuls = (-inf, 96.50) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF goenergy = (-inf, 104.50) AND gimpuls = (-inf, 2168) AND goimpuls = (-inf, 96.50) AND senergy = (-inf, 550) THEN class = {0}\n", "IF gimpuls = (-inf, 2733) AND genergy = (-inf, 1026530) AND goimpuls = (-inf, 312) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = (-inf, 2733) AND goimpuls = (-inf, 312) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF genergy = (-inf, 1733075) AND goimpuls = (-inf, 312) AND nbumps = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1342.50) AND goimpuls = (-inf, 96.50) AND senergy = (-inf, 650) THEN 
class = {0}\n", "IF maxenergy = (-inf, 550) AND goimpuls = (-inf, 312) THEN class = {0}\n", "IF goenergy = (-inf, 104.50) AND maxenergy = (-inf, 650) AND gimpuls = (-inf, 1210) AND senergy = (-inf, 1550) THEN class = {0}\n", "IF maxenergy = (-inf, 650) AND gimpuls = (-inf, 1732) AND goimpuls = (-inf, 233.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1141.50) AND goimpuls = (-inf, 312) AND nbumps3 = (-inf, 2.50) AND nbumps2 = (-inf, 0.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1141.50) AND goimpuls = (-inf, 312) AND nbumps3 = (-inf, 3.50) AND nbumps2 = (-inf, 0.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1372) AND nbumps2 = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = (-inf, 104.50) AND gimpuls = (-inf, 1655.50) AND genergy = (-inf, 1006585) AND goimpuls = (-inf, 96) AND nbumps3 = (-inf, 2.50) AND nbumps2 = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = (-inf, 104.50) AND gimpuls = (-inf, 1760.50) AND goimpuls = (-inf, 96) AND nbumps3 = (-inf, 3.50) AND nbumps2 = (-inf, 0.50) THEN class = {0}\n", "IF gimpuls = (-inf, 2892) AND goimpuls = (-inf, 312) AND nbumps3 = (-inf, 2.50) AND nbumps2 = (-inf, 0.50) THEN class = {0}\n", "IF nbumps2 = (-inf, 0.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1210) AND goimpuls = (-inf, 96.50) AND nbumps = (-inf, 2.50) THEN class = {0}\n", "IF maxenergy = (-inf, 750) AND gimpuls = (-inf, 1732) AND goimpuls = (-inf, 96.50) AND genergy = (-inf, 703425) THEN class = {0}\n", "IF goenergy = (-inf, 104.50) AND maxenergy = (-inf, 850) AND gimpuls = (-inf, 2888) AND goimpuls = (-inf, 96) THEN class = {0}\n", "IF genergy = (-inf, 31245) AND nbumps3 = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = (-inf, 105.50) AND genergy = (-inf, 31245) AND senergy = (-inf, 4400) AND nbumps2 = (-inf, 2.50) THEN class = {0}\n", "IF goenergy = (-inf, 105.50) AND gimpuls = (-inf, 664.50) AND senergy = (-inf, 27100) AND nbumps = (-inf, 3.50) AND nbumps2 = (-inf, 1.50) THEN class = {0}\n", "IF genergy = (-inf, 31245) AND goimpuls = (-inf, 233.50) AND 
senergy = (-inf, 24700) AND nbumps2 = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = (-inf, 380.50) AND goimpuls = (-inf, 105.50) AND nbumps4 = (-inf, 0.50) AND nbumps = (-inf, 5.50) THEN class = {0}\n", "IF genergy = (-inf, 31245) AND goimpuls = (-inf, 105.50) AND senergy = (-inf, 27650) THEN class = {0}\n", "IF gimpuls = (-inf, 664.50) AND goimpuls = (-inf, 105.50) AND nbumps3 = (-inf, 3.50) AND nbumps4 = (-inf, 2.50) AND nbumps2 = (-inf, 4) THEN class = {0}\n", "IF goenergy = (-inf, 105.50) AND maxenergy = (-inf, 7500) AND genergy = (-inf, 44750) AND senergy = (-inf, 13700) THEN class = {0}\n", "IF gimpuls = (-inf, 1414) AND genergy = (-inf, 48545) AND goimpuls = (-inf, 233.50) THEN class = {0}\n", "IF goenergy = (-inf, 104.50) AND goimpuls = (-inf, 96) AND senergy = (-inf, 1950) AND nbumps2 = (-inf, 2.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1836) AND goimpuls = (-inf, 233.50) AND nbumps3 = (-inf, 0.50) AND nbumps5 = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = (-inf, 104.50) AND maxenergy = (-inf, 1500) AND genergy = (-inf, 531845) THEN class = {0}\n", "IF goenergy = (-inf, 104.50) AND genergy = (-inf, 61125) AND goimpuls = (-inf, 96.50) AND nbumps = (-inf, 6.50) THEN class = {0}\n", "IF goenergy = (-inf, 94.50) AND gimpuls = (-inf, 698) AND genergy = <45830, 105885) AND goimpuls = <-41.50, inf) AND senergy = <3950, 29200) THEN class = {0}\n", "IF gimpuls = (-inf, 2068.50) AND goimpuls = (-inf, 233.50) AND senergy = (-inf, 4400) AND nbumps = (-inf, 2.50) AND nbumps2 = (-inf, 1.50) THEN class = {0}\n", "IF goimpuls = (-inf, 96.50) AND nbumps3 = (-inf, 1.50) AND nbumps = (-inf, 2.50) AND nbumps2 = (-inf, 1.50) THEN class = {0}\n", "IF gimpuls = (-inf, 1139.50) AND nbumps3 = (-inf, 1.50) AND nbumps5 = (-inf, 0.50) THEN class = {0}\n", "IF goenergy = (-inf, 104.50) AND gimpuls = (-inf, 1139.50) AND genergy = (-inf, 366505) AND nbumps3 = (-inf, 2.50) AND nbumps5 = (-inf, 0.50) AND nbumps = (-inf, 4.50) AND nbumps2 = (-inf, 2.50) THEN class = {0}\n", "IF 
goenergy = (-inf, 87.50) AND gimpuls = (-inf, 1655) AND genergy = (-inf, 1505475) AND nbumps = (-inf, 4.50) THEN class = {0}\n", "IF gimpuls = (-inf, 2185.50) AND genergy = (-inf, 1505475) AND goimpuls = (-inf, 96) AND senergy = (-inf, 5750) AND nbumps2 = (-inf, 2.50) THEN class = {0}\n", "IF goenergy = (-inf, 87.50) AND gimpuls = (-inf, 1328) AND senergy = (-inf, 85450) AND nbumps2 = (-inf, 3.50) THEN class = {0}\n", "IF goenergy = (-inf, 87.50) AND maxenergy = (-inf, 4500) AND goimpuls = (-inf, 96) AND senergy = (-inf, 12000) THEN class = {0}\n", "IF genergy = (-inf, 189505) AND goimpuls = (-inf, 312) AND nbumps4 = (-inf, 1.50) AND nbumps2 = (-inf, 1.50) THEN class = {0}\n", "IF goenergy = <-88.50, inf) AND gimpuls = (-inf, 2917) AND goimpuls = (-inf, 312) AND nbumps3 = (-inf, 1.50) AND nbumps2 = (-inf, 2.50) THEN class = {0}\n", "IF goenergy = (-inf, 104.50) AND goimpuls = (-inf, 96.50) AND seismic = {a} AND nbumps3 = (-inf, 3.50) AND senergy = (-inf, 20650) THEN class = {0}\n", "IF goenergy = (-inf, 68.50) AND gimpuls = (-inf, 2681) AND genergy = (-inf, 1026530) AND goimpuls = (-inf, 96.50) AND nbumps3 = (-inf, 3.50) AND nbumps2 = (-inf, 3.50) AND nbumps = (-inf, 6.50) THEN class = {0}\n", "IF gimpuls = <521.50, inf) AND genergy = <57680, inf) THEN class = {1}\n", "IF goenergy = (-inf, 123) AND senergy = <550, inf) THEN class = {1}\n", "IF ghazard = {a} AND goenergy = <68.50, 105.50) AND gimpuls = <483, inf) AND genergy = <46530, 51605) AND nbumps = (-inf, 1.50) THEN class = {1}\n", "IF ghazard = {a} AND goenergy = <7, 58) AND gimpuls = <396, 836) AND genergy = <34315, 43280) AND goimpuls = <-21.50, 28.50) AND nbumps = (-inf, 0.50) THEN class = {1}\n", "IF ghazard = {a} AND goenergy = (-inf, 160) AND gimpuls = <362.50, 732.50) AND maxenergy = (-inf, 850) AND genergy = <32680, 66275) AND senergy = (-inf, 1350) THEN class = {1}\n", "IF goenergy = <14.50, 297.50) AND gimpuls = <133.50, 797) AND maxenergy = (-inf, 1500) AND genergy = <27275, 52010) AND nbumps3 = 
(-inf, 0.50) THEN class = {1}\n", "IF goenergy = <-37.50, 122) AND gimpuls = <537.50, 796) AND genergy = <16805, 29510) AND goimpuls = <-36.50, inf) AND senergy = (-inf, 250) THEN class = {1}\n", "IF ghazard = {a} AND goenergy = <-37.50, inf) AND gimpuls = <240, 473.50) AND genergy = <20485, 25310) AND goimpuls = <-43, inf) AND shift = {W} AND senergy = (-inf, 450) THEN class = {1}\n", "IF goenergy = <-55.50, 124.50) AND gimpuls = <194.50, inf) AND genergy = <9060, inf) AND goimpuls = <-60.50, inf) AND nbumps2 = (-inf, 4.50) THEN class = {1}\n", "IF goenergy = (-inf, 7.50) AND gimpuls = <54.50, 2085.50) AND genergy = <1510, 569300) AND goimpuls = <-72.50, 28.50) AND senergy = (-inf, 92850) AND seismoacoustic = {a} AND nbumps4 = (-inf, 1.50) AND nbumps2 = (-inf, 3.50) THEN class = {1}\n" ] } ], "source": [ "for rule in rss_ruleset.rules:\n", "    print(rule)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Stratified K-Folds cross-validation" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "from IPython.display import display\n", "from sklearn.model_selection import StratifiedKFold\n", "\n", "# Number of cross-validation folds used by the splitter below.\n", "N_SPLITS = 10\n", "\n", "# Use the constant instead of repeating the literal so the fold count is defined in one place.\n", "skf = StratifiedKFold(n_splits=N_SPLITS)\n" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\cezar\\AppData\\Local\\Temp\\ipykernel_36476\\114429488.py:19: RuntimeWarning: invalid value encountered in scalar divide\n", " ppv = tp / (tp + fp)\n", "C:\\Users\\cezar\\AppData\\Local\\Temp\\ipykernel_36476\\114429488.py:36: RuntimeWarning: invalid value encountered in scalar divide\n", " 'Lift': (tp / (tp + fp)) / ((tp + fn) / (tp + tn + fp + fn)),\n", "C:\\Users\\cezar\\AppData\\Local\\Temp\\ipykernel_36476\\114429488.py:19: RuntimeWarning: invalid value encountered in scalar divide\n", " ppv = tp / (tp + fp)\n", "C:\\Users\\cezar\\AppData\\Local\\Temp\\ipykernel_36476\\114429488.py:36: 
RuntimeWarning: invalid value encountered in scalar divide\n", " 'Lift': (tp / (tp + fp)) / ((tp + fn) / (tp + tn + fp + fn)),\n", "C:\\Users\\cezar\\AppData\\Local\\Temp\\ipykernel_36476\\114429488.py:19: RuntimeWarning: invalid value encountered in scalar divide\n", " ppv = tp / (tp + fp)\n", "C:\\Users\\cezar\\AppData\\Local\\Temp\\ipykernel_36476\\114429488.py:36: RuntimeWarning: invalid value encountered in scalar divide\n", " 'Lift': (tp / (tp + fp)) / ((tp + fn) / (tp + tn + fp + fn)),\n" ] } ], "source": [ "# Per-fold results are collected in lists and concatenated once after the loop,\n", "# instead of re-copying a growing DataFrame with pd.concat on every iteration.\n", "c2_fold_ruleset_stats = []\n", "c2_fold_prediction_metrics = []\n", "c2_confusion_matrix = np.array([[0.0, 0.0], [0.0, 0.0]])\n", "\n", "for train_index, test_index in skf.split(x, y):\n", "    x_train, x_test = x.iloc[train_index], x.iloc[test_index]\n", "    y_train, y_test = y.iloc[train_index], y.iloc[test_index]\n", "\n", "    # Train a classifier that uses the C2 measure for induction, pruning and voting.\n", "    clf = RuleClassifier(\n", "        induction_measure=Measures.C2,\n", "        pruning_measure=Measures.C2,\n", "        voting_measure=Measures.C2,\n", "    )\n", "    clf.fit(x_train, y_train)\n", "    c2_ruleset = clf.model\n", "    prediction, classification_metrics = clf.predict(x_test, return_metrics=True)\n", "    fold_metrics, confusion_matrix = get_prediction_metrics('C2', prediction, y_test, classification_metrics)\n", "\n", "    c2_fold_prediction_metrics.append(fold_metrics)\n", "    c2_fold_ruleset_stats.append(get_ruleset_stats('C2', c2_ruleset))\n", "    c2_confusion_matrix += confusion_matrix\n", "\n", "c2_prediction_metrics = pd.concat(c2_fold_prediction_metrics)\n", "c2_ruleset_stats = pd.concat(c2_fold_ruleset_stats)\n", "\n", "# Average the summed confusion matrix over the folds the splitter actually produced,\n", "# so the divisor stays correct even if N_SPLITS is reassigned elsewhere in the notebook.\n", "c2_confusion_matrix /= skf.get_n_splits()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Rules characteristics " ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "time_total_s 1.127413\n", "time_growing_s 0.937007\n", "time_pruning_s 0.151819\n", "rules_count 167.700000\n", "conditions_per_rule 4.135487\n", "induced_conditions_per_rule 13.639392\n", "avg_rule_coverage 0.167726\n", "avg_rule_precision 0.918341\n", "avg_rule_quality 0.485287\n", "pvalue 
0.046021\n", "FDR_pvalue 0.049992\n", "FWER_pvalue 0.603958\n", "fraction_significant 0.806265\n", "fraction_FDR_significant 0.787313\n", "fraction_FWER_significant 0.633591\n", "dtype: float64" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "display(c2_ruleset_stats.mean())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Rules evaluation (average)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Accuracy 0.899071\n", "MAE 0.100929\n", "Kappa 0.047367\n", "Balanced accuracy 0.535887\n", "Logistic loss 3.486010\n", "Precision 3.486010\n", "Sensitivity 0.117647\n", "Specificity 0.954127\n", "NPV 0.939956\n", "PPV 0.197777\n", "psep 0.140239\n", "Fall-out 0.045873\n", "Youden's J statistic 0.071774\n", "Lift 3.013186\n", "F-measure 0.073023\n", "Fowlkes-Mallows index 0.901979\n", "False positive 11.100000\n", "False negative 15.000000\n", "True positive 2.000000\n", "True negative 230.300000\n", "Rules per example 24.002394\n", "Voting conflicts 111.300000\n", "Geometric mean 0.180079\n", "dtype: float64" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "display(c2_prediction_metrics.mean())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Confusion matrix (average)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
0230.311.1
115.02.0
\n", "
" ], "text/plain": [ " 0 1\n", "0 230.3 11.1\n", "1 15.0 2.0" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "display(pd.DataFrame(c2_confusion_matrix))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Hyperparameters tuning\n", "\n", "This step is going to take a while: every parameter combination in the grid is cross-validated." ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Best BAC: 0.626780 using {'induction_measure': , 'minsupp_new': 5, 'pruning_measure': , 'voting_measure': }\n" ] } ], "source": [ "from sklearn.datasets import make_blobs\n", "from sklearn.model_selection import StratifiedKFold\n", "from sklearn.model_selection import GridSearchCV\n", "from sklearn.linear_model import LogisticRegression\n", "from rulekit.params import Measures\n", "import numpy as np\n", "\n", "# Fold count for the grid search only. It has its own name so that it does not\n", "# overwrite the N_SPLITS constant used by the cross-validation section above.\n", "GRID_SEARCH_N_SPLITS = 3\n", "\n", "# define models and parameters\n", "model = RuleClassifier()\n", "minsupp_new = range(3, 15, 2)\n", "measures_choice = [Measures.C2, Measures.RSS, Measures.WeightedLaplace, Measures.Correlation]\n", "\n", "# define grid search: every minsupp_new value is combined with every choice of\n", "# induction, pruning and voting measure\n", "grid = {\n", "    'minsupp_new': minsupp_new,\n", "    'induction_measure': measures_choice,\n", "    'pruning_measure': measures_choice,\n", "    'voting_measure': measures_choice\n", "}\n", "cv = StratifiedKFold(n_splits=GRID_SEARCH_N_SPLITS)\n", "grid_search = GridSearchCV(estimator=model, param_grid=grid, cv=cv, scoring='balanced_accuracy')\n", "grid_result = grid_search.fit(x, y)\n", "\n", "# summarize results\n", "print(\"Best BAC: %f using %s\" % (grid_result.best_score_, grid_result.best_params_))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Building model with tuned hyperparameters\n", "\n", "### Split the dataset into train and test sets (80%/20%)" 
] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from sklearn.model_selection import train_test_split\n", "from IPython.display import display\n", "from rulekit.params import Measures\n", "\n", "# Fixed seed so the shuffled 80%/20% split, and every metric computed from it,\n", "# is the same on each run of the notebook.\n", "RANDOM_STATE = 42\n", "\n", "x_train, x_test, y_train, y_test = train_test_split(\n", "    x, y, test_size=0.2, shuffle=True, random_state=RANDOM_STATE\n", ")\n", "\n", "# Train a classifier with the best parameter combination found by the grid search.\n", "clf = RuleClassifier(**grid_result.best_params_)\n", "clf.fit(x_train, y_train)\n", "ruleset = clf.model\n", "ruleset_stats = get_ruleset_stats('Best', ruleset)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Rules characteristics" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "time_total_s 0.221808\n", "time_growing_s 0.183045\n", "time_pruning_s 0.029816\n", "rules_count 42.000000\n", "conditions_per_rule 2.404762\n", "induced_conditions_per_rule 9.357143\n", "avg_rule_coverage 0.498744\n", "avg_rule_precision 0.866231\n", "avg_rule_quality 1.103251\n", "pvalue 0.021661\n", "FDR_pvalue 0.022077\n", "FWER_pvalue 0.038384\n", "fraction_significant 0.928571\n", "fraction_FDR_significant 0.928571\n", "fraction_FWER_significant 0.928571\n", "dtype: float64" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "display(ruleset_stats.mean())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Validate model on test dataset" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Accuracy 0.903288\n", "MAE 0.096712\n", "Kappa 0.355362\n", "Balanced accuracy 0.729543\n", "Logistic loss 3.340361\n", "Precision 3.340361\n", "Sensitivity 0.531250\n", "Specificity 0.927835\n", "NPV 0.967742\n", "PPV 0.326923\n", "psep 0.294665\n", "Fall-out 0.072165\n", "Youden's J statistic 0.459085\n", "Lift 5.281851\n", "F-measure 0.404762\n", "Fowlkes-Mallows index 0.897822\n", "False positive 35.000000\n", "False negative 15.000000\n", "True positive 
17.000000\n", "True negative 450.000000\n", "Rules per example 20.839458\n", "Voting conflicts 446.000000\n", "Geometric mean 0.702077\n", "dtype: float64" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
045035
11517
\n", "
" ], "text/plain": [ " 0 1\n", "0 450 35\n", "1 15 17" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Predict on the held-out test split and request the classification metrics alongside the labels.\n", "prediction, classification_metrics = clf.predict(x_test, return_metrics=True)\n", "prediction_metrics, confusion_matrix = get_prediction_metrics(\n", "    'Best', prediction, y_test, classification_metrics\n", ")\n", "\n", "# Show the averaged metrics first, then the confusion matrix as a table.\n", "mean_prediction_metrics = prediction_metrics.mean()\n", "confusion_matrix_frame = pd.DataFrame(confusion_matrix)\n", "display(mean_prediction_metrics, confusion_matrix_frame)" ] } ], "metadata": { "kernelspec": { "display_name": "env", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.10" }, "orig_nbformat": 2 }, "nbformat": 4, "nbformat_minor": 2 }