{ "cells": [
 { "cell_type": "code", "execution_count": 4, "id": "7c5d059b-ed8a-4e2e-9420-25890f648895", "metadata": {},
  "outputs": [ { "name": "stderr", "output_type": "stream", "text": [
   "/tmp/ipykernel_42878/2472232159.py:7: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n",
   " df = pd.read_sql('select * from data_safeidx', con=engine)\n" ] } ],
  "source": [
   "import os\n",
   "\n",
   "import matplotlib.pyplot as plt\n",
   "import numpy as np\n",
   "import pandas as pd\n",
   "import psycopg2 as pg\n",
   "from sklearn.model_selection import train_test_split\n",
   "\n",
   "# Connection settings are read from the standard libpq environment variables\n",
   "# so that no host, user or password is stored in the notebook.\n",
   "# PGUSER, PGPASSWORD and PGHOST are required (KeyError if missing);\n",
   "# PGDATABASE and PGPORT fall back to the values this project has always used.\n",
   "engine = pg.connect(\n",
   "    dbname=os.environ.get('PGDATABASE', 'safeidx'),\n",
   "    user=os.environ['PGUSER'],\n",
   "    password=os.environ['PGPASSWORD'],\n",
   "    host=os.environ['PGHOST'],\n",
   "    port=os.environ.get('PGPORT', '5432'),\n",
   ")\n",
   "df = pd.read_sql('select * from data_safeidx', con=engine)" ] },
 { "cell_type": "code", "execution_count": 6, "id": "03aa2a04-93fa-469e-a678-685cacdebd6c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
difficultycausetownprovincegenderequipmenthelmetdestinationdiagnosisindiaagecountryinjury_sideinjury_general_locationevacuation_vehicles
0novicefall_aloneSIKLOSFskiNonehospital_emergency_roomdistortionNone32.0UngheriaLlower_limbs[akja]
1advancedfall_aloneMALMOMskiNonehospital_emergency_roombruiseNone32.0SveziaRskull_or_face[akja]
2advancedfall_aloneCALDAROBZFskiNonedomicileotherNone12.0ItaliaRNone[snowmobile]
3advancedcollision_personLINZMskiNonehospital_emergency_roombruiseNone58.0AustriaRlower_limbs[snowmobile]
4advancedcollision_personRUSAVAMskiNoneotherbruiseNone25.0Repubblica CecaLlower_limbs[other]
\n", "
" ], "text/plain": [
   " difficulty cause town province gender equipment helmet \\\n",
   "0 novice fall_alone SIKLOS F ski None \n",
   "1 advanced fall_alone MALMO M ski None \n",
   "2 advanced fall_alone CALDARO BZ F ski None \n",
   "3 advanced collision_person LINZ M ski None \n",
   "4 advanced collision_person RUSAVA M ski None \n",
   "\n",
   " destination diagnosis india age country \\\n",
   "0 hospital_emergency_room distortion None 32.0 Ungheria \n",
   "1 hospital_emergency_room bruise None 32.0 Svezia \n",
   "2 domicile other None 12.0 Italia \n",
   "3 hospital_emergency_room bruise None 58.0 Austria \n",
   "4 other bruise None 25.0 Repubblica Ceca \n",
   "\n",
   " injury_side injury_general_location evacuation_vehicles \n",
   "0 L lower_limbs [akja] \n",
   "1 R skull_or_face [akja] \n",
   "2 R None [snowmobile] \n",
   "3 R lower_limbs [snowmobile] \n",
   "4 L lower_limbs [other] " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ],
  "source": [ "df.head()" ] },
 { "cell_type": "code", "execution_count": null, "id": "babc2e8b-1030-4e8a-aa41-6d2a788959a5", "metadata": {}, "outputs": [],
  "source": [
   "# Collect every distinct evacuation vehicle name.\n",
   "# Each row of 'evacuation_vehicles' appears to hold a list of names (see df.head()).\n",
   "# This cell must run BEFORE the cell that drops 'evacuation_vehicles';\n",
   "# re-running it afterwards fails because the column is gone.\n",
   "ev = set().union(*df['evacuation_vehicles'])" ] },
 { "cell_type": "code", "execution_count": 8, "id": "c8d6cc1c-f4f5-44ec-8652-b135963452ab", "metadata": {}, "outputs": [],
  "source": [
   "# One boolean indicator column per vehicle, initialised to False ...\n",
   "for c in ev:\n",
   "    df[c] = False\n",
   "# ... then flag the vehicles actually used in each row.\n",
   "for i, vehicles in zip(df.index, df['evacuation_vehicles']):\n",
   "    for c in vehicles:\n",
   "        df.loc[i, c] = True" ] },
 { "cell_type": "code", "execution_count": 9, "id": "758c3317-1b02-4aed-b94d-7b6998d23797", "metadata": {}, "outputs": [],
  "source": [
   "# 'evacuation_vehicles' is now encoded by the indicator columns above.\n",
   "df = df.drop(columns=['town', 'province', 'evacuation_vehicles'])" ] },
 { "cell_type": "code", "execution_count": null, "id": "adadc0dc-9d6e-4277-8956-d1d4b2492e7e", "metadata": {}, "outputs": [], "source": [] },
 { "cell_type": "code", "execution_count": 100, "id": "33617e77-7c2b-41a3-96c0-8930aa5ac869", "metadata": {},
  "outputs": [ { "data": { "text/plain": [
   "(array([1.3808e+04, 0.0000e+00, 8.8100e+02, 0.0000e+00, 0.0000e+00,\n",
   " 3.3690e+03, 0.0000e+00, 1.5200e+02, 0.0000e+00, 1.1000e+01]),\n",
   " array([0. , 0.4, 0.8, 1.2, 1.6, 2. , 2.4, 2.8, 3.2, 3.6, 4. ]),\n",
   " )" ] }, "execution_count": 100, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "labeled = df[~pd.isna(df.india)]\n", "plt.hist(labeled.india)" ] }, { "cell_type": "code", "execution_count": 101, "id": "2bda819b-4d4b-4e71-960a-53dd74d80b71", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_42878/2289208715.py:1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " labeled['is_i1']=0\n" ] }, { "data": { "text/plain": [ "(array([ 3532., 0., 0., 0., 0., 0., 0., 0.,\n", " 0., 14689.]),\n", " array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ]),\n", " )" ] }, "execution_count": 101, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjkAAAGdCAYAAADwjmIIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAsVElEQVR4nO3df1RVdb7/8ReIgJnn4I8LeL6h0o9RKUdLJ6TSpokljeRc7thNkzHvDMk0QZNSKo6F9hOjrLQcuVYzutbo1ZyV3EIHZXCMSQkV5aqkVDdKHe+BWsg5Sokg+/vHLHadpBI7gOfT87HWXmvO/rz3Z7/3Z6zzWtt9dkGWZVkCAAAwTHB3NwAAANAZCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACOFdHcD3am1tVXHjx9Xnz59FBQU1N3tAACA82BZlk6ePCmXy6Xg4K+/X/O9DjnHjx9XTExMd7cBAAAuwNGjR3XZZZd97fj3OuT06dNH0j8XyeFwdHM3AADgfHi9XsXExNjf41/nex1y2v6KyuFwEHIAAAgw3/aoCQ8eAwAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABgppLsbAAAA325I9qbubqHDPlqc3K3n504OAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEbqcMgpLS3VpEmT5HK5FBQUpIKCgq+tvffeexUUFKQXXnjBZ399fb1SU1PlcDgUERGhtLQ0nTp1yqdm//79GjdunMLDwxUTE6O8vLxz5t+wYYOGDRum8PBwjRgxQps3b+7o5QAAAEN1OOQ0NjZq5MiRWr58+TfWbdy4Ue+8845cLtc5Y6mpqaqqqlJxcbEKCwtVWlqq9PR0e9zr9WrChAkaPHiwKi
oq9Mwzz2jRokVauXKlXbNz507dddddSktL0759+5SSkqKUlBQdPHiwo5cEAAAMFGRZlnXBBwcFaePGjUpJSfHZ/49//EPx8fHasmWLkpOTNWvWLM2aNUuSdOjQIcXFxWn37t0aM2aMJKmoqEgTJ07UsWPH5HK5tGLFCi1YsEBut1uhoaGSpOzsbBUUFOjw4cOSpClTpqixsVGFhYX2eceOHatRo0YpPz//vPr3er1yOp3yeDxyOBwXugwAAHS6IdmburuFDvtocXKnzHu+399+fyantbVV06dP15w5c3T11VefM15WVqaIiAg74EhSYmKigoODVV5ebteMHz/eDjiSlJSUpOrqap04ccKuSUxM9Jk7KSlJZWVlX9tbU1OTvF6vzwYAAMzk95Dz9NNPKyQkRL/97W/bHXe73YqMjPTZFxISon79+sntdts1UVFRPjVtn7+tpm28Pbm5uXI6nfYWExPTsYsDAAABw68hp6KiQkuXLtWqVasUFBTkz6n9Yv78+fJ4PPZ29OjR7m4JAAB0Er+GnL///e+qq6vToEGDFBISopCQEH388cd68MEHNWTIEElSdHS06urqfI5raWlRfX29oqOj7Zra2lqfmrbP31bTNt6esLAwORwOnw0AAJjJryFn+vTp2r9/vyorK+3N5XJpzpw52rJliyQpISFBDQ0NqqiosI/btm2bWltbFR8fb9eUlpaqubnZrikuLtbQoUPVt29fu6akpMTn/MXFxUpISPDnJQEAgAAV0tEDTp06pQ8++MD+XFNTo8rKSvXr10+DBg1S//79fep79uyp6OhoDR06VJI0fPhw3XbbbZo5c6by8/PV3NyszMxMTZ061f65+bRp0/Too48qLS1N8+bN08GDB7V06VI9//zz9rwPPPCAbr75Zi1ZskTJyclat26d9uzZ4/MzcwAA8P3V4Ts5e/bs0bXXXqtrr71WkpSVlaVrr71WOTk55z3HmjVrNGzYMN16662aOHGibrrpJp9w4nQ6tXXrVtXU1Gj06NF68MEHlZOT4/MunRtuuEFr167VypUrNXLkSP35z39WQUGBrrnmmo5eEgAAMNB3ek9OoOM9OQCAQMF7cr7Qbe/JAQAAuBgQcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwUodDTmlpqSZNmiSXy6WgoCAVFBTYY83NzZo3b55GjBih3r17y+Vy6e6779bx48d95qivr1dqaqocDociIiKUlpamU6dO+dTs379f48aNU3h4uGJiYpSXl3dOLxs2bNCwYcMUHh6uESNGaPPmzR29HAAAYKgOh5zGxkaNHDlSy5cvP2fss88+0969e/XII49o7969ev3111VdXa2f/exnPnWpqamqqqpScXGxCgsLVVpaqvT0dHvc6/VqwoQJGjx4sCoqKvTMM89o0aJFWrlypV2zc+dO3XXXXUpLS9O+ffuUkpKilJQUHTx4sKOXBAAADBRkWZZ1wQcHBWnjxo1KSUn52prdu3fr+uuv18cff6xBgwbp0KFDiouL0+7duzVmzBhJUlFRkSZOnKhjx47J5XJpxYoVWrBggdxut0JDQyVJ2dnZKigo0OHDhyVJU6ZMUWNjowoLC+1zjR07VqNGjVJ+fv559e/1euV0OuXxeORwOC5wFQAA6HxDsjd1dwsd9tHi5E6Z93y/vzv9mRyPx6OgoCBFRERIksrKyhQREWEHHElKTExUcHCwysvL7Zrx48fbAUeSkpKSVF1drRMnTtg1iY
mJPudKSkpSWVnZ1/bS1NQkr9frswEAADN1asg5ffq05s2bp7vuustOWm63W5GRkT51ISEh6tevn9xut10TFRXlU9P2+dtq2sbbk5ubK6fTaW8xMTHf7QIBAMBFq9NCTnNzs+68805ZlqUVK1Z01mk6ZP78+fJ4PPZ29OjR7m4JAAB0kpDOmLQt4Hz88cfatm2bz9+XRUdHq66uzqe+paVF9fX1io6Otmtqa2t9ato+f1tN23h7wsLCFBYWduEXBgAAAobf7+S0BZz3339ff/3rX9W/f3+f8YSEBDU0NKiiosLet23bNrW2tio+Pt6uKS0tVXNzs11TXFysoUOHqm/fvnZNSUmJz9zFxcVKSEjw9yUBAIAA1OGQc+rUKVVWVqqyslKSVFNTo8rKSh05ckTNzc264447tGfPHq1Zs0Znz56V2+2W2+3WmTNnJEnDhw/XbbfdppkzZ2rXrl3asWOHMjMzNXXqVLlcLknStGnTFBoaqrS0NFVVVWn9+vVaunSpsrKy7D4eeOABFRUVacmSJTp8+LAWLVqkPXv2KDMz0w/LAgAAAl2Hf0K+fft23XLLLefsnzFjhhYtWqTY2Nh2j/vb3/6mH//4x5L++TLAzMxMvfnmmwoODtbkyZO1bNkyXXrppXb9/v37lZGRod27d2vAgAG6//77NW/ePJ85N2zYoIcfflgfffSRrrrqKuXl5WnixInnfS38hBwAECj4CfkXzvf7+zu9JyfQEXIAAIGCkPOFi+Y9OQAAAN2BkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAI3U45JSWlmrSpElyuVwKCgpSQUGBz7hlWcrJydHAgQPVq1cvJSYm6v333/epqa+vV2pqqhwOhyIiIpSWlqZTp0751Ozfv1/jxo1TeHi4YmJilJeXd04vGzZs0LBhwxQeHq4RI0Zo8+bNHb0cAABgqA6HnMbGRo0cOVLLly9vdzwvL0/Lli1Tfn6+ysvL1bt3byUlJen06dN2TWpqqqqqqlRcXKzCwkKVlpYqPT3dHvd6vZowYYIGDx6siooKPfPMM1q0aJFWrlxp1+zcuVN33XWX0tLStG/fPqWkpCglJUUHDx7s6CUBAAADBVmWZV3wwUFB2rhxo1JSUiT98y6Oy+XSgw8+qIceekiS5PF4FBUVpVWrVmnq1Kk6dOiQ4uLitHv3bo0ZM0aSVFRUpIkTJ+rYsWNyuVxasWKFFixYILfbrdDQUElSdna2CgoKdPjwYUnSlClT1NjYqMLCQrufsWPHatSoUcrPzz+v/r1er5xOpzwejxwOx4UuAwAAnW5I9qbubqHDPlqc3Cnznu/3t1+fyampqZHb7VZiYqK9z+l0Kj4+XmVlZZKksrIyRURE2AFHkhITExUcHKzy8nK7Zvz48XbAkaSkpCRVV1frxIkTds2Xz9NW03YeAADw/Rbiz8ncbrckKSoqymd/VFSUPeZ2uxUZGenbREiI+vXr51MTGxt7zhxtY3379pXb7f7G87SnqalJTU1N9mev19uRywMAAAHke/XrqtzcXDmdTnuLiYnp7pYAAEAn8WvIiY6OliTV1tb67K+trbXHoqOjVVdX5zPe0tKi+vp6n5r25vjyOb6upm28PfPnz5fH47G3o0ePdvQSAQBAgPBryImNjVV0dLRKSkrsfV6vV+Xl5UpISJAkJSQkqKGhQRUVFXbNtm3b1Nraqvj4eLumtLRUzc3Ndk1xcbGGDh2qvn372jVfPk9bTdt52hMWFiaHw+GzAQ
AAM3U45Jw6dUqVlZWqrKyU9M+HjSsrK3XkyBEFBQVp1qxZeuKJJ/TGG2/owIEDuvvuu+VyuexfYA0fPly33XabZs6cqV27dmnHjh3KzMzU1KlT5XK5JEnTpk1TaGio0tLSVFVVpfXr12vp0qXKysqy+3jggQdUVFSkJUuW6PDhw1q0aJH27NmjzMzM774qAAAg4HX4weM9e/bolltusT+3BY8ZM2Zo1apVmjt3rhobG5Wenq6GhgbddNNNKioqUnh4uH3MmjVrlJmZqVtvvVXBwcGaPHmyli1bZo87nU5t3bpVGRkZGj16tAYMGKCcnByfd+nccMMNWrt2rR5++GH97ne/01VXXaWCggJdc801F7QQAADALN/pPTmBjvfkAAACBe/J+UK3vCcHAADgYkHIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIfg85Z8+e1SOPPKLY2Fj16tVLV1xxhR5//HFZlmXXWJalnJwcDRw4UL169VJiYqLef/99n3nq6+uVmpoqh8OhiIgIpaWl6dSpUz41+/fv17hx4xQeHq6YmBjl5eX5+3IAAECA8nvIefrpp7VixQq99NJLOnTokJ5++mnl5eXpxRdftGvy8vK0bNky5efnq7y8XL1791ZSUpJOnz5t16SmpqqqqkrFxcUqLCxUaWmp0tPT7XGv16sJEyZo8ODBqqio0DPPPKNFixZp5cqV/r4kAAAQgIKsL99i8YPbb79dUVFRevXVV+19kydPVq9evfSnP/1JlmXJ5XLpwQcf1EMPPSRJ8ng8ioqK0qpVqzR16lQdOnRIcXFx2r17t8aMGSNJKioq0sSJE3Xs2DG5XC6tWLFCCxYskNvtVmhoqCQpOztbBQUFOnz48Hn16vV65XQ65fF45HA4/LkMAAD41ZDsTd3dQod9tDi5U+Y93+9vv9/JueGGG1RSUqL33ntPkvQ///M/evvtt/XTn/5UklRTUyO3263ExET7GKfTqfj4eJWVlUmSysrKFBERYQccSUpMTFRwcLDKy8vtmvHjx9sBR5KSkpJUXV2tEydO+PuyAABAgAnx94TZ2dnyer0aNmyYevToobNnz+rJJ59UamqqJMntdkuSoqKifI6Lioqyx9xutyIjI30bDQlRv379fGpiY2PPmaNtrG/fvuf01tTUpKamJvuz1+v9LpcKAAAuYn6/k/Paa69pzZo1Wrt2rfbu3avVq1fr2Wef1erVq/19qg7Lzc2V0+m0t5iYmO5uCQAAdBK/h5w5c+YoOztbU6dO1YgRIzR9+nTNnj1bubm5kqTo6GhJUm1trc9xtbW19lh0dLTq6up8xltaWlRfX+9T094cXz7HV82fP18ej8fejh49+h2vFgAAXKz8HnI+++wzBQf7TtujRw+1trZKkmJjYxUdHa2SkhJ73Ov1qry8XAkJCZKkhIQENTQ0qKKiwq7Ztm2bWltbFR8fb9eUlpaqubnZrikuLtbQoUPb/asqSQoLC5PD4fDZAACAmfweciZNmqQnn3xSmzZt0kcffaSNGzfqueee07/9279JkoKCgjRr1iw98cQTeuONN3TgwAHdfffdcrlcSklJkSQNHz5ct912m2bOnKldu3Zpx44dyszM1NSpU+VyuSRJ06ZNU2hoqNLS0lRVVaX169dr6dKlysrK8vclAQCAAOT3B49ffPFFPfLII7rvvvtUV1cnl8ulX//618rJybFr5s6dq8bGRqWnp6uhoUE33XSTioqKFB4ebtesWbNGmZ
mZuvXWWxUcHKzJkydr2bJl9rjT6dTWrVuVkZGh0aNHa8CAAcrJyfF5lw4AAPj+8vt7cgIJ78kBAAQK3pPzhW57Tw4AAMDFgJADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJE6JeT84x//0C9+8Qv1799fvXr10ogRI7Rnzx573LIs5eTkaODAgerVq5cSExP1/vvv+8xRX1+v1NRUORwORUREKC0tTadOnfKp2b9/v8aNG6fw8HDFxMQoLy+vMy4HAAAEIL+HnBMnTujGG29Uz5499Ze//EXvvvuulixZor59+9o1eXl5WrZsmfLz81VeXq7evXsrKSlJp0+ftmtSU1NVVVWl4uJiFRYWqrS0VOnp6fa41+vVhAkTNHjwYFVUVOiZZ57RokWLtHLlSn9fEgAACEBBlmVZ/pwwOztbO3bs0N///vd2xy3Lksvl0oMPPqiHHnpIkuTxeBQVFaVVq1Zp6tSpOnTokOLi4rR7926NGTNGklRUVKSJEyfq2LFjcrlcWrFihRYsWCC3263Q0FD73AUFBTp8+PB59er1euV0OuXxeORwOPxw9QAAdI4h2Zu6u4UO+2hxcqfMe77f336/k/PGG29ozJgx+vd//3dFRkbq2muv1csvv2yP19TUyO12KzEx0d7ndDoVHx+vsrIySVJZWZkiIiLsgCNJiYmJCg4OVnl5uV0zfvx4O+BIUlJSkqqrq3XixIl2e2tqapLX6/XZAACAmfwecj788EOtWLFCV111lbZs2aLf/OY3+u1vf6vVq1dLktxutyQpKirK57ioqCh7zO12KzIy0mc8JCRE/fr186lpb44vn+OrcnNz5XQ67S0mJuY7Xi0AALhY+T3ktLa26rrrrtNTTz2la6+9Vunp6Zo5c6by8/P9faoOmz9/vjwej70dPXq0u1sCAACdxO8hZ+DAgYqLi/PZN3z4cB05ckSSFB0dLUmqra31qamtrbXHoqOjVVdX5zPe0tKi+vp6n5r25vjyOb4qLCxMDofDZwMAAGbye8i58cYbVV1d7bPvvffe0+DBgyVJsbGxio6OVklJiT3u9XpVXl6uhIQESVJCQoIaGhpUUVFh12zbtk2tra2Kj4+3a0pLS9Xc3GzXFBcXa+jQoT6/5AIAAN9Pfg85s2fP1jvvvKOnnnpKH3zwgdauXauVK1cqIyNDkhQUFKRZs2bpiSee0BtvvKEDBw7o7rvvlsvlUkpKiqR/3vm57bbbNHPmTO3atUs7duxQZmampk6dKpfLJUmaNm2aQkNDlZaWpqqqKq1fv15Lly5VVlaWvy8JAAAEoBB/T/ijH/1IGzdu1Pz58/XYY48pNjZWL7zwglJTU+2auXPnqrGxUenp6WpoaNBNN92koqIihYeH2zVr1qxRZmambr31VgUHB2vy5MlatmyZPe50OrV161ZlZGRo9OjRGjBggHJycnzepQMAAL6//P6enEDCe3IAAIGC9+R8odvekwMAAHAxIOQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQ
AAwEiEHAAAYKRODzmLFy9WUFCQZs2aZe87ffq0MjIy1L9/f1166aWaPHmyamtrfY47cuSIkpOTdckllygyMlJz5sxRS0uLT8327dt13XXXKSwsTFdeeaVWrVrV2ZcDAAACRKeGnN27d+s///M/9cMf/tBn/+zZs/Xmm29qw4YNeuutt3T8+HH9/Oc/t8fPnj2r5ORknTlzRjt37tTq1au1atUq5eTk2DU1NTVKTk7WLbfcosrKSs2aNUv33HOPtmzZ0pmXBAAAAkSnhZxTp04pNTVVL7/8svr27Wvv93g8evXVV/Xcc8/pJz/5iUaPHq0//vGP2rlzp9555x1J0tatW/Xuu+/qT3/6k0aNGqWf/vSnevzxx7V8+XKdOXNGkpSfn6/Y2FgtWbJEw4cPV2Zmpu644w49//zznXVJAAAggHRayMnIyFBycrISExN99ldUVKi5udln/7BhwzRo0CCVlZVJksrKyjRixAhFRUXZNUlJSfJ6vaqqqrJrvjp3UlKSPUd7mpqa5PV6fTYAAGCmkM6YdN26ddq7d6927959zpjb7VZoaKgiIiJ89kdFRcntdts1Xw44beNtY99U4/V69fnnn6tXr17nnDs3N1ePPvroBV8XAAAIHH6/k3P06FE98MADWrNmjcLDw/09/Xcyf/58eTweezt69Gh3twQAADqJ30NORUWF6urqdN111ykkJEQhISF66623tGzZMoWEhCgqKkpnzpxRQ0ODz3G1tbWKjo6WJEVHR5/za6u2z99W43A42r2LI0lhYWFyOBw+GwAAMJPfQ86tt96qAwcOqLKy0t7GjBmj1NRU+3/37NlTJSUl9jHV1dU6cuSIEhISJEkJCQk6cOCA6urq7Jri4mI5HA7FxcXZNV+eo62mbQ4AAPD95vdncvr06aNrrrnGZ1/v3r3Vv39/e39aWpqysrLUr18/ORwO3X///UpISNDYsWMlSRMmTFBcXJymT5+uvLw8ud1uPfzww8rIyFBYWJgk6d5779VLL72kuXPn6le/+pW2bdum1157TZs2bfL3JQEAgADUKQ8ef5vnn39ewcHBmjx5spqampSUlKTf//739niPHj1UWFio3/zmN0pISFDv3r01Y8YMPfbYY3ZNbGysNm3apNmzZ2vp0qW67LLL9MorrygpKak7LgkAAFxkgizLsrq7ie7i9XrldDrl8Xh4PgcAcFEbkh14f1Px0eLkTpn3fL+/+W9XAQAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwUrf8hPz7gKfgAQDoXtzJAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYye8hJzc3Vz/60Y/Up08fRUZGKiUlRdXV1T41p0+fVkZGhvr3769LL71UkydPVm1trU/NkSNHlJycrEsuuUSRkZGaM2eOWlpafGq2b9+u6667TmFhYbryyiu1atUqf18OAAAIUH4POW+99ZYyMjL0zjvvqLi4WM3NzZowYYIaGxvtmtmzZ+vNN9/Uhg0b9NZbb+n48eP6+c9/bo+fPXtWycnJOnPmjHbu3KnVq1dr1apVysnJsWtqamqUnJysW265RZWVlZo1a5buuecebdmyxd+XBAAAAlCQZVlWZ57gk08+UWRkpN566y2NHz9eHo9H//Iv/6K1a9fqjjvukCQdPnxYw4cPV1lZmcaOHau//OUvuv3223X8+HFFRUVJkvLz8zVv3jx98sknCg0N1bx587Rp0yYdPHjQPtfUqVPV0NCgoqKi8+rN6/XK6XTK4/HI4XD49bqHZG/y63xd4aPFyd3dAg
Dga/C98oXz/f7u9GdyPB6PJKlfv36SpIqKCjU3NysxMdGuGTZsmAYNGqSysjJJUllZmUaMGGEHHElKSkqS1+tVVVWVXfPlOdpq2uZoT1NTk7xer88GAADM1Kkhp7W1VbNmzdKNN96oa665RpLkdrsVGhqqiIgIn9qoqCi53W675ssBp228beybarxerz7//PN2+8nNzZXT6bS3mJiY73yNAADg4tSpIScjI0MHDx7UunXrOvM0523+/PnyeDz2dvTo0e5uCQAAdJKQzpo4MzNThYWFKi0t1WWXXWbvj46O1pkzZ9TQ0OBzN6e2tlbR0dF2za5du3zma/v11ZdrvvqLrNraWjkcDvXq1avdnsLCwhQWFvadrw0AAFz8/H4nx7IsZWZmauPGjdq2bZtiY2N9xkePHq2ePXuqpKTE3lddXa0jR44oISFBkpSQkKADBw6orq7OrikuLpbD4VBcXJxd8+U52mra5gAAAN9vfr+Tk5GRobVr1+q///u/1adPH/sZGqfTqV69esnpdCotLU1ZWVnq16+fHA6H7r//fiUkJGjs2LGSpAkTJiguLk7Tp09XXl6e3G63Hn74YWVkZNh3Yu6991699NJLmjt3rn71q19p27Zteu2117RpU+A9fQ4AAPzP73dyVqxYIY/Hox//+McaOHCgva1fv96uef7553X77bdr8uTJGj9+vKKjo/X666/b4z169FBhYaF69OihhIQE/eIXv9Ddd9+txx57zK6JjY3Vpk2bVFxcrJEjR2rJkiV65ZVXlJSU5O9LAgAAAajT35NzMeM9Ob54Tw4AXLz4XvnCRfOeHAAAgO5AyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASAEfcpYvX64hQ4YoPDxc8fHx2rVrV3e3BAAALgIBHXLWr1+vrKwsLVy4UHv37tXIkSOVlJSkurq67m4NAAB0s4AOOc8995xmzpypX/7yl4qLi1N+fr4uueQS/eEPf+ju1gAAQDcL6e4GLtSZM2dUUVGh+fPn2/uCg4OVmJiosrKydo9pampSU1OT/dnj8UiSvF6v3/trbfrM73N2ts5YBwCAf/C9cu68lmV9Y13AhpxPP/1UZ8+eVVRUlM/+qKgoHT58uN1jcnNz9eijj56zPyYmplN6DDTOF7q7AwCASTr7e+XkyZNyOp1fOx6wIedCzJ8/X1lZWfbn1tZW1dfXq3///goKCvLbebxer2JiYnT06FE5HA6/zQtfrHPXYa27BuvcNVjnrtGZ62xZlk6ePCmXy/WNdQEbcgYMGKAePXqotrbWZ39tba2io6PbPSYsLExhYWE++yIiIjqrRTkcDv4B6gKsc9dhrbsG69w1WOeu0Vnr/E13cNoE7IPHoaGhGj16tEpKSux9ra2tKikpUUJCQjd2BgAALgYBeydHkrKysjRjxgyNGTNG119/vV544QU1Njbql7/8ZXe3BgAAullAh5wpU6bok08+UU5Ojtxut0aNGqWioqJzHkbuamFhYVq4cOE5fzUG/2Kduw5r3TVY567BOneNi2Gdg6xv+/0VAABAAArYZ3IAAAC+CSEHAAAYiZADAACMRMgBAABGIuRcoOXLl2vIkCEKDw9XfHy8du3a9Y31GzZs0LBhwxQeHq4RI0Zo8+bNXdRpYOvIOr/88ssaN26c+vbtq759+yoxMfFb/3/BP3X0z3ObdevWKSgoSCkpKZ3boEE6utYNDQ3KyMjQwIEDFRYWph/84Af8++M8dHSdX3jhBQ
0dOlS9evVSTEyMZs+erdOnT3dRt4GptLRUkyZNksvlUlBQkAoKCr71mO3bt+u6665TWFiYrrzySq1atapzm7TQYevWrbNCQ0OtP/zhD1ZVVZU1c+ZMKyIiwqqtrW23fseOHVaPHj2svLw8691337Uefvhhq2fPntaBAwe6uPPA0tF1njZtmrV8+XJr37591qFDh6z/+I//sJxOp3Xs2LEu7jywdHSd29TU1Fj/7//9P2vcuHHWv/7rv3ZNswGuo2vd1NRkjRkzxpo4caL19ttvWzU1Ndb27dutysrKLu48sHR0ndesWWOFhYVZa9assWpqaqwtW7ZYAwcOtGbPnt3FnQeWzZs3WwsWLLBef/11S5K1cePGb6z/8MMPrUsuucTKysqy3n33XevFF1+0evToYRUVFXVaj4ScC3D99ddbGRkZ9uezZ89aLpfLys3Nbbf+zjvvtJKTk332xcfHW7/+9a87tc9A19F1/qqWlharT58+1urVqzurRSNcyDq3tLRYN9xwg/XKK69YM2bMIOScp46u9YoVK6zLL7/cOnPmTFe1aISOrnNGRob1k5/8xGdfVlaWdeONN3ZqnyY5n5Azd+5c6+qrr/bZN2XKFCspKanT+uKvqzrozJkzqqioUGJior0vODhYiYmJKisra/eYsrIyn3pJSkpK+tp6XNg6f9Vnn32m5uZm9evXr7PaDHgXus6PPfaYIiMjlZaW1hVtGuFC1vqNN95QQkKCMjIyFBUVpWuuuUZPPfWUzp4921VtB5wLWecbbrhBFRUV9l9pffjhh9q8ebMmTpzYJT1/X3THd2FAv/G4O3z66ac6e/bsOW9VjoqK0uHDh9s9xu12t1vvdrs7rc9AdyHr/FXz5s2Ty+U65x8qfOFC1vntt9/Wq6++qsrKyi7o0BwXstYffvihtm3bptTUVG3evFkffPCB7rvvPjU3N2vhwoVd0XbAuZB1njZtmj799FPddNNNsixLLS0tuvfee/W73/2uK1r+3vi670Kv16vPP/9cvXr18vs5uZMDIy1evFjr1q3Txo0bFR4e3t3tGOPkyZOaPn26Xn75ZQ0YMKC72zFea2urIiMjtXLlSo0ePVpTpkzRggULlJ+f392tGWX79u166qmn9Pvf/1579+7V66+/rk2bNunxxx/v7tbwHXEnp4MGDBigHj16qLa21md/bW2toqOj2z0mOjq6Q/W4sHVu8+yzz2rx4sX661//qh/+8Ied2WbA6+g6/+///q8++ugjTZo0yd7X2toqSQoJCVF1dbWuuOKKzm06QF3In+mBAweqZ8+e6tGjh71v+PDhcrvdOnPmjEJDQzu150B0Iev8yCOPaPr06brnnnskSSNGjFBjY6PS09O1YMECBQdzP8Afvu670OFwdMpdHIk7OR0WGhqq0aNHq6SkxN7X2tqqkpISJSQktHtMQkKCT70kFRcXf209LmydJSkvL0+PP/64ioqKNGbMmK5oNaB1dJ2HDRumAwcOqLKy0t5+9rOf6ZZbblFlZaViYmK6sv2AciF/pm+88UZ98MEHdpCUpPfee08DBw4k4HyNC1nnzz777Jwg0xYsLf7zjn7TLd+FnfZIs8HWrVtnhYWFWatWrbLeffddKz093YqIiLDcbrdlWZY1ffp0Kzs7267fsWOHFRISYj377LPWoUOHrIULF/IT8vPQ0XVevHixFRoaav35z3+2/u///s/eTp482V2XEBA6us5fxa+rzl9H1/rIkSNWnz59rMzMTKu6utoqLCy0IiMjrSeeeKK7LiEgdHSdFy5caPXp08f6r//6L+vDDz+0tm7dal1xxRXWnXfe2V2XEBBOnjxp7du3z9q3b58lyXruueesffv2WR9//LFlWZaVnZ1tTZ8+3a5v+wn5nDlzrEOHDlnLly/nJ+QXqxdffNEaNGiQFRoaal1//fXWO++8Y4/dfPPN1owZM3zqX3vtNesHP/iBFRoaal199dXWpk2burjjwNSRdR48eLAl6Z
xt4cKFXd94gOnon+cvI+R0TEfXeufOnVZ8fLwVFhZmXX755daTTz5ptbS0dHHXgacj69zc3GwtWrTIuuKKK6zw8HArJibGuu+++6wTJ050feMB5G9/+1u7/85tW9sZM2ZYN9988znHjBo1ygoNDbUuv/xy649//GOn9hhkWdyLAwAA5uGZHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACM9P8Bq1nsS1SKH9IAAAAASUVORK5CYII=", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ],
  "source": [
   "# Binary target: 1 when the INDIA label is 'i0' or 'i1', 0 otherwise.\n",
   "# assign() returns a new DataFrame, so nothing is written into a slice of df\n",
   "# (the direct column assignment used before raised SettingWithCopyWarning).\n",
   "labeled = labeled.assign(is_i1=labeled.india.isin(['i1', 'i0']).astype(int))\n",
   "plt.hist(labeled.is_i1)" ] },
 { "cell_type": "code", "execution_count": null, "id": "e8c139d9-bf61-45ec-9da1-7eaf4ff754b4", "metadata": {}, "outputs": [],
  "source": [
   "# 'age' is the only numeric feature and is stored as float32.\n",
   "labeled['age'] = labeled['age'].astype(np.float32)\n",
   "# Every other column (including 'india' and the target 'is_i1') becomes a\n",
   "# pandas category, with missing values mapped to the string 'None'.\n",
   "for c in labeled.columns:\n",
   "    if c != 'age':\n",
   "        labeled[c] = labeled[c].fillna('None').astype('category')\n" ] },
 { "cell_type": "code", "execution_count": 104, "id": "fabf354e-f39e-4cde-af84-c65a277d309a", "metadata": {}, "outputs": [],
  "source": [
   "# Hold out 33% of the labelled rows as the test set, stratified on the target.\n",
   "X_train, X_test, y_train, y_test = train_test_split(\n",
   "    labeled.drop(columns=['india', 'is_i1']),\n",
   "    labeled.is_i1, test_size=0.33, random_state=0, stratify=labeled.is_i1)" ] },
 { "cell_type": "code", "execution_count": 105, "id": 
"cf6cf5d8-d43e-499e-98a5-65ecd0b8ccda", "metadata": {}, "outputs": [],
  "source": [
   "# Split the remaining training rows again: 33% of them become the validation set.\n",
   "# NOTE: this rebinds X_train / y_train, so re-running the cell shrinks them further;\n",
   "# re-run the previous split cell first.\n",
   "X_train, X_valid, y_train, y_valid = train_test_split(\n",
   "    X_train, y_train, test_size=0.33, random_state=0, stratify=y_train)" ] },
 { "cell_type": "code", "execution_count": 147, "id": "774ad570-a60b-475b-80cf-4f9b9949cc9d", "metadata": {}, "outputs": [],
  "source": [
   "# Weights for the unbalanced classes: class 0 rows weigh 6, class 1 rows weigh 1.\n",
   "weight_train = 1 + 5 * (y_train.values.astype(int) == 0)" ] },
 { "cell_type": "code", "execution_count": 148, "id": "4bed71a8-6420-4432-8362-3fe24eb394fb", "metadata": {},
  "outputs": [ { "data": { "text/plain": [ "array([6, 6, 1, ..., 1, 1, 1])" ] }, "execution_count": 148, "metadata": {}, "output_type": "execute_result" } ],
  "source": [ "weight_train" ] },
 { "cell_type": "code", "execution_count": 149, "id": "1fcc5234-abad-459a-9420-810833657796", "metadata": {}, "outputs": [],
  "source": [
   "from catboost import CatBoostClassifier, Pool\n",
   "\n",
   "# Every feature except 'age' is categorical.\n",
   "cat_features = [name for name in X_train.columns if name != 'age']\n",
   "\n",
   "# Only the training pool carries the class weights.\n",
   "train_data = Pool(data=X_train, label=y_train,\n",
   "                  weight=weight_train, cat_features=cat_features)\n",
   "valid_data = Pool(data=X_valid, label=y_valid, cat_features=cat_features)\n",
   "\n",
   "model = CatBoostClassifier(iterations=1000)\n",
   "# Stop early when the validation metric has not improved for 100 rounds.\n",
   "model.fit(train_data, eval_set=valid_data, verbose=False, early_stopping_rounds=100)\n",
   "preds_class = model.predict(valid_data)" ] },
 { "cell_type": "code", "execution_count": 150, "id": "8df84007-110b-4f31-bc7d-53e7d5c9a178", "metadata": {}, "outputs": [],
  "source": [
   "# Predicted class labels for the training and validation pools.\n",
   "preds_class_train = model.predict(train_data)\n",
   "preds_class_valid = model.predict(valid_data)" ] },
 { "cell_type": "code", "execution_count": 151, "id": "55627ec4-fd24-4815-98d4-d8462bbfdd9a", "metadata": {}, "outputs": [],
  "source": [ "from sklearn.metrics import confusion_matrix,matthews_corrcoef,accuracy_score" ] },
 { "cell_type": "code", "execution_count": null, "id": "877e914d-44d4-4299-8d3c-24c4fc353317", "metadata": {}, "outputs": [], "source": [] },
 { "cell_type": "code", 
"execution_count": 152, "id": "776395d9-a8e2-4fda-90e1-11d6dbe80de8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[ 622 159]\n", " [ 778 2470]]\n", "0.4623150659581546\n", "0.7674360883593944\n", "########################################\n", "[[1436 149]\n", " [1358 5236]]\n", "0.5834604503367398\n", "0.8157476464115417\n" ] } ], "source": [
 "# Confusion matrix, Matthews correlation and accuracy: validation split first,\n",
 "# training split after the separator line.\n",
 "print(confusion_matrix(y_valid,preds_class_valid))\n",
 "print(matthews_corrcoef(y_valid,preds_class_valid))\n",
 "print(accuracy_score(y_valid,preds_class_valid))\n",
 "print('########################################')\n",
 "print(confusion_matrix(y_train,preds_class_train))\n",
 "print(matthews_corrcoef(y_train,preds_class_train))\n",
 "print(accuracy_score(y_train,preds_class_train))"
] },
{ "cell_type": "code", "execution_count": null, "id": "34c397bc-529a-4c52-b30e-957b28021200", "metadata": {}, "outputs": [], "source": [
 "##try with xgboost\n",
 "import xgboost as xgb\n",
 "\n",
 "# Build the DMatrix inputs for classification; the labels are cast to int.\n",
 "# NOTE: scale_pos_weight is not a DMatrix argument (passing it raises TypeError);\n",
 "# per-row weights would go through weight=..., which is left disabled here.\n",
 "dtrain = xgb.DMatrix(X_train, y_train.astype(int), enable_categorical=True)\n",
 "dvalid = xgb.DMatrix(X_valid, y_valid.astype(int), enable_categorical=True)"
] },
{ "cell_type": "code", "execution_count": 154, "id": "7bb54c1d-ed2c-496a-b149-d567246bfee0", "metadata": {}, "outputs": [], "source": [ "params = {\"objective\": \"multi:softprob\", \"num_class\": 2}\n", "n = 1000\n", "\n", "results = xgb.train(\n", " params, dtrain,\n", " num_boost_round=n,\n", " evals = [(dtrain, \"train\"),
(dvalid, \"valid\")],\n", " verbose_eval=False,\n",
 " # with several evals entries, early stopping monitors the last one ('valid')\n",
 " early_stopping_rounds=100\n",
 ")\n"
] },
{ "cell_type": "code", "execution_count": null, "id": "945e209e-1722-434e-b0bf-5104b3234eb9", "metadata": {}, "outputs": [], "source": [
 "# Per the XGBoost docs, xgb.train returns the booster as of the last boosting round,\n",
 "# not the best one, so prediction is restricted to the trees up to the best\n",
 "# early-stopping iteration.\n",
 "# (The earlier run with plain predict() scored MCC 0.87 on train vs 0.45 on validation.)\n",
 "best_range = (0, results.best_iteration + 1)\n",
 "preds_class_valid = results.predict(dvalid, iteration_range=best_range)\n",
 "preds_class_train = results.predict(dtrain, iteration_range=best_range)"
] },
{ "cell_type": "code", "execution_count": null, "id": "65228d59-e4c2-411d-8535-b36c15a55bfd", "metadata": {}, "outputs": [], "source": [
 "# multi:softprob yields one probability column per class; argmax picks the predicted label.\n",
 "labels_valid = preds_class_valid.argmax(1)\n",
 "labels_train = preds_class_train.argmax(1)\n",
 "\n",
 "# Validation metrics first, training metrics after the separator line.\n",
 "print(confusion_matrix(y_valid, labels_valid))\n",
 "print(matthews_corrcoef(y_valid, labels_valid))\n",
 "print(accuracy_score(y_valid, labels_valid))\n",
 "print('########################################')\n",
 "print(confusion_matrix(y_train, labels_train))\n",
 "print(matthews_corrcoef(y_train, labels_train))\n",
 "print(accuracy_score(y_train, labels_train))"
] },
{ "cell_type": "code", "execution_count": null, "id": "95461970-fad9-4c50-84d3-e139bedfec3f", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.0" } }, "nbformat": 4, "nbformat_minor": 5 }