{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "7c5d059b-ed8a-4e2e-9420-25890f648895", "metadata": { "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_69977/3424305685.py:1: DeprecationWarning: \n", "Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),\n", "(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)\n", "but was not found to be installed on your system.\n", "If this would cause problems for you,\n", "please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466\n", " \n", " import pandas as pd\n", "/home/agobbi/miniconda3/envs/pid/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n", "/tmp/ipykernel_69977/3424305685.py:19: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. 
Please consider using SQLAlchemy.\n", " df = pd.read_sql('select * from fbk_export_20240212', con=engine) ##data_safeidx\n" ] } ], "source": [
 "import os\n",
 "import pickle\n",
 "from getpass import getpass\n",
 "\n",
 "import pandas as pd\n",
 "import psycopg2 as pg\n",
 "import matplotlib.pyplot as plt\n",
 "import numpy as np\n",
 "from sklearn.model_selection import train_test_split\n",
 "import xgboost as xgb\n",
 "from sklearn.metrics import confusion_matrix,matthews_corrcoef,accuracy_score\n",
 "import optuna\n",
 "from sklearn.feature_selection import SequentialFeatureSelector\n",
 "\n",
 "# True: query the database and refresh data.pkl; False: reuse the cached data.pkl.\n",
 "reload_data = True\n",
 "\n",
 "def norm(x):\n",
 "    \"\"\"Return [] when x holds exactly one element equal to '', otherwise return x unchanged.\"\"\"\n",
 "    if len(x)==1 and x[0]=='':\n",
 "        return []\n",
 "    return x\n",
 "\n",
 "if reload_data:\n",
 "    # Connection settings are read from the environment so that no secret lives in the notebook.\n",
 "    # The password that used to be hardcoded in this cell must be treated as leaked: rotate it.\n",
 "    db_password = os.environ.get('SAFEIDX_DB_PASSWORD') or getpass('safeidx DB password: ')\n",
 "    engine = pg.connect(\n",
 "        dbname=os.environ.get('SAFEIDX_DB_NAME', 'safeidx'),\n",
 "        user=os.environ.get('SAFEIDX_DB_USER', 'fbk_mpba'),\n",
 "        host=os.environ.get('SAFEIDX_DB_HOST', '172.104.247.67'),\n",
 "        port=os.environ.get('SAFEIDX_DB_PORT', '5432'),\n",
 "        password=db_password,\n",
 "    )\n",
 "    try:\n",
 "        df = pd.read_sql('select * from fbk_export_20240212', con=engine)\n",
 "    finally:\n",
 "        # Always release the connection, even when the query fails.\n",
 "        engine.close()\n",
 "    with open('data.pkl','wb') as f:\n",
 "        pickle.dump(df,f)\n",
 "else:\n",
 "    # NOTE(review): pickle.load can execute arbitrary code; only load a data.pkl written by this notebook.\n",
 "    with open('data.pkl','rb') as f:\n",
 "        df = pickle.load(f)\n"
 ] }, { "cell_type": "code", "execution_count": 2, "id": "b7fb69bc-5308-4c06-a184-6fa5d79534da", "metadata": {}, "outputs": [], "source": [
 "## these columns can lead to overfit!\n",
 "# Reassign (no inplace=True) and ignore columns that are already gone, so re-running\n",
 "# this cell is a no-op instead of a KeyError.\n",
 "df = df.drop(columns=['dateandtime','skiarea_id','day_of_year','minute_of_day','year'], errors='ignore')"
 ] }, { "cell_type": "code", "execution_count": 3, "id": "8100ef4a-cfb9-4ce3-966e-577b6d51096e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | skiarea_name | \n", "season | \n", "difficulty | \n", "cause | \n", "town | \n", "province | \n", "gender | \n", "equipment | \n", "helmet | \n", "destination | \n", "diagnosis | \n", "india | \n", "age | \n", "country | \n", "injury_side | \n", "injury_general_location | \n", "evacuation_vehicles | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "Pampeago | \n", "2009 | \n", "novice | \n", "fall_alone | \n", "SIKLOS | \n", "\n", " | F | \n", "ski | \n", "None | \n", "hospital_emergency_room | \n", "distortion | \n", "None | \n", "32.0 | \n", "Ungheria | \n", "L | \n", "lower_limbs | \n", "[akja] | \n", "
| 1 | \n", "Pampeago | \n", "2009 | \n", "advanced | \n", "fall_alone | \n", "MALMO | \n", "\n", " | M | \n", "ski | \n", "None | \n", "hospital_emergency_room | \n", "bruise | \n", "None | \n", "32.0 | \n", "Svezia | \n", "R | \n", "skull_or_face | \n", "[akja] | \n", "
| 2 | \n", "Pampeago | \n", "2009 | \n", "advanced | \n", "fall_alone | \n", "CALDARO | \n", "BZ | \n", "F | \n", "ski | \n", "None | \n", "domicile | \n", "other | \n", "None | \n", "12.0 | \n", "Italia | \n", "R | \n", "None | \n", "[snowmobile] | \n", "
| 3 | \n", "Pampeago | \n", "2009 | \n", "advanced | \n", "collision_person | \n", "LINZ | \n", "\n", " | M | \n", "ski | \n", "None | \n", "hospital_emergency_room | \n", "bruise | \n", "None | \n", "58.0 | \n", "Austria | \n", "R | \n", "lower_limbs | \n", "[snowmobile] | \n", "
| 4 | \n", "Pampeago | \n", "2009 | \n", "advanced | \n", "collision_person | \n", "RUSAVA | \n", "\n", " | M | \n", "ski | \n", "None | \n", "other | \n", "bruise | \n", "None | \n", "25.0 | \n", "Repubblica Ceca | \n", "L | \n", "lower_limbs | \n", "[other] | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 150102 | \n", "Speikboden | \n", "2024 | \n", "intermediate | \n", "fall_alone | \n", "Dettmannsdorf OT Dettmannsdorf - Kölzow | \n", "None | \n", "F | \n", "ski | \n", "True | \n", "hospital_emergency_room | \n", "other | \n", "i2 | \n", "37.0 | \n", "Germania | \n", "L | \n", "lower_limbs | \n", "[snowmobile_sled, helicopter] | \n", "
| 150103 | \n", "Bardonecchia | \n", "2024 | \n", "easy | \n", "fall_alone | \n", "Milano | \n", "Milano | \n", "M | \n", "ski | \n", "True | \n", "None | \n", "wound | \n", "None | \n", "33.0 | \n", "Italia | \n", "L | \n", "upper_limbs | \n", "[indipendently] | \n", "
| 150104 | \n", "Klausberg | \n", "2024 | \n", "easy | \n", "collision_person | \n", "Düsseldorf | \n", "None | \n", "F | \n", "ski | \n", "True | \n", "traumacenter | \n", "other | \n", "i2 | \n", "66.0 | \n", "Germania | \n", "L | \n", "upper_limbs | \n", "[car, ski_lift, snowmobile] | \n", "
| 150105 | \n", "Moena Lusia | \n", "2024 | \n", "intermediate | \n", "fall_alone | \n", "Rimini | \n", "Rimini | \n", "F | \n", "ski | \n", "True | \n", "domicile | \n", "bruise | \n", "None | \n", "14.0 | \n", "Italia | \n", "R | \n", "lower_limbs | \n", "[ski_lift, snowmobile] | \n", "
| 150106 | \n", "Porta Vescovo - Arabba | \n", "2024 | \n", "intermediate | \n", "fall_alone | \n", "Busto Garolfo | \n", "Milano | \n", "F | \n", "ski | \n", "True | \n", "domicile | \n", "distortion | \n", "None | \n", "58.0 | \n", "Italia | \n", "L | \n", "lower_limbs | \n", "[akja] | \n", "
150107 rows × 17 columns
\n", "| \n", " | skiarea_name | \n", "season | \n", "difficulty | \n", "cause | \n", "gender | \n", "equipment | \n", "helmet | \n", "destination | \n", "diagnosis | \n", "india | \n", "... | \n", "ambulance | \n", "privat_helicopter | \n", "skiarea_ambulance | \n", "snowmobile | \n", "quad | \n", "helicopter | \n", "akja | \n", "offroad_vehicle | \n", "other | \n", "indipendently | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "Kronplatz | \n", "2018 | \n", "None | \n", "fall_alone | \n", "F | \n", "ski | \n", "True | \n", "hospital_emergency_room | \n", "distortion | \n", "1 | \n", "... | \n", "False | \n", "False | \n", "False | \n", "True | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "
| 1 | \n", "Kronplatz | \n", "2018 | \n", "easy | \n", "illness | \n", "M | \n", "ski | \n", "True | \n", "domicile | \n", "other | \n", "0 | \n", "... | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "
| 2 | \n", "Kronplatz | \n", "2018 | \n", "advanced | \n", "fall_alone | \n", "M | \n", "ski | \n", "True | \n", "hospital_emergency_room | \n", "wound | \n", "1 | \n", "... | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "True | \n", "False | \n", "False | \n", "False | \n", "
| 3 | \n", "Kronplatz | \n", "2018 | \n", "advanced | \n", "fall_alone | \n", "M | \n", "ski | \n", "True | \n", "hospital_emergency_room | \n", "fracture | \n", "1 | \n", "... | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "True | \n", "False | \n", "False | \n", "False | \n", "
| 4 | \n", "Kronplatz | \n", "2018 | \n", "intermediate | \n", "fall_alone | \n", "M | \n", "ski | \n", "True | \n", "hospital_emergency_room | \n", "other | \n", "1 | \n", "... | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "True | \n", "False | \n", "False | \n", "False | \n", "
5 rows × 27 columns
\n", "| \n", " | skiarea_name | \n", "season | \n", "difficulty | \n", "cause | \n", "gender | \n", "equipment | \n", "helmet | \n", "destination | \n", "diagnosis | \n", "india | \n", "... | \n", "ambulance | \n", "privat_helicopter | \n", "skiarea_ambulance | \n", "snowmobile | \n", "quad | \n", "helicopter | \n", "akja | \n", "offroad_vehicle | \n", "other | \n", "indipendently | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "Kronplatz | \n", "2018 | \n", "None | \n", "fall_alone | \n", "F | \n", "ski | \n", "True | \n", "hospital_emergency_room | \n", "distortion | \n", "i1 | \n", "... | \n", "False | \n", "False | \n", "False | \n", "True | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "
| 1 | \n", "Kronplatz | \n", "2018 | \n", "easy | \n", "illness | \n", "M | \n", "ski | \n", "True | \n", "domicile | \n", "other | \n", "i0 | \n", "... | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "
| 2 | \n", "Kronplatz | \n", "2018 | \n", "advanced | \n", "fall_alone | \n", "M | \n", "ski | \n", "True | \n", "hospital_emergency_room | \n", "wound | \n", "i1 | \n", "... | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "True | \n", "False | \n", "False | \n", "False | \n", "
| 3 | \n", "Kronplatz | \n", "2018 | \n", "advanced | \n", "fall_alone | \n", "M | \n", "ski | \n", "True | \n", "hospital_emergency_room | \n", "fracture | \n", "i1 | \n", "... | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "True | \n", "False | \n", "False | \n", "False | \n", "
| 4 | \n", "Kronplatz | \n", "2018 | \n", "intermediate | \n", "fall_alone | \n", "M | \n", "ski | \n", "True | \n", "hospital_emergency_room | \n", "other | \n", "i1 | \n", "... | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "True | \n", "False | \n", "False | \n", "False | \n", "
5 rows × 27 columns
\n", "| \n", " | mcc | \n", "acc | \n", "FS | \n", "
|---|---|---|---|
| 0 | \n", "0.024347 | \n", "0.256763 | \n", "1 | \n", "
| 1 | \n", "0.303988 | \n", "0.585659 | \n", "2 | \n", "
| 2 | \n", "0.324149 | \n", "0.614856 | \n", "3 | \n", "
| 3 | \n", "0.374640 | \n", "0.672821 | \n", "4 | \n", "
| 4 | \n", "0.394719 | \n", "0.687419 | \n", "5 | \n", "
| 5 | \n", "0.399983 | \n", "0.695578 | \n", "6 | \n", "
| 6 | \n", "0.431865 | \n", "0.709532 | \n", "7 | \n", "
| 7 | \n", "0.430755 | \n", "0.710176 | \n", "8 | \n", "
| 8 | \n", "0.430538 | \n", "0.710176 | \n", "9 | \n", "
| 9 | \n", "0.434400 | \n", "0.715758 | \n", "10 | \n", "
| 10 | \n", "0.429238 | \n", "0.713826 | \n", "11 | \n", "
| 11 | \n", "0.430244 | \n", "0.717046 | \n", "12 | \n", "
| 12 | \n", "0.435320 | \n", "0.720481 | \n", "13 | \n", "
| 13 | \n", "0.444619 | \n", "0.728210 | \n", "14 | \n", "
| 14 | \n", "0.451718 | \n", "0.733577 | \n", "15 | \n", "
| 15 | \n", "0.449903 | \n", "0.731000 | \n", "16 | \n", "
| 16 | \n", "0.467224 | \n", "0.745170 | \n", "17 | \n", "
| 17 | \n", "0.461310 | \n", "0.742164 | \n", "18 | \n", "
| 18 | \n", "0.466519 | \n", "0.745814 | \n", "19 | \n", "
| 19 | \n", "0.465960 | \n", "0.745170 | \n", "20 | \n", "
| 20 | \n", "0.468924 | \n", "0.745599 | \n", "21 | \n", "
| 21 | \n", "0.468924 | \n", "0.745599 | \n", "22 | \n", "
| 22 | \n", "0.468924 | \n", "0.745599 | \n", "23 | \n", "
| \n", " | skiarea_name | \n", "season | \n", "difficulty | \n", "cause | \n", "gender | \n", "equipment | \n", "helmet | \n", "destination | \n", "diagnosis | \n", "india | \n", "... | \n", "car | \n", "ski_lift | \n", "akja | \n", "other | \n", "indipendently | \n", "snowmobile | \n", "snowmobile_sled | \n", "ambulance | \n", "skiarea_ambulance | \n", "helicopter | \n", "
|---|
0 rows × 27 columns
\n", "