-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathInsurance_analysis.py
More file actions
36 lines (25 loc) · 951 Bytes
/
Copy pathInsurance_analysis.py
File metadata and controls
36 lines (25 loc) · 951 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Train a random-forest classifier that predicts the `smoker` column of
# insurance.csv from the remaining columns, then report a confusion matrix on
# a held-out split and a cross-validated accuracy.

# Load the dataset and show the number of missing values per column.
data = pd.read_csv("insurance.csv")
print(data.isnull().sum())

# Replace the `sex` and `smoker` values with integer class codes.
# fit_transform refits the encoder on each call, so reusing `le` is safe here;
# after the second call `le` holds the `smoker` classes only.
le = LabelEncoder()
data["sex"] = le.fit_transform(data["sex"])
data["smoker"] = le.fit_transform(data["smoker"])

# One-hot encode `region`; drop_first=True drops one level to avoid a
# redundant indicator column.
data = pd.get_dummies(data, columns=["region"], drop_first=True)

# Features are every column except the target.
X = data.drop(["smoker"], axis=1)
y = data["smoker"]

# 75% train / 25% validation, seeded so the split is reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, train_size=0.75, random_state=101
)

# Seed the forest as well; otherwise every run yields different trees and
# therefore different metrics despite the seeded split.
rfc = RandomForestClassifier(random_state=101)
rfc.fit(X_train, y_train)
y_pred = rfc.predict(X_val)

# Confusion matrix of true vs. predicted labels on the validation split.
cm = confusion_matrix(y_val, y_pred)
print(cm)

# Cross-validate against the TRUE labels. Scoring against `y_pred` would only
# measure how well a refit model reproduces its own earlier predictions.
# cross_val_score clones and refits the estimator per fold, so passing the
# full data set does not leak the fit above into the score. Ten folds keep
# each fold large enough to give a stable estimate.
accuracy = cross_val_score(estimator=rfc, X=X, y=y, cv=10)
print(accuracy.mean())