"""Exploratory data analysis for the car insurance claim dataset.

Loads ``car_insurance.csv``, prints a structural and statistical summary,
removes duplicate rows, fills missing numeric values with the column median,
and plots the number of records per ``Vehicle_Age`` split by ``Claim``.
"""

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load dataset
df = pd.read_csv("car_insurance.csv")

# Basic info
# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
df.info()
print(df.describe())
print(df.isnull().sum())

# Data cleaning
df = df.drop_duplicates()
# numeric_only=True restricts the median to numeric columns. Without it,
# pandas >= 2.0 raises TypeError as soon as the frame contains a non-numeric
# column. Missing values in non-numeric columns are left untouched here.
df = df.fillna(df.median(numeric_only=True))

# Visualization
# NOTE(review): countplot draws raw counts per category, not probabilities,
# despite the wording of the title -- confirm which one is intended.
sns.countplot(x='Vehicle_Age', hue='Claim', data=df)
plt.title("Vehicle Age vs Claim Probability")
plt.show()