1+ # -*- coding: utf-8 -*-
2+ """campus_placement_analysis&prediction.ipynb
3+
4+ Automatically generated by Colaboratory.
5+
6+ Original file is located at
7+ https://colab.research.google.com/drive/1H8LaqWZcR6OqEhPmavKEbh2duuiTLypC
8+
9+ # **Campus Placement Analysis & Prediction**
10+
11+ Here, in this project we will analyze various features that affect campus placements and then perform prediction using decision tree regressor algorithm.
12+
13+ Following steps are followed:
14+
- **Data preprocessing and exploration** to understand what kind of data we will be working on.
- **Data Training** using the train-test-split method from sklearn to split the data into training and testing data & Model Creation using the decision tree regressor algorithm.
- **Performance Evaluation** by error and accuracy check to find how effective the algorithm is for this project.
18+
19+ For the dataset being used in this project [click here](https://www.kaggle.com/benroshan/factors-affecting-campus-placement)
20+
21+ ### **Data Preprocessing & Exploration**
22+ """
23+
24+ #importing pandas library.
25+ import pandas as pd
26+
# Read the placement CSV (Colab-style /content path) into a DataFrame.
data = pd.read_csv(filepath_or_buffer='/content/Placement_Data_Full_Class.csv')
# Bare expression kept from the notebook; it has no effect when run as a plain script.
data
30+
# Dimensions of the dataset as a (rows, columns) tuple.
data.shape

# Peek at the first five records.
data.head(5)

# Peek at the last five records.
data.tail(5)

# Column labels of the dataset.
data.columns

# Print memory usage, non-null counts and dtypes of the columns.
data.info()

# Statistical summary of the dataset.
data.describe()

# Number of missing (null) values in each column.
data.isna().sum()
51+
"""There are 67 null values in 'salary'; we can't proceed until they are handled.
53+
54+ We need to replace the null values by the mean of that respective column.
55+ """
56+
# Number of missing salary entries before imputation.
data['salary'].isnull().sum()

# Mean of the known salaries; used below as the fill value.
salary_mean = data['salary'].mean()
salary_mean

# Fill the missing salaries with the computed mean.
# - The value is derived from the data instead of being hard-coded.
# - It stays numeric; filling with a string would turn the column into
#   object dtype and force a blanket cast later on.
# - The result is assigned back explicitly, because calling
#   fillna(inplace=True) on a column selection is not guaranteed to
#   modify `data` itself (it acts on an intermediate object).
data['salary'] = data['salary'].fillna(salary_mean)

# Confirm that the salary column has no missing values left.
data['salary'].isnull().sum()

# Check every column again for missing values.
data.isnull().sum()

# Total number of missing values across the whole dataset.
data.isnull().sum().sum()
71+
"""The dataset now has no null values, i.e. it is clean and ready to use.
73+
74+ We can now proceed with further steps.
75+
76+ ### **Data Training**
77+ """
78+
# Review the column dtypes before choosing the model inputs.
data.info()

# Remove the columns that are not used for modelling.
data1 = data.drop(
    columns=[
        'gender',
        'ssc_b',
        'hsc_b',
        'hsc_s',
        'degree_t',
        'workex',
        'specialisation',
        'status',
    ]
)
data1.info()

# Cast every remaining column to int so the later steps run without dtype
# errors. Note that any fractional values are truncated by this cast.
prepareddata = data1.astype(int)
prepareddata.head(5)
87+
88+ # Import train_test_split from sklearn.model_selection
89+ from sklearn .model_selection import train_test_split
# y is the target (the salary column); x holds the features, i.e. every
# prepared column except the target.
y = prepareddata['salary']
x = prepareddata.drop(columns=['salary'])

# Hold out 20% of the rows for testing (an 8:2 train/test ratio). The fixed
# random_state makes the split repeatable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=500,
)
97+
98+ """### **Model Creation using Decision Tree Regressor Algorithm**"""
99+
100+ # Importing decision tree regressor
101+ from sklearn .tree import DecisionTreeRegressor
# Create the decision tree regressor with default settings and fit it on the
# training split (fit returns the fitted estimator itself).
reg = DecisionTreeRegressor().fit(x_train, y_train)

# Predict the target for the held-out test features.
pred = reg.predict(x_test)

# Bare expression kept from the notebook to show the predictions.
pred
111+
112+ """### **Performance Evaluation**"""
113+
114+ import numpy as np
115+ from sklearn .metrics import mean_squared_error
# Root-mean-square error of the predictions on the held-out test set.
rmse = np.sqrt(mean_squared_error(y_test, pred))

# reg.score() returns the R^2 coefficient of determination. It is evaluated on
# the test split: scoring on the training data only measures how well the tree
# memorised the rows it was fitted on, not how well it predicts unseen data.
test_score = reg.score(x_test, y_test)

# The second column is labelled "Accuracy" but holds the R^2 score above.
print("Model\t \t \t RootMeanSquareError \t \t Accuracy of the model")
print("""Decision Tree Regressor \t \t {:.2f} \t \t \t {:.2f}""".format(rmse, test_score))
118+
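"""Conclusion Drawn:
* A score of 1.00 is what a fully grown decision tree regressor typically obtains on the data it was trained on, so it shows that the model fits the training set rather than how well it predicts unseen data; judge the model by its score and error on the held-out test data.

* The decision tree regressor is an efficient model that is widely used for regression tasks, various prediction projects, etc.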
123+
124+ **Author**
125+
126+ [Ayushi Shrivastava](https://github.com/ayushi424)
127+ """
0 commit comments