def preprocess_features(X: "pd.DataFrame") -> "pd.DataFrame":
    """One-hot encode the object-dtype columns of ``X``.

    Columns are processed in their original order. A column whose dtype is
    ``object`` is replaced by its dummy/indicator columns, named
    ``<column>_<value>``; every other column is copied through unchanged.

    Args:
        X: Input feature table.

    Returns:
        A new DataFrame sharing ``X``'s index, with each object column
        expanded into ``uint8`` indicator columns.
    """
    # Start from an empty frame that carries only the original index.
    df_preprocess = pd.DataFrame(index=X.index)

    # ``DataFrame.iteritems`` was removed in pandas 2.0; ``items`` is the
    # equivalent that works on both old and new versions.
    for colname, col_data in X.items():
        if col_data.dtype == object:
            # Pin the dtype: pandas >= 2.0 defaults to bool indicators, which
            # would be written out as True/False instead of 0/1.
            col_data = pd.get_dummies(col_data, prefix=colname, dtype="uint8")

        # Append the (possibly expanded) column(s) to the output frame.
        df_preprocess = df_preprocess.join(col_data)

    return df_preprocess
# Build the model-ready feature table from the raw student data.
student_data_model = preprocess_features(student_data)

# In [19]:
# Save the dataframe to a new .csv file (index=False leaves the index out).
student_data_model.to_csv('student-data-model.csv', index=False)

# In [20]:
# New dataset, ready for the model, with only numerical columns.
student_data_model.head()
