Loan Approval Analysis and Synthetic Data Generation¶

By: Abhinav Vedmala, Nikhil Ghate, Om Pathak (Team NOA)

Have you ever wondered what factors go into loan approval, and whether or not you would qualify? When a loan application is submitted, the bank looks at information such as income, credit score, employment, and debt-to-income ratio.

We found a dataset that describes loan applicants and whether their applications were approved. The goal of our project is to perform exploratory data analysis to determine whether some factors matter more than others for loan approval. Along the way, we will also cover encoding categorical variables, imputation, and synthetic data generation.

Here are the resources and tools we used for our project:

  • DataProfiler
  • Matplotlib
  • Numpy
  • Pandas
  • Seaborn
  • missingpy
  • sklearn
In [1]:
import dataprofiler as dp
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sys

import sklearn.neighbors._base
# missingpy imports the old sklearn.neighbors.base module path, so alias it
# to the renamed private module before importing MissForest
sys.modules['sklearn.neighbors.base'] = sklearn.neighbors._base
from missingpy import MissForest

from scipy import stats
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer 
from sklearn.svm import SVC

Data Collection and Curation¶

Loading Data¶

In [2]:
data = pd.read_csv("loan_approval_data.csv")
data
Out[2]:
Loan_ID Gender Married Dependents Education Self_Employed ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term Credit_History Property_Area Loan_Status
0 LP001002 Male No 0 Graduate No 5849 0.0 NaN 360.0 1.0 Urban Y
1 LP001003 Male Yes 1 Graduate No 4583 1508.0 128.0 360.0 1.0 Rural N
2 LP001005 Male Yes 0 Graduate Yes 3000 0.0 66.0 360.0 1.0 Urban Y
3 LP001006 Male Yes 0 Not Graduate No 2583 2358.0 120.0 360.0 1.0 Urban Y
4 LP001008 Male No 0 Graduate No 6000 0.0 141.0 360.0 1.0 Urban Y
... ... ... ... ... ... ... ... ... ... ... ... ... ...
609 LP002978 Female No 0 Graduate No 2900 0.0 71.0 360.0 1.0 Rural Y
610 LP002979 Male Yes 3+ Graduate No 4106 0.0 40.0 180.0 1.0 Rural Y
611 LP002983 Male Yes 1 Graduate No 8072 240.0 253.0 360.0 1.0 Urban Y
612 LP002984 Male Yes 2 Graduate No 7583 0.0 187.0 360.0 1.0 Urban Y
613 LP002990 Female No 0 Graduate Yes 4583 0.0 133.0 360.0 0.0 Semiurban N

614 rows × 13 columns

Let's take a closer look at the categorical variables so we can figure out what we need to re-encode later on.

In [3]:
# recognizing all the categorical variables 
cat_vars = ["Gender", "Married", "Education", "Self_Employed", "Property_Area", "Loan_Status", "Dependents", "Credit_History"] 

# checking the unique values for each categorical variable 
for var in cat_vars: 
    print(data[var].unique())
['Male' 'Female' nan]
['No' 'Yes' nan]
['Graduate' 'Not Graduate']
['No' 'Yes' nan]
['Urban' 'Rural' 'Semiurban']
['Y' 'N']
['0' '1' '2' '3+' nan]
[ 1.  0. nan]

We don't need the Loan_ID column, so we can drop it.

In [4]:
data = data.drop(['Loan_ID'], axis=1)

Exploratory Data Analysis¶

We will generate a plot for each column/variable: histograms for the categorical variables (any column with fewer than 10 distinct values) and violin plots for the quantitative variables.

In [5]:
for col in data.columns:
    # treat low-cardinality columns as categorical
    if len(data[col].value_counts()) < 10:
        sns.histplot(
            data,
            x=col,
            # color the bars by approval status
            hue='Loan_Status',
            multiple='stack',
            # add white borders to the bars
            edgecolor='white',
            # shrink the bars a bit so they don't touch
            shrink=0.8
        )
    else:
        sns.violinplot(data=data, x=col, color='green')
    plt.show()

Some observations from a visual read of the graphs:

  • More male applicants than female applicants
  • More married applicants than unmarried, though the gap is not as wide as for gender
  • More graduates than non-graduates
  • Most applicants have 0 dependents
  • More non-self-employed applicants than self-employed applicants
  • The most common loan term is 360 months (30 years) by a wide margin, with 180 months (15 years) a distant second
  • A fairly even distribution of property areas: semiurban is the most frequent, then urban, then rural
  • Loan amounts and applicant incomes are heavily right-skewed
  • A lot of coapplicants have 0 income

Some charts, like loan amount and applicant income, are heavily right-skewed and contain outliers. Let's redo those charts after removing the outliers. The cutoffs we use below are eyeballed from the plots; a more systematic alternative is sketched next.
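For reference, one systematic way to flag outliers (not part of our original analysis) is the 1.5 × IQR rule. A minimal sketch:

def iqr_outliers(series):
    # flag values more than 1.5 * IQR beyond the quartiles
    q1, q3 = series.quantile(0.25), series.quantile(0.75)
    iqr = q3 - q1
    return (series < q1 - 1.5 * iqr) | (series > q3 + 1.5 * iqr)

for col in ["ApplicantIncome", "CoapplicantIncome", "LoanAmount"]:
    print(col, int(iqr_outliers(data[col]).sum()), "potential outliers")

We stick with the hand-picked cutoffs below since they're easy to read off the plots.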

Below are the applicants with an applicant income over 30000. As you can see, only a few applicants fall in this range.

In [6]:
data.loc[data["ApplicantIncome"] > 30000]
Out[6]:
Gender Married Dependents Education Self_Employed ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term Credit_History Property_Area Loan_Status
155 Male Yes 3+ Graduate No 39999 0.0 600.0 180.0 0.0 Semiurban Y
171 NaN Yes 3+ Graduate No 51763 0.0 700.0 300.0 1.0 Urban Y
183 Male Yes 1 Graduate No 33846 0.0 260.0 360.0 1.0 Semiurban N
185 Male Yes 0 Graduate Yes 39147 4750.0 120.0 360.0 1.0 Semiurban Y
333 Male Yes 0 Graduate NaN 63337 0.0 490.0 180.0 1.0 Urban Y
409 Male Yes 3+ Graduate No 81000 0.0 360.0 360.0 0.0 Rural N
443 Male No 1 Graduate No 37719 0.0 152.0 360.0 1.0 Semiurban Y

Let's regenerate our violin plot excluding applicants with income over 30000.

In [7]:
sns.violinplot(data=data[data["ApplicantIncome"] < 30000], x="ApplicantIncome")
Out[7]:
<AxesSubplot:xlabel='ApplicantIncome'>

Next, let's look at the applicants with a loan amount over 400 (loan amounts are in thousands of dollars), which we'll exclude from the next plot.

In [8]:
data.loc[data["LoanAmount"] > 400]
Out[8]:
Gender Married Dependents Education Self_Employed ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term Credit_History Property_Area Loan_Status
130 Male No 0 Graduate Yes 20166 0.0 650.0 480.0 NaN Urban Y
155 Male Yes 3+ Graduate No 39999 0.0 600.0 180.0 0.0 Semiurban Y
171 NaN Yes 3+ Graduate No 51763 0.0 700.0 300.0 1.0 Urban Y
177 Male Yes 3+ Graduate No 5516 11300.0 495.0 360.0 0.0 Semiurban N
278 Male Yes 0 Graduate No 14583 0.0 436.0 360.0 1.0 Semiurban Y
308 Male No 0 Graduate No 20233 0.0 480.0 360.0 1.0 Rural N
333 Male Yes 0 Graduate NaN 63337 0.0 490.0 180.0 1.0 Urban Y
369 Male Yes 0 Graduate No 19730 5266.0 570.0 360.0 1.0 Rural N
432 Male No 0 Graduate NaN 12876 0.0 405.0 360.0 1.0 Semiurban Y
487 Male Yes 1 Graduate No 18333 0.0 500.0 360.0 1.0 Urban N
506 Male Yes 0 Graduate No 20833 6667.0 480.0 360.0 NaN Urban Y
523 Male Yes 2 Graduate Yes 7948 7166.0 480.0 360.0 1.0 Rural Y
561 Female Yes 1 Graduate Yes 19484 0.0 600.0 360.0 1.0 Semiurban Y
604 Female Yes 1 Graduate No 12000 0.0 496.0 360.0 1.0 Semiurban Y
In [9]:
sns.violinplot(data=data[data["LoanAmount"] < 400], x="LoanAmount", color = 'green')
Out[9]:
<AxesSubplot:xlabel='LoanAmount'>

Similarly, here are the applicants with a coapplicant income over 10000, which we'll exclude from the next plot.

In [10]:
data.loc[data["CoapplicantIncome"] > 10000]
Out[10]:
Gender Married Dependents Education Self_Employed ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term Credit_History Property_Area Loan_Status
9 Male Yes 1 Graduate No 12841 10968.0 349.0 360.0 1.0 Semiurban N
177 Male Yes 3+ Graduate No 5516 11300.0 495.0 360.0 0.0 Semiurban N
402 Male No 0 Graduate No 2500 20000.0 103.0 360.0 1.0 Semiurban Y
417 Male Yes 2 Graduate Yes 1600 20000.0 239.0 360.0 1.0 Urban N
581 Male No 0 Graduate No 1836 33837.0 90.0 360.0 1.0 Urban N
600 Female No 3+ Graduate NaN 416 41667.0 350.0 180.0 NaN Urban N
In [11]:
sns.violinplot(data=data[data["CoapplicantIncome"] < 10000], x="CoapplicantIncome", color='red')
Out[11]:
<AxesSubplot:xlabel='CoapplicantIncome'>

Let's start looking at relationships between pairs of variables.

Below, we will compare the loan amount and applicant income. We will distinguish between applicants who got approved and applicants who did not get approved. Looking at the plot, we can see that generally applicants with high loan amounts and high applicant incomes get approved.

In [12]:
sns.scatterplot(y="ApplicantIncome", x="LoanAmount", data=data,  hue="Loan_Status") 
Out[12]:
<AxesSubplot:xlabel='LoanAmount', ylabel='ApplicantIncome'>

Again, let's remove some outliers, this time keeping only applicants whose applicant income is below 7500. The data looks pretty random, so it's hard to see any trends visually.

In [13]:
sns.scatterplot(y="ApplicantIncome", x="LoanAmount", data=data[data["ApplicantIncome"] < 7500], hue="Loan_Status") 
Out[13]:
<AxesSubplot:xlabel='LoanAmount', ylabel='ApplicantIncome'>

Let's add a regression line.

In [14]:
p = sns.regplot(y="ApplicantIncome", x="LoanAmount", data=data[data["ApplicantIncome"] < 7500], line_kws={"color": "red"})

x = p.get_lines()[0].get_xdata()
y = p.get_lines()[0].get_ydata()
In [15]:
print(x[:5])
print(y[:5])
[ 9.         13.90909091 18.81818182 23.72727273 28.63636364]
[2577.43518239 2625.43161249 2673.42804258 2721.42447268 2769.42090277]
In [16]:
# slope of the regression line 
slope = (y[1] - y[0]) / (x[1] - x[0])
# y-intercept of the regression line
y_int = y[0] - (slope * x[0]) 
print(f"The slope is {slope} and the y-intercept is {y_int}")
The slope is 9.777050574767632 and the y-intercept is 2489.441727222022

Applicant income represents monthly income in dollars, and loan amount represents the loan amount in thousands of dollars. We can interpret the slope of the regression line as follows: for every thousand-dollar increase in the loan amount, monthly applicant income is about $9.78 higher on average. This might not seem right at first, but these are loan amounts for mortgages, which is why they're so big.
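As a quick sanity check (not in the original notebook), we can plug a couple of loan amounts into the fitted line using the slope and intercept computed above:

# predicted monthly income from the fitted line: income = y_int + slope * loan_amount
for loan_amount in [100, 200]:  # LoanAmount is in thousands of dollars
    predicted_income = y_int + slope * loan_amount
    print(f"LoanAmount={loan_amount} -> predicted ApplicantIncome of roughly {predicted_income:.0f}")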

Preparing the Data for Further Analysis¶

Label Encoding¶

We need to encode the categorical variables as numbers so we can apply ML algorithms to this dataset. (We map each category to an integer, i.e., label encoding, rather than true one-hot encoding.) First, let's see what unique values exist for each categorical variable. One issue before we start is the NaN values: they're tricky to map and work with, so we'll temporarily replace them with None.

In [17]:
data = data.replace({np.nan: None})
In [18]:
for var in cat_vars: 
    print(data[var].unique().tolist())
['Male', 'Female', None]
['No', 'Yes', None]
['Graduate', 'Not Graduate']
['No', 'Yes', None]
['Urban', 'Rural', 'Semiurban']
['Y', 'N']
['0', '1', '2', '3+', None]
[1.0, 0.0, None]

Let's map each categorical value to an integer. The following dictionary shows the combined mapping, shared across all the categorical columns.

In [19]:
hot_encode = {}

for cat_var in cat_vars:
    for i, var in enumerate(data[cat_var].unique().tolist()):
        hot_encode[var] = i

# Keeping the missing values so we can impute them later
hot_encode[None] = None 
hot_encode
Out[19]:
{'0': 0,
 0.0: 1,
 '1': 1,
 1.0: 0,
 '2': 2,
 '3+': 3,
 'Female': 1,
 'Graduate': 0,
 'Male': 0,
 'N': 1,
 'No': 0,
 None: None,
 'Not Graduate': 1,
 'Rural': 1,
 'Semiurban': 2,
 'Urban': 0,
 'Y': 0,
 'Yes': 1}

Now we apply the mapping. Note that the integer codes are arbitrary (for example, Credit_History 1.0 maps to 0 and 0.0 maps to 1), which is fine for modeling as long as we apply them consistently.

In [20]:
for cat_var in cat_vars:
    new_col = []
    
    for _, row in data.iterrows():
        new_col.append(hot_encode[row[cat_var]])
    
    data[cat_var] = new_col
In [21]:
data
Out[21]:
Gender Married Dependents Education Self_Employed ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term Credit_History Property_Area Loan_Status
0 0.0 0.0 0.0 0 0.0 5849 0.0 None 360.0 0.0 0 0
1 0.0 1.0 1.0 0 0.0 4583 1508.0 128.0 360.0 0.0 1 1
2 0.0 1.0 0.0 0 1.0 3000 0.0 66.0 360.0 0.0 0 0
3 0.0 1.0 0.0 1 0.0 2583 2358.0 120.0 360.0 0.0 0 0
4 0.0 0.0 0.0 0 0.0 6000 0.0 141.0 360.0 0.0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ...
609 1.0 0.0 0.0 0 0.0 2900 0.0 71.0 360.0 0.0 1 0
610 0.0 1.0 3.0 0 0.0 4106 0.0 40.0 180.0 0.0 1 0
611 0.0 1.0 1.0 0 0.0 8072 240.0 253.0 360.0 0.0 0 0
612 0.0 1.0 2.0 0 0.0 7583 0.0 187.0 360.0 0.0 0 0
613 1.0 0.0 0.0 0 1.0 4583 0.0 133.0 360.0 1.0 2 1

614 rows × 12 columns

As you can see, all the categorical variables have been encoded as integers.

Imputation¶

Let's count the NaNs in each column.

In [22]:
for col in data.columns: 
    print(col, data[col].isna().sum())
Gender 13
Married 3
Dependents 15
Education 0
Self_Employed 32
ApplicantIncome 0
CoapplicantIncome 0
LoanAmount 22
Loan_Amount_Term 14
Credit_History 50
Property_Area 0
Loan_Status 0
In [23]:
# total number of missing values in the dataset
data.isnull().values.ravel().sum()
Out[23]:
149

There are 149 missing values spread across our 614 rows, so we don't want to simply drop every row that contains one; that would mean losing a lot of data. This leaves the option of imputing the missing values. The straightforward approach is mode and mean imputation. However, that approach does not preserve the relationships among variables and tends to shrink the variance.
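For reference, here is what that baseline would look like with scikit-learn's SimpleImputer (imported earlier but otherwise unused in our pipeline); the numeric/categorical column grouping is our own, and this is only a sketch:

# baseline sketch: mean-impute the numeric columns, mode-impute the categorical ones;
# this treats every column independently, ignoring relationships between variables
num_vars = ["ApplicantIncome", "CoapplicantIncome", "LoanAmount", "Loan_Amount_Term"]

baseline = data.astype(float)  # every column is numeric now, so None becomes NaN
baseline[num_vars] = SimpleImputer(strategy="mean").fit_transform(baseline[num_vars])
baseline[cat_vars] = SimpleImputer(strategy="most_frequent").fit_transform(baseline[cat_vars])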

We will instead use MissForest, which imputes missing values using Random Forests in an iterative fashion. One nice feature of MissForest is that it can handle both categorical and continuous variables, provided the categorical variables are encoded as numbers, which we just did.

In [24]:
imputer = MissForest() 
X_imputed = imputer.fit_transform(data)
Iteration: 0
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
In [25]:
data_imputed = pd.DataFrame(X_imputed, columns=data.columns)
In [26]:
# let's round the imputed values in the categorical columns back to integer codes
data_imputed = data_imputed.round({cat_var: 0 for cat_var in cat_vars})

data_imputed

Now we have a complete dataset that's been encoded and imputed, with no missing data! Let's look at a heatmap of the correlation matrix to identify relationships between the variables now that the data is fully prepared.

In [27]:
sns.heatmap(data_imputed.corr())
Out[27]:
<AxesSubplot:>

Based on the heatmap above, we can see which variables are most closely related. Credit history, applicant income, and number of dependents show the strongest relationships with loan approval. This makes sense, because applicants with an established credit history and higher income are less likely to default on loans. It also matches the hypothesis we formed from the charts in the exploratory data analysis section. (Because our integer codes are arbitrary, the sign of a correlation is not meaningful here; we compare magnitudes.)
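We can make that ranking explicit by sorting the absolute correlations with Loan_Status directly (a quick check, not in the original notebook):

# rank features by the magnitude of their correlation with Loan_Status
corr_with_target = data_imputed.corr()["Loan_Status"].drop("Loan_Status")
print(corr_with_target.abs().sort_values(ascending=False))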

Synthetic Data Generation¶

In the next step, we will try to generate a synthetic dataset that mimics our real data.

Synthetic data is annotated information that computer simulations or algorithms generate as an alternative to real-world data. It may be artificial, but synthetic data reflects real-world data, mathematically or statistically. Research demonstrates it can be as good or even better for training an AI model than data based on actual objects, events or people.

Why is it so important? Developers need large, carefully labeled datasets to train neural networks. More diverse training data generally makes for more accurate AI models. The problem is that gathering and labeling datasets that may contain a few thousand to tens of millions of elements is time-consuming and often prohibitively expensive. Enter synthetic data. A single image that could cost $6 from a labeling service can be artificially generated for six cents, estimates Paul Walborsky, who co-founded one of the first dedicated synthetic data services, AI.Reverie.

We are using DataProfiler, a tool that profiles a dataset and quantifies both global and local statistics about the data. At the global level, we get information such as the correlation matrix, the number of rows, and the ratio of NaNs. At the local level, we get statistics on each column (mean, mode, standard deviation, etc.).

In [28]:
profile_options = dp.ProfilerOptions()
profile_options.set({"correlation.is_enabled": True})

profile = dp.Profiler(data_imputed, options=profile_options)
WARNING:tensorflow:SavedModel saved prior to TF 2.5 detected when loading Keras model. Please ensure that you are saving the model with model.save() or tf.keras.models.save_model(), *NOT* tf.saved_model.save(). To confirm, there should be a file named "keras_metadata.pb" in the SavedModel directory.
INFO:DataProfiler.profilers.profile_builder: Finding the Null values in the columns... 
100%|██████████| 12/12 [00:00<00:00, 334.91it/s]
INFO:DataProfiler.profilers.profile_builder: Calculating the statistics...  (with 4 processes)
100%|██████████| 12/12 [00:02<00:00,  4.63it/s]
In [29]:
report = profile.report()
In [30]:
n_samples = report["global_stats"]["samples_used"]
n_variables = len(report["data_stats"])
seed = 123
In [31]:
# build the covariance matrix as cov = D @ R @ D, where R is the correlation
# matrix and D is the diagonal matrix of the standard deviations

R = report["global_stats"]["correlation_matrix"].round(decimals=8)

stddevs = [stat["statistics"]["stddev"] for stat in report["data_stats"]]
D = np.diag(stddevs)

cov = D @ R @ D
cov = cov.round(decimals=8)
In [32]:
# draw correlated samples from a multivariate normal with the covariance built above

means = np.zeros(n_variables)
mvnorm = stats.multivariate_normal(mean=means, cov=cov)
x = mvnorm.rvs(n_samples)
x[0]
Out[32]:
array([-4.03553698e-01,  1.93770315e-01, -4.91520253e-01, -2.91878476e-01,
       -2.72054420e-01,  1.44939133e+03, -3.82562515e+03, -6.13389950e+01,
        4.04455609e+01, -2.23467659e-01,  1.15550485e-01, -4.46130412e-01])
In [33]:
# now transform the marginals to uniform [0, 1] by applying the normal CDF (a Gaussian copula)

norm = stats.norm()
x = norm.cdf(x)
x[0]
Out[33]:
array([0.34327047, 0.57682213, 0.31152927, 0.38518977, 0.39279009,
       1.        , 0.        , 0.        , 1.        , 0.41158578,
       0.5459956 , 0.32775152])
In [34]:
# build a MinMaxScaler for each column, matched to that column's range in the original data

scalers = {}
for col, stat in enumerate(report["data_stats"]):
    _min = stat["statistics"]["min"]
    _max = stat["statistics"]["max"]
    scalers[col] = MinMaxScaler(feature_range=(_min, _max))
In [35]:
# rescale to feature range

for col in scalers:
    x[:, col] = (
        scalers[col]
        .fit_transform(x[:, col].reshape(-1, 1))
        .flatten()
    )
In [36]:
# find number of decimals for each column and round the data to match
precisions = [stat["samples"][0][::-1].find(".") for stat in report["data_stats"]]

for i, precision in enumerate(precisions):
    x[:, i] = np.around(x[:, i], precision if precision > 0 else 0)
In [37]:
new_data = pd.DataFrame(x, columns=[stat["column_name"] for stat in report["data_stats"]])
In [38]:
new_data
Out[38]:
Gender Married Dependents Education Self_Employed ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term Credit_History Property_Area Loan_Status
0 0.3 0.6 0.9 0.4 0.4 81000.0 0.0 9.0 480.0 0.4 1.1 0.3
1 0.2 0.8 3.0 0.5 0.6 150.0 41667.0 9.0 12.0 0.5 0.2 0.6
2 0.5 0.4 0.6 0.4 0.5 81000.0 0.0 9.0 12.0 0.9 0.6 0.8
3 0.7 0.5 0.2 0.4 0.7 81000.0 41667.0 700.0 12.0 0.4 0.4 0.3
4 0.6 0.5 2.9 0.2 0.4 81000.0 0.0 9.0 12.0 0.5 1.5 0.4
... ... ... ... ... ... ... ... ... ... ... ... ...
609 0.1 0.8 2.0 0.5 0.7 81000.0 0.0 9.0 12.0 0.8 1.6 0.6
610 0.5 0.7 2.2 0.6 0.5 81000.0 0.0 9.0 12.0 0.8 1.7 0.7
611 0.5 0.6 0.6 0.7 0.4 150.0 0.0 9.0 480.0 0.4 0.9 0.4
612 0.5 0.4 2.1 0.7 0.7 150.0 41667.0 700.0 480.0 0.6 0.3 0.5
613 0.5 0.4 1.6 0.5 0.5 150.0 41667.0 700.0 480.0 0.7 0.4 0.6

614 rows × 12 columns

Let's round off the values in the Loan_Status column.

In [39]:
new_data = new_data.round({"Loan_Status": 0})

Our synthetic dataset stays within the original ranges, but how do the correlations compare with the original dataset? From a quick glance at the heatmaps below, they look pretty similar, which suggests our synthetic dataset behaves like the authentic data. A quantitative check follows the heatmaps.

In [40]:
sns.heatmap(data_imputed.corr())
Out[40]:
<AxesSubplot:>
In [41]:
sns.heatmap(new_data.corr())
Out[41]:
<AxesSubplot:>
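To put a number on the comparison (an extra check, not in the original notebook), we can look at the element-wise differences between the two correlation matrices; values near zero mean the pairwise structure was preserved:

# largest absolute difference between the authentic and synthetic correlation matrices
corr_diff = (data_imputed.corr() - new_data.corr()).abs()
print(corr_diff.max().max())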

Machine Learning¶

To test the synthetic data more quantitatively, we will train a model on the original data and benchmark it against both the held-out original data and the synthetic data.

In [42]:
X = data_imputed.drop(["Loan_Status"], axis=1)
y = data_imputed["Loan_Status"]

# split features and target together so the rows stay aligned
training_data, test_data, training_target, test_target = train_test_split(
    X, y, test_size=0.2, random_state=seed
)

clf = SVC()

clf.fit(training_data, training_target)
Out[42]:
SVC()
In [43]:
test_results = clf.predict(test_data)

model_accuracy = list(test_results == test_target).count(True) / len(test_target)
print(f"The model was {round(model_accuracy * 100, 2)}% accurate on the authentic data")
The model was 85.65% accurate on the authentic data
In [46]:
synthetic_data = new_data.to_numpy()[:,:-1]
synthetic_target = new_data.to_numpy()[:,-1:].flatten()

synthetic_results = clf.predict(synthetic_data)
synthetic_accuracy = list(synthetic_results == synthetic_target).count(True) / len(synthetic_target)

print(f"The model was {round(synthetic_accuracy * 100, 2)}% accurate on the synthetic data")
The model was 72.39% accurate on the synthetic data
/usr/local/lib/python3.7/dist-packages/sklearn/base.py:451: UserWarning: X does not have valid feature names, but SVC was fitted with feature names
  "X does not have valid feature names, but"

Our model leaves much to be desired in terms of accuracy: it only reached 85.65% on our train/test split, so it makes sense that the synthetic data didn't benchmark well either. Next steps would be performing further processing on the data and tuning the model's hyperparameters; a sketch of the latter follows.
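As an illustration of what that tuning could look like (the parameter grid below is a hypothetical starting point, not something we ran):

from sklearn.model_selection import GridSearchCV

# hypothetical grid; scaling the features first would also help an SVC
param_grid = {"C": [0.1, 1, 10], "gamma": ["scale", 0.01, 0.001]}
search = GridSearchCV(SVC(), param_grid, cv=5)
search.fit(training_data, training_target)
print(search.best_params_, search.best_score_)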

Conclusion¶

Based on the analysis we did so far, there are a few important conclusions:

  • The most important factors in determining whether an application gets approved are credit history, applicant income, and number of dependents. This was seen visually in the exploratory data analysis section, and then confirmed by the correlation heatmap.
  • Our synthetic data generator did not do a good job. When we ran our ML model on the synthetic data, it was only 72.39% accurate, whereas the same model was 85.65% accurate on the authentic data.