<script>
    import { scrollto } from "svelte-scrollto";
    import IndexNavbar from "components/Navbars/IndexNavbar.svelte";
    import Footer from "components/Footers/Footer.svelte";
    import { onMount } from "svelte";

    import { Highlight } from "svelte-highlight";
    import { python } from "svelte-highlight/languages";
    import { github } from "svelte-highlight/styles";

    import Formater from "./codeFormat.svelte";

    // Start the article at the top of the page once the component has mounted.
    onMount(function resetScrollPosition() {
        window.scrollTo(0, 0);
    });
  

    // import { anOldHope } from "svelte-highlight/styles";

    // Static image assets (paths are relative to the site root).
    const waves = "/assets/img/wave.png";
    const patternVue = "/assets/img/numArrays.jpg";

    // Heading shown at the top of the article.
    const aTitle =
        "Predicting User Churn using RandomForestClassifier in Sci-kit Learn";

    // Lead-in sentence rendered just before the Kaggle dataset link.
    const headText2 = "Next download the open-source dataset from Kaggle:";
    
        

    // Python snippets displayed in the article, in reading order. They are
    // static text with no reactive dependencies, so they are declared as plain
    // constants rather than `$:` reactive statements.

    // Snippet 1: imports used by the rest of the walkthrough.
    const coda = `import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics`;

    // Snippet 2: load the CSV and inspect its shape, dtypes and first rows.
    const code = `# Load in our data
data = pd.read_csv("Churn_Modeling.csv")

# Look at the shape
data.shape

# Look at the datatypes
data.dtypes

# View it in tabular format
data.head(3)
`;

    // Snippet 3: replace the string-valued columns with integer codes.
    const code3 = `data["Gender"] = pd.factorize(data.Gender)[0]
data["Geography"] = pd.factorize(data.Geography)[0]`;

    // Snippet 4: choose feature/label columns and make a 70/30 train/test split.
    const code4 = `X = data.iloc[:,3:13] # Features
Y = data.iloc[:,-1] # Labels

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3) # 70% training and 30% test
`;
    // Snippet 5: build and train the forest, then predict on the held-out
    // rows. The prediction step is required: snippet 6 scores `y_pred`, and no
    // earlier snippet defines it, so without this line the tutorial raises a
    // NameError when followed step by step.
    const code5 = `clf=RandomForestClassifier(n_estimators=100)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)`;

    // Snippet 6: compare the predictions from snippet 5 with the true labels.
    const code6 = `print("Accuracy:",metrics.accuracy_score(y_test, y_pred))`;

    // Remaining snippets. Like the earlier ones they are static text, so they
    // are plain constants rather than `$:` reactive statements.

    // Snippet 7: rank the features by the importance the trained forest assigns.
    const code7 = `features_rank = pd.Series(clf.feature_importances_, index=X.columns).sort_values(ascending=False)
features_rank`;

    // Snippet 8: rebuild X from an explicit column list; Y is the "Exited" column.
    const code8 = `X = data[["CreditScore", "Geography", "Age", "Tenure", "Balance", "NumOfProducts", "IsActiveMember", "EstimatedSalary"]]
Y = data["Exited"]`;

    // Snippet 9: re-split, train a second model on the new X, and score it.
    const code9 = `X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3) # 70% training and 30% test
clf2=RandomForestClassifier(n_estimators=100) # Create another model
clf2.fit(X_train, y_train) # Train the new model

y_pred = clf2.predict(X_test) 
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
    `;
</script>

<svelte:head>
    <!-- Give the page a descriptive document title by reusing the article heading. -->
    <title>{aTitle}</title>
    <!-- `github` is the theme imported from svelte-highlight/styles; it is emitted as raw HTML. -->
    {@html github}
</svelte:head>

<!-- <svelte:head>
    <title>About</title>
  </svelte:head> -->

<IndexNavbar />

<!-- <div class="flex flexrow hidden lg:block">
    <section class="section2">
        <div class="sineWaves"></div>
    </section>
</div>
<div class="flex flexrow md:hidden sm:block">
</div> -->

<div class="container px-5 pt-24 mx-auto">
    <div class="flex flex-col text-center w-full mb-12">
        <h1
            class="sm:text-3xl title-font mt-32 mb-4 font-semibold text-4xl text-gray-700">
            {aTitle}
        </h1>
        <div class="flex flex-row">
            <span
                class="lg:w-2/3 mx-auto leading-relaxed text-base text-gray-600">
                This article should help you get familiar with using RandomForestClassifier for your general classification needs.
            </span>
            <!-- <a
            href="https://developers.google.com/edu/python/lists"
            ><span>[List]</span></a> -->
        </div>
    </div>
</div>

<!-- First Text -->
<div class="flex flex-row w-screen relative justify-center items-center">
    <div class="w-8/12">
        <span class="pl-16 mx-auto leading-relaxed text-base text-gray-600">
            To show how
        </span>
        <span class="mx-auto leading-relaxed text-base text-gray-600">
            to do this, I made an example that predicts customer churn for a bank.
        </span>
        <span class="mx-auto leading-relaxed text-base text-gray-600">
            To get started, open up your Python editor and import the following packages:
            
        </span>
    </div>
</div>

<!-- Block of Code -->
<div class="flex flex-row w-screen relative justify-center items-center">
    <div class="w-8/12">
        <!-- <div class="p-2 code text-sm font-mono px-8 py-6 bg-gray-200 flex flex-row justify-between"> -->
        <div class="p-2 pl-4 code text-sm font-mono px-8 py-6">
            <!-- <Highlight language={python} {code} /> -->

            <Formater code={coda} />
        </div>
    </div>
</div>

<!-- Second Text -->
<div class="flex flex-row w-screen relative justify-center items-center">
    <div class="w-8/12">
        <span class="mx-auto leading-relaxed text-base text-gray-600">
            {headText2}
        </span>
        <a
            href="https://www.kaggle.com/santoshd3/bank-customers"
            ><span>[Bank Customers]</span></a>
            <br>
        <span class="mx-auto leading-relaxed text-base text-gray-600">Once you have downloaded the data, we can get started by loading it and getting a feel for what we are working with:
        </span>
    </div>
</div>
<!-- Second Block of Code -->
<div class="flex flex-row w-screen relative justify-center items-center">
    <div class="w-8/12">
        <!-- <div class="p-2 code text-sm font-mono px-8 py-6 bg-gray-200 flex flex-row justify-between"> -->
        <div class="p-2 pl-4 code text-sm font-mono px-8 py-6">
            <!-- <Highlight language={python} {code} /> -->

            <Formater {code} />
        </div>
    </div>
</div>
<div class="flex flex-row w-screen relative justify-center items-center">
    <div class="w-8/12">
        <span class="mx-auto leading-relaxed text-base text-gray-600">
            What we want to predict here is whether the ['Exited'] column is True or False for a customer, i.e. whether that customer churned. We do that by feeding the model the features we want it to consider so that it can predict the dependent (y) variable.
        </span>
        <br>
        <span class="mx-auto leading-relaxed text-base text-gray-600">
            Since the Gender and Geography columns are strings, we will categorize them to turn them into integers.
        </span>
    </div>
</div>

<!-- Third Block of Code -->
<div class="flex flex-row w-screen relative justify-center items-center">
    <div class="w-8/12">
        <!-- <div class="p-2 code text-sm font-mono px-8 py-6 bg-gray-200 flex flex-row justify-between"> -->
        <div class="p-2 pl-4 code text-sm font-mono px-8 py-6">
            <!-- <Highlight language={python} {code} /> -->

            <Formater code={code3} />
        </div>
    </div>
</div>

<!-- SNIPPET 4-->
<div class="flex flex-row w-screen relative justify-center items-center">
    <div class="w-8/12">
        <span class="mx-auto leading-relaxed text-base text-gray-600">
            Next we want to specify variable 'X', which will contain all of our features, and variable 'Y', which will contain our target variable (the label we want to predict). Then we will split the data into training and testing datasets.
        </span>
    </div>
</div>

<!-- 4th Block of Code -->
<div class="flex flex-row w-screen relative justify-center items-center">
    <div class="w-8/12">
        <!-- <div class="p-2 code text-sm font-mono px-8 py-6 bg-gray-200 flex flex-row justify-between"> -->
        <div class="p-2 pl-4 code text-sm font-mono px-8 py-6">
            <!-- <Highlight language={python} {code} /> -->

            <Formater code={code4} />
        </div>
    </div>
</div>


<!-- SNIPPET 5-->
<div class="flex flex-row w-screen relative justify-center items-center">
    <div class="w-8/12">
        <span class="mx-auto leading-relaxed text-base text-gray-600">
            Next we will create a Random Forest classifier, and pass in our data for training and testing.
        </span>
    </div>
</div>

<!-- 5th Block of Code -->
<div class="flex flex-row w-screen relative justify-center items-center">
    <div class="w-8/12">
        <!-- <div class="p-2 code text-sm font-mono px-8 py-6 bg-gray-200 flex flex-row justify-between"> -->
        <div class="p-2 pl-4 code text-sm font-mono px-8 py-6">
            <!-- <Highlight language={python} {code} /> -->

            <Formater code={code5} />
        </div>
    </div>
</div>


<!-- SNIPPET 6-->
<div class="flex flex-row w-screen relative justify-center items-center">
    <div class="w-8/12">
        <span class="mx-auto leading-relaxed text-base text-gray-600">
            Now we will test our model's accuracy: how often is the classifier correct?
        </span>
    </div>
</div>

<!-- 6th Block of Code -->
<div class="flex flex-row w-screen relative justify-center items-center">
    <div class="w-8/12">
        <!-- <div class="p-2 code text-sm font-mono px-8 py-6 bg-gray-200 flex flex-row justify-between"> -->
        <div class="p-2 pl-4 code text-sm font-mono px-8 py-6">
            <!-- <Highlight language={python} {code} /> -->

            <Formater code={code6} />
        </div>
    </div>
</div>

<!-- SNIPPET 7-->
<div class="flex flex-row w-screen relative justify-center items-center">
    <div class="w-8/12">
        <span class="mx-auto leading-relaxed text-base text-gray-600">
            Above, our model reached an accuracy of about 86%. If you get a slightly different value, that is expected: the train/test split and the forest itself are randomized, so the result changes a little each time you re-run it.
        </span>
        <br>
        <span class="mx-auto leading-relaxed text-base text-gray-600">
            Now to improve our model accuracy we will check to see which of our features are the most important, and if we should drop any.
        </span>
    </div>
</div>

<!-- 7th Block of Code -->
<div class="flex flex-row w-screen relative justify-center items-center">
    <div class="w-8/12">
        <!-- <div class="p-2 code text-sm font-mono px-8 py-6 bg-gray-200 flex flex-row justify-between"> -->
        <div class="p-2 pl-4 code text-sm font-mono px-8 py-6">
            <!-- <Highlight language={python} {code} /> -->

            <Formater code={code7} />
        </div>
    </div>
</div>

<!-- SNIPPET 8-->
<div class="flex flex-row w-screen relative justify-center items-center">
    <div class="w-8/12">
        <span class="mx-auto leading-relaxed text-base text-gray-600">
            Above we can see that ['Gender'] and ['HasCreditCard'] are the least important features. So here we can play around and remove some of these, to see if we can improve our model's accuracy.
        </span>
        <br>
        <br>
        <span class="mx-auto leading-relaxed text-base text-gray-600">
            So let's re-run our analysis and drop the unwanted features:
        </span>
    </div>
</div>

<!-- 8th Block of Code -->
<div class="flex flex-row w-screen relative justify-center items-center">
    <div class="w-8/12">
        <!-- <div class="p-2 code text-sm font-mono px-8 py-6 bg-gray-200 flex flex-row justify-between"> -->
        <div class="p-2 pl-4 code text-sm font-mono px-8 py-6">
            <!-- <Highlight language={python} {code} /> -->

            <Formater code={code8} />
        </div>
    </div>
</div>

<!-- SNIPPET 9-->
<div class="flex flex-row w-screen relative justify-center items-center">
    <div class="w-8/12">
        <span class="mx-auto leading-relaxed text-base text-gray-600">
            Now that we have a new set of features, let's re-split our data, create a new model, and train it so we can check whether our accuracy improved:
        </span>
    </div>
</div>

<!-- 9th Block of Code -->
<div class="flex flex-row w-screen relative justify-center items-center">
    <div class="w-8/12">
        <!-- <div class="p-2 code text-sm font-mono px-8 py-6 bg-gray-200 flex flex-row justify-between"> -->
        <div class="p-2 pl-4 code text-sm font-mono px-8 py-6">
            <!-- <Highlight language={python} {code} /> -->

            <Formater code={code9} />
        </div>
    </div>
</div>


<!-- SNIPPET 10-->
<div class="flex flex-row w-screen relative justify-center items-center">
    <div class="w-8/12">
        <span class="mx-auto leading-relaxed text-base text-gray-600">
            Did our model's accuracy improve? If not, you could enrich your dataset or try playing around with other algorithms.
        </span>
        <br>
        <br>
        <span class="mx-auto leading-relaxed text-base text-gray-600">
            Once you are satisfied with your model's performance, you could set up a data pipeline to pass in customer data, predict whether each customer will churn, and have some preventive measures in place.
        </span>
    </div>
</div>



<div class="pb-20"><br /></div>
<div class="pb-20"><br /></div>

<Footer />
