Create Template

Import Functions

from utils.notebookhelpers.helpers import Helpers
from utils.dtos.templateOutputCollection import TemplateOutputCollection
from utils.dtos.variable import Metadata
from utils.dtos.templateOutput import TemplateOutput
from utils.dtos.templateOutput import OutputType
from utils.dtos.templateOutput import ChartType
from utils.notebookhelpers.schemaHelpers import SchemaHelpers
from utils.functions import FunctionLib

Create context

# Initialise (or fetch) the server-side execution context for this recipe.
CONTEXT_ID = 'recipe_split'
context = Helpers.getOrCreateContext(contextId=CONTEXT_ID, localVars=locals())

Create Input Dataset

# Declare the single dataset this template consumes.
inputDatasetParameter = Helpers.get_or_create_input_dataset(
    name="inputDataset",
    metadata=Metadata(
        input_name='Input Dataset',
        tooltip='Dataset to apply the train and test split',
        is_required=True,
    ),
    local_context=locals(),
)

Create input parameters

# Name of the label column to predict (y).
targetCol = Helpers.get_or_create_input_var(
    name="targetCol",
    metadata=Metadata(
        input_name="Target Column",
        default_value="target",
        is_required=True,
        datatypes=['STRING'],
        tooltip="Input the target column to predict on (label y)",
    ),
    local_context=locals(),
)

# Fraction of rows held out for the test split.
test_size = Helpers.get_or_create_input_var(
    name="test_size",
    metadata=Metadata(
        input_name="Test Size",
        default_value=0.2,
        is_required=True,
        datatypes=['DOUBLE'],
        tooltip="Test Size, default to be 0.2, which means 80% train and 20% test split",
    ),
    local_context=locals(),
)

Create the output dataset parameters (train and test)

# Output dataset parameter: the training split.
outputDatasetParameter_train = Helpers.get_or_create_output_dataset(
    name="output_train",
    metadata=Metadata(
        input_name='Output Train Dataset',
        default_value="train",
        is_required=True,
        tooltip='Train Dataset to be created',
    ),
    local_context=locals(),
)

# Output dataset parameter: the test split.
outputDatasetParameter_test = Helpers.get_or_create_output_dataset(
    name="output_test",
    metadata=Metadata(
        input_name='Output Test Dataset',
        default_value="test",
        is_required=True,
        tooltip='Test Dataset to be created',
    ),
    local_context=locals(),
)
# Resolve every declared parameter against the server-supplied values.
Helpers.getAllParams(context)

# Unwrap the parameter objects into plain values.
inputDataset = inputDatasetParameter.value
targetCol = targetCol.value
output_train = outputDatasetParameter_train.value
output_test = outputDatasetParameter_test.value

# The server delivers test_size as a string; coerce it to a float.
test_size = float(test_size.value)

# Echo the resolved parameters (same order/format as individual prints).
for resolved in (inputDataset, targetCol, test_size, output_train, output_test):
    print(resolved)
Example cell output (resolved parameter values):
raw_w_one_hot_encoding
SalePrice
0.2
train
test
# Load the input dataset (name resolved above) into a DataFrame via the
# template context. Assumes getEntityData returns a pandas DataFrame —
# consistent with the pandas operations applied to inDF below.
inDF = Helpers.getEntityData(context, inputDataset)

# NOTE(review): dead code kept for reference — it stripped the system-added
# '__index_level_0__' column; currently disabled.
#if '__index_level_0__' in inDF:
#    inDF = inDF.drop(['__index_level_0__'], axis=1)

#inDF

Build the function you want your template to execute

import pandas as pd
import numpy as np
import os
import itertools
import warnings
import re
warnings.filterwarnings(action='ignore')

from sklearn.model_selection import train_test_split

# Separate features (X) from the label column (y).
X = inDF.drop(targetCol, axis=1)
y = inDF[targetCol]

# Deterministic split (random_state=42); stratification intentionally disabled.
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    #stratify=y,
                                                    test_size=test_size,
                                                    random_state=42)

# Re-attach the label so each output is a complete, self-contained dataset.
y_train = pd.DataFrame(y_train)
y_test = pd.DataFrame(y_test)
train = pd.concat([X_train, y_train], axis=1)
test = pd.concat([X_test, y_test], axis=1)

# A stray 'index' column can appear upstream; remove it where present.
# BUG FIX: the original code dropped 'index' from BOTH frames whenever
# EITHER frame contained it, raising KeyError when only one did.
# Dropping per-frame with errors='ignore' handles every combination.
train.drop(columns=['index'], inplace=True, errors='ignore')
test.drop(columns=['index'], inplace=True, errors='ignore')

Create an output collection

# Assemble the template's output collection and persist both splits.
outputCollection = Helpers.createOutputCollection(context)

for out_name, frame in ((output_train, train), (output_test, test)):
    template_output = Helpers.createTemplateOutputDataset(
        context=context, outputName=out_name, dataFrame=frame)
    outputCollection.addTemplateOutput(template_output)

Helpers.save(context)