Multiple Files

# Install the latest RapidCanvas client library (uncomment the next line to run it in a notebook)
# !pip install --extra-index-url=https://us-central1-python.pkg.dev/rapidcanvas-361003/pypi/simple utils==0.12dev0

from utils.rc.client.requests import Requests
from utils.rc.client.auth import AuthClient

from utils.rc.dtos.project import Project
from utils.rc.dtos.dataset import Dataset
from utils.rc.dtos.recipe import Recipe
from utils.rc.dtos.transform import Transform
from utils.rc.client.files import FilesClient

from utils.rc.dtos.template_v2 import TemplateV2, TemplateTransformV2

import pandas as pd
import logging
from utils.utils.log_util import LogUtil
# Log as "LEVEL:message" at INFO verbosity so the client's progress
# messages appear in the notebook output.
LogUtil.set_basic_config(level=logging.INFO, format='%(levelname)s:%(message)s')
# Optional host override, left disabled in this run:
# Requests.setRootHost("https://test.dev.rapidcanvas.net/api/")
AuthClient.setToken()
# Start from an empty project that the dataset and recipe are attached to.
project = Project.create(name="sample_multi_files", description="Testing python lib", createEmpty=True)
INFO:Creating new project by name: sample_multi_files
{'id': '216edb96-1795-4561-940b-a04ae228062d', 'name': 'sample_multi_files', 'description': 'Testing python lib', 'icon': None, 'image': None, 'createdAt': 1672892953405, 'updatedAt': 1672892953405, 'creator': 'roshan@rapid.ai', 'industries': [], 'useCases': [], 'metadata': {}, 'envId': None, 'canvasEdgeStyle': 'SQUARED', 'display_name': None}
# Notebook cell expression: echoes the id of the project created above.
project.id
'216edb96-1795-4561-940b-a04ae228062d'
# Register a single dataset that is backed by three CSV part files.
titanic = project.addDataset(
    dataset_file_path=[
        "data/titanic.1.csv",
        "data/titanic.2.csv",
        "data/titanic.3.csv",
    ],
    dataset_name="titanic",
    dataset_description="titanic golden",
)
INFO:Creating new dataset by name:titanic
INFO:Uploading file data/titanic.1.csv ....
INFO:Uploading file data/titanic.2.csv ....
INFO:Uploading file data/titanic.3.csv ....
INFO:Uploading Done
# Create a recipe whose only input is the multi-file titanic dataset.
recipe = project.addRecipe(
    [titanic],
    name="recipe_v1",
)
INFO:Creating new recipe
# Notebook cell expression: echoes the id of the recipe created above.
recipe.id
'03efc51c-88e5-41cc-954b-2c2e906c6e47'
# Describe a custom template tied to this project; its single base
# transform is a Python notebook.
template = TemplateV2(
    name="MultiFileTransform",
    description="MultiFileTransform",
    project_id=project.id,
    source="CUSTOM",
    status="ACTIVE",
    tags=["Number", "datatype-long"],
)
template_transform = TemplateTransformV2(
    type="python",
    params={"notebookName": "MultiFileTransform.ipynb"},
)
template.base_transforms = [template_transform]
# Publish the template from the notebook file kept under transforms/.
template.publish("transforms/MultiFileTransform.ipynb")
INFO:Publishing template | data=TemplateV2(name='MultiFileTransform', display_name=None, id=None, version='1.0', project_id='216edb96-1795-4561-940b-a04ae228062d', projectId='216edb96-1795-4561-940b-a04ae228062d', is_global=False, description='MultiFileTransform', tags=['Number', 'datatype-long'], baseTransforms=[TemplateTransformV2(type='python', params={'notebookName': 'MultiFileTransform.ipynb'})], base_transforms=[TemplateTransformV2(type='python', params={'notebookName': 'MultiFileTransform.ipynb'})], source='CUSTOM', status='ACTIVE', inputs=[])
INFO:Template Published
INFO:Generating grammar tables from /Users/nikunj/miniconda3/lib/python3.8/site-packages/blib2to3/Grammar.txt
INFO:Writing grammar tables to /Users/nikunj/Library/Caches/black/22.1.0/Grammar3.8.11.final.0.pickle
INFO:Writing failed: [Errno 2] No such file or directory: '/Users/nikunj/Library/Caches/black/22.1.0/tmp9_w12z57'
INFO:Generating grammar tables from /Users/nikunj/miniconda3/lib/python3.8/site-packages/blib2to3/PatternGrammar.txt
INFO:Writing grammar tables to /Users/nikunj/Library/Caches/black/22.1.0/PatternGrammar3.8.11.final.0.pickle
INFO:Writing failed: [Errno 2] No such file or directory: '/Users/nikunj/Library/Caches/black/22.1.0/tmpl_wczrn8'
WARNING:Input notebook does not contain a cell with tag 'parameters'
INFO:Executing notebook with kernel: python3

**************************************
**    CREATING INPUTS: outputDataset    **
**************************************
Inputs created successfully | template_id=c4dc6394-f286-46fb-93ca-74feee40948c
# Instantiate the published template as a transform and bind its
# "outputDataset" input to the value "merged".
transform = Transform()
transform.templateId = template.id
transform.name = "transform"
transform.variables = dict(outputDataset="merged")
# NOTE(review): the client logs a deprecation warning for add_transform, but
# the warning names add_transform itself as the replacement, so the intended
# successor API is unclear - confirm against the library documentation.
recipe.add_transform(transform)
WARNING:
#############################################IMPORTANT#############################################
add_transform is going to deprecate soon. Please use add_transform instead
####################################################################################################

INFO:Adding new transform
INFO:Transform added Successfully
# Execute the recipe; the client logs a UI link for following progress.
recipe.run()
INFO:Started running
INFO:You can look at the progress on UI at https://test.dev.rapidcanvas.net/#/projects/216edb96-1795-4561-940b-a04ae228062d
INFO:No errors found
# Notebook cell expression: shows the recipe's child datasets as a dict
# keyed by dataset name (here 'merged_titanic').
recipe.getChildrenDatasets()
{'merged_titanic': <utils.rc.dtos.dataset.Dataset at 0x143f726d0>}
# Look up the merged output dataset and check its row count.
outputEntity = recipe.getChildrenDatasets()["merged_titanic"]
# presumably num_rows is an upper bound on rows fetched - TODO confirm;
# the run is expected to yield exactly 90 rows.
row_count = outputEntity.getData(num_rows=300).shape[0]
assert row_count == 90, "shape of merged dataset not match"