Manage cloud connection

from utils.rc.client.requests import Requests
from utils.rc.client.auth import AuthClient

from utils.rc.dtos.project import Project
from utils.rc.dtos.dataset import Dataset

from utils.rc.dtos.dataSource import DataSource
from utils.rc.dtos.dataSource import DataSourceType
from utils.rc.dtos.dataSource import GcpConfig

from utils.notebookhelpers.gcs import GCSHelper

import logging
from utils.utils.log_util import LogUtil
# Log at INFO level with a compact "LEVEL:message" line format.
LogUtil.set_basic_config(
    level=logging.INFO,
    format='%(levelname)s:%(message)s',
)
# Set the API root URL used by the client (a staging host here).
Requests.setRootHost("http://staging.dev.rapidcanvas.net/api/")
# Called without arguments; where the token comes from is not visible
# in this file -- see AuthClient.
AuthClient.setToken()

Get access key

Get your GCP access key JSON from Rapidcanvas and save it locally.

### Replace both placeholder values before running:
###   - gcp_key_path: local path where you saved the GCP access key JSON from Rapidcanvas
###   - gcp_bucket_name: name of the cloud bucket, also obtained from Rapidcanvas
gcp_key_path: str = "/yourlocalpath/gcloud_bucket_cred.json"
gcp_bucket_name: str = "your_bucket_root"

List and upload

List and upload files to GCP bucket

# Build the GCS helper from the key file path and bucket name set earlier.
gcs_helper = GCSHelper.init(gcp_key_path, gcp_bucket_name)
# List files with an empty-string argument (presumably lists from the bucket
# root -- confirm against GCSHelper.list_files).
gcs_helper.list_files('')

Upload from local

Upload a file from your local folder to the GCP Cloud Storage bucket. You may need to upload it just once.

# Upload the local CSV. The second argument is presumably the destination
# folder in the bucket, since the file is later referenced as
# 'transactions_dataset/transactions.csv' -- confirm against GCSHelper.
gcs_helper.upload_file('./data/transactions.csv', 'transactions_dataset')
# NOTE(review): this path starts with '/', unlike the other remote paths in
# this file -- confirm that list_files accepts a leading slash.
gcs_helper.list_files('/transactions_dataset')

Download a file from GCP to a local folder

# Download the remote CSV, passing './data' as the local target.
gcs_helper.download_file('transactions_dataset/transactions.csv', './data' )

# Optional cleanup: uncomment the next line to delete the remote file.
#gcs_helper.delete_file('transactions_dataset/transactions.csv')

Use files

Use the uploaded files on GCP in your RC Projects

# Register the GCP bucket as a data source. The config maps the bucket name
# and the access key file path onto their GcpConfig keys.
your_datasource_name = "my_cloud_ds"
gcp_storage_config = {
    GcpConfig.BUCKET: gcp_bucket_name,
    GcpConfig.ACCESS_KEY: gcp_key_path,
}

dataSource = DataSource.createDataSource(
    your_datasource_name, DataSourceType.GCP_STORAGE, gcp_storage_config
)
# Create the sample project on RC (requested with createEmpty=True).
project = Project.create(
    name='SampleProject', description='Sample Project', createEmpty=True
)
# Bare expression: presumably here to display the new project's id when this
# runs as a notebook cell.
project.id
# Remote path of the CSV within the bucket.
gcp_remote_filepath_trx = "transactions_dataset/transactions.csv"

# Create a dataset in RC using the remote cloud bucket csv file.
# Original author's note: saving output with csv input in recipe is not
# working properly.
trx_source_options = {GcpConfig.FILE_PATH: gcp_remote_filepath_trx}
raw_data = project.addDataset(
    dataset_name="transactions",
    dataset_description="transactions",
    data_source_id=dataSource.id,
    data_source_options=trx_source_options,
)

# Optional cleanup: uncomment the next line to delete the dataset.
#project.deleteDataset(raw_data.id)
# Bare expression: presumably here to display the dataset contents when this
# runs as a notebook cell.
raw_data.getData()