Manage cloud connection
from utils.rc.client.requests import Requests
from utils.rc.client.auth import AuthClient
from utils.rc.dtos.project import Project
from utils.rc.dtos.dataset import Dataset
from utils.rc.dtos.dataSource import DataSource
from utils.rc.dtos.dataSource import DataSourceType
from utils.rc.dtos.dataSource import GcpConfig
from utils.notebookhelpers.gcs import GCSHelper
import logging
from utils.utils.log_util import LogUtil
# Configure logging at INFO level with a compact "LEVEL:message" format.
LogUtil.set_basic_config(
    format='%(levelname)s:%(message)s',
    level=logging.INFO,
)
# Set the API root host (staging environment) for the client.
Requests.setRootHost("http://staging.dev.rapidcanvas.net/api/")
# NOTE(review): called with no arguments — presumably the token is resolved
# from the environment or an interactive prompt; confirm against AuthClient.
AuthClient.setToken()
Get access key
Get your GCP access key JSON from RapidCanvas and save it locally.
# Get your GCP access key JSON and the cloud bucket name from RapidCanvas,
# and save the key file locally. Both values below are placeholders to replace.
# Local filesystem path of the saved access key JSON file.
gcp_key_path = "/yourlocalpath/gcloud_bucket_cred.json"
# Name of the cloud bucket to work with.
gcp_bucket_name = "your_bucket_root"
List and upload
List and upload files to the GCP bucket.
# Build the GCS helper from the access key file path and the bucket name.
gcs_helper = GCSHelper.init(gcp_key_path, gcp_bucket_name)
# List files using an empty prefix — presumably everything in the bucket; confirm.
# The result is not captured: in a notebook it is shown as the cell output.
gcs_helper.list_files('')
Upload from local
Upload a file from your local folder to the GCP Cloud Storage bucket. You may need to do this only once.
# Upload the local CSV to the bucket. The second argument looks like the
# destination folder (the remote path used elsewhere in this file is
# 'transactions_dataset/transactions.csv') — confirm against GCSHelper.
gcs_helper.upload_file('./data/transactions.csv', 'transactions_dataset')
# NOTE(review): this prefix starts with '/', unlike the other remote paths in
# this file ('transactions_dataset/...'). Verify that list_files accepts a
# leading slash; otherwise the listing may come back empty.
gcs_helper.list_files('/transactions_dataset')
To download a file from GCP to a local folder:
# Fetch the uploaded CSV from the bucket into the local ./data folder.
gcs_helper.download_file('transactions_dataset/transactions.csv', './data')
# Optional: uncomment to delete the remote copy afterwards.
#gcs_helper.delete_file('transactions_dataset/transactions.csv')
Use files
Use the files uploaded to GCP in your RapidCanvas (RC) projects.
# Register the GCP bucket as a data source.
your_datasource_name = "my_cloud_ds"
gcp_storage_config = {
    GcpConfig.BUCKET: gcp_bucket_name,
    # NOTE(review): this passes the local key file *path*, not its contents —
    # confirm that is what createDataSource expects.
    GcpConfig.ACCESS_KEY: gcp_key_path,
}
dataSource = DataSource.createDataSource(
    your_datasource_name,
    DataSourceType.GCP_STORAGE,
    gcp_storage_config,
)
# Create an empty project on RC to hold the dataset.
project = Project.create(
    name='SampleProject',
    description='Sample Project',
    createEmpty=True,
)
# Bare expression: in a notebook this displays the new project's id.
project.id
# Remote path of the uploaded CSV inside the bucket.
gcp_remote_filepath_trx = "transactions_dataset/transactions.csv"

# Create a dataset in RC from the remote cloud-bucket CSV file.
# Known issue (author's note): saving output with csv input in a recipe is
# not working properly.
raw_data = project.addDataset(
    dataset_name="transactions",
    dataset_description="transactions",
    data_source_id=dataSource.id,
    data_source_options={GcpConfig.FILE_PATH: gcp_remote_filepath_trx},
)
# Optional cleanup: uncomment to remove the dataset again.
#project.deleteDataset(raw_data.id)
# Bare expression: in a notebook this displays the dataset's data.
raw_data.getData()