from utils.rc.client.requests import Requests
from utils.rc.client.auth import AuthClient
from utils.rc.dtos.project import Project
from utils.rc.dtos.dataset import Dataset
from utils.rc.dtos.recipe import Recipe
from utils.rc.dtos.transform import Transform
from utils.rc.dtos.template import Template
from utils.rc.dtos.template import TemplateTransform
from utils.rc.dtos.template import TemplateInput
from utils.rc.dtos.template_v2 import TemplateV2, TemplateTransformV2
from utils.rc.dtos.segment import Segment, ItemExpression, Operator
from utils.rc.dtos.scenario import Scenario
from utils.rc.dtos.dataSource import DataSource
from utils.rc.dtos.dataSource import DataSourceType
from utils.rc.dtos.dataSource import SnowflakeConfig
from utils.rc.dtos.dataSource import MongoConfig
from utils.rc.dtos.dataSource import S3Config
from utils.rc.dtos.dataSource import GcpConfig
from utils.rc.dtos.dataSource import AzureBlobConfig
from utils.rc.dtos.dataSource import MySQLConfig
from utils.rc.dtos.dataSource import RedshiftConfig
from utils.rc.dtos.dataSource import RedisStorageConfig
from utils.rc.dtos.projectRun import ProjectRun  # used by the scheduled-job (ProjectRun) examples below; adjust the module path if it differs in your SDK version
import pandas as pd
import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.DEBUG)
# Requests.setRootHost("https://test.dev.rapidcanvas.net/api/")
# Requests.setRootHost("http://localhost:8080/api/")
AuthClient.setToken()
INFO:Authentication successful
Snowflake
Use this code snippet in the Notebook to establish a connection with the Snowflake data source.
dataSource = DataSource.createDataSource(
    "snowflake-101",
    DataSourceType.SNOWFLAKE,
    {
        SnowflakeConfig.USER: "username",
        SnowflakeConfig.PASSWORD: "password",
        SnowflakeConfig.ACCOUNT: "account-identifier"
    }
)
The following code snippet is used to create a project.
project = Project.create(
    name="Test Snowflake",
    description="Testing snowflake lib",
    icon="https://rapidcanvas.ai/wp-content/uploads/2022/09/bitcoin_prediction_med.jpg",
    createEmpty=True
)
The following code snippet is used in the Notebook to fetch data from Snowflake and upload it onto the canvas as a dataset.
signup = project.addDataset(
    dataset_name="signup",
    dataset_description="signup golden",
    data_source_id=dataSource.id,
    data_source_options={
        SnowflakeConfig.WAREHOUSE: "COMPUTE_WH",
        SnowflakeConfig.QUERY: "SELECT * FROM rapidcanvas.public.SIGNUP"
    }
)
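The QUERY option accepts any SQL that Snowflake can run, so you can filter or aggregate the data at the source before it reaches the canvas. A minimal sketch of the same call restricted to recent rows, assuming a hypothetical SIGNUP_DATE column on the table:
recent_signups = project.addDataset(
    dataset_name="recent_signups",
    dataset_description="signups from 2023 onwards",
    data_source_id=dataSource.id,
    data_source_options={
        SnowflakeConfig.WAREHOUSE: "COMPUTE_WH",
        # SIGNUP_DATE is a hypothetical column; substitute a real column from your table
        SnowflakeConfig.QUERY: "SELECT * FROM rapidcanvas.public.SIGNUP WHERE SIGNUP_DATE >= '2023-01-01'"
    }
)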
The following code snippet allows you to export the output dataset to the Snowflake datasource.
dataset.update_sync_options(
    dataSource.id,
    {
        SnowflakeConfig.TABLE: "table name",
        SnowflakeConfig.DATABASE: "database name",
        SnowflakeConfig.SCHEMA: "schema name",
        # "append" adds the exported rows to the table if it already exists
        SnowflakeConfig.IF_TABLE_EXISTS: "append"
    }
)
dataset.sync()
When a scheduled job runs, the source dataset, updated with a fresh set of records, is used in the project's machine learning flow to generate a new output dataset. This output dataset is then exported to Snowflake.
project_run = ProjectRun.create_project_run(
    project.id, "test-run-v1", "*/2 * * * *"
)
project_run.add_project_run_sync(
    dataset.id,
    dataSource.id,
    {
        SnowflakeConfig.TABLE: "table name",
        SnowflakeConfig.DATABASE: "database name",
        SnowflakeConfig.SCHEMA: "schema name"
    }
)
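The third argument to ProjectRun.create_project_run is a standard cron expression; "*/2 * * * *" above runs the job every two minutes, which is mainly useful for testing. A sketch of the same call with a nightly schedule instead:
# "0 0 * * *" is standard cron syntax for 00:00 every day
project_run = ProjectRun.create_project_run(
    project.id, "test-run-nightly", "0 0 * * *"
)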
Mongo
Use this code snippet in the Notebook to establish a connection with the Mongo data source.
dataSource = DataSource.createDataSource(
    "mongo-101",
    DataSourceType.MONGO,
    {
        MongoConfig.CONNECT_STRING: "mongodb://testuser2:testuser2@34.68.122.18:27017/test"
    }
)
2023-02-02 12:07:09.094 INFO root: Found existing data source by name: mongo-101
2023-02-02 12:07:09.095 INFO root: Updating the same
The following code snippet is used to create a project.
project = Project.create(
    name="Test Mongodb",
    description="Testing mongodb lib",
    icon="https://rapidcanvas.ai/wp-content/uploads/2022/09/bitcoin_prediction_med.jpg",
    createEmpty=True
)
2023-02-02 12:09:07.010 INFO root: Found existing project by name: Test Mongodb
2023-02-02 12:09:07.011 INFO root: Deleting existing project
2023-02-02 12:09:07.123 INFO root: Creating new project by name: Test Mongodb
The following code snippet is used to upload the dataset fetched from the Mongo database onto the canvas.
titanic = project.addDataset(
    dataset_name="titanic",
    dataset_description="titanic golden",
    data_source_id=dataSource.id,
    data_source_options={
        MongoConfig.DATABASE: "test",
        MongoConfig.COLLECTION: "titanic",
        MongoConfig.QUERY_IN_JSON_FORMAT: "{}"
    }
)
2023-02-02 12:09:07.300 INFO root: Creating new dataset by name:titanic
A preview of the imported titanic dataset is displayed: 100 rows × 12 columns (PassengerId, Survived, Pclass, Name, Sex, Age, SibSp, Parch, Ticket, Fare, Cabin, Embarked).
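QUERY_IN_JSON_FORMAT takes a standard MongoDB filter document, and "{}" imports the whole collection. A sketch of the same call restricted to a subset of documents, assuming the Survived field shown in the preview above:
survivors = project.addDataset(
    dataset_name="titanic_survivors",
    dataset_description="passengers who survived",
    data_source_id=dataSource.id,
    data_source_options={
        MongoConfig.DATABASE: "test",
        MongoConfig.COLLECTION: "titanic",
        # a standard MongoDB filter; only documents with Survived == 1 are imported
        MongoConfig.QUERY_IN_JSON_FORMAT: '{"Survived": 1}'
    }
)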
The following code snippet allows you to export the output dataset to the Mongo datasource.
dataset.update_sync_options(
    dataSource.id,
    {
        MongoConfig.COLLECTION: "collection name",
        MongoConfig.DATABASE: "database name"
    }
)
dataset.sync()
When a scheduled job runs, the source dataset, updated with a fresh set of records, is used in the project's machine learning flow to generate a new output dataset. This output dataset is then exported to Mongo.
project_run = ProjectRun.create_project_run(
    project.id, "test-run-v1", "*/2 * * * *"
)
project_run.add_project_run_sync(
    dataset.id,
    dataSource.id,
    {
        MongoConfig.COLLECTION: "collection name",
        MongoConfig.DATABASE: "database name"
    }
)
Amazon S3
Use this code snippet in the Notebook to establish a connection with the Amazon S3 data source.
dataSource = DataSource.createDataSource(
    "s3-101",
    DataSourceType.S3_STORAGE,
    {
        S3Config.BUCKET: "bucket-name",
        S3Config.ACCESS_KEY_ID: "access-key-id",
        S3Config.ACCESS_KEY_SECRET: "access-key-secret"
    }
)
The following code snippet is used to create a project.
project = Project.create(
    name="Test Amazon S3",
    description="Testing Amazon S3",
    icon="https://rapidcanvas.ai/wp-content/uploads/2022/09/bitcoin_prediction_med.jpg",
    createEmpty=True
)
The following code snippet is used to upload the dataset that is imported from Amazon S3 onto the canvas.
project.addDataset(
    dataset_name="signup",
    dataset_description="signup golden",
    data_source_id=dataSource.id,
    data_source_options={
        S3Config.FILE_PATH: "file-path"
    }
)
The following code snippet allows you to export the output dataset to the Amazon S3 datasource.
dataset.update_sync_options(
    dataSource.id,
    {
        S3Config.OUTPUT_FILE_DIRECTORY: "files/",
        S3Config.OUTPUT_FILE_NAME: "dataset.parquet"
    }
)
dataset.sync()
When a scheduled job runs, the source dataset, updated with a fresh set of records, is used in the project's machine learning flow to generate a new output dataset. This output dataset is then exported to Amazon S3. The ${RUN_ID} placeholder in the output file name is substituted for each run, so successive runs write distinct files instead of overwriting one another.
project_run = ProjectRun.create_project_run(
    project.id, "test-run-v1", "*/2 * * * *"
)
project_run.add_project_run_sync(
    dataset.id,
    dataSource.id,
    {
        S3Config.OUTPUT_FILE_DIRECTORY: "files/",
        S3Config.OUTPUT_FILE_NAME: "dataset-${RUN_ID}.parquet"
    }
)
Google Cloud Storage (GCS)
Use this code snippet in the Notebook to establish a connection with the Google Cloud Storage data source.
dataSource = DataSource.createDataSource(
    "gcp-101",
    DataSourceType.GCP_STORAGE,
    {
        GcpConfig.BUCKET: "bucket-name",
        GcpConfig.ACCESS_KEY: "access key path"
    }
)
The following code snippet is used to create a project.
project = Project.create(
    name="Test Google Cloud Storage",
    description="Testing Google Cloud Storage",
    icon="https://rapidcanvas.ai/wp-content/uploads/2022/09/bitcoin_prediction_med.jpg",
    createEmpty=True
)
The following code snippet is used to upload the dataset that is imported from Google Cloud Storage onto the canvas.
project.addDataset(
    dataset_name="signup",
    dataset_description="signup golden",
    data_source_id=dataSource.id,
    data_source_options={
        GcpConfig.FILE_PATH: "file-path"
    }
)
The following code snippet allows you to export the output dataset to the Google Cloud Storage (GCS) datasource.
dataset.update_sync_options(
    dataSource.id,
    {
        GcpConfig.OUTPUT_FILE_DIRECTORY: "files/",
        GcpConfig.OUTPUT_FILE_NAME: "dataset.parquet"
    }
)
dataset.sync()
When a scheduled job runs, the source dataset, updated with a fresh set of records, is used in the project's machine learning flow to generate a new output dataset. This output dataset is then exported to Google Cloud Storage.
project_run = ProjectRun.create_project_run(
    project.id, "test-run-v1", "*/2 * * * *"
)
project_run.add_project_run_sync(
    dataset.id,
    dataSource.id,
    {
        GcpConfig.OUTPUT_FILE_DIRECTORY: "files/",
        GcpConfig.OUTPUT_FILE_NAME: "dataset-${RUN_ID}.parquet"
    }
)
Azure Blob Storage
Use this code snippet in the Notebook to establish a connection with the Azure Blob Storage data source.
dataSource = DataSource.createDataSource(
    "azure-101",
    DataSourceType.AZURE_BLOB,
    {
        AzureBlobConfig.CONTAINER_NAME: "container-name",
        AzureBlobConfig.CONNECT_STR: "connect-string"
    }
)
The following code snippet is used to create a project.
project = Project.create(
    name="Test Azure Blob Storage",
    description="Testing Azure Blob Storage",
    icon="https://rapidcanvas.ai/wp-content/uploads/2022/09/bitcoin_prediction_med.jpg",
    createEmpty=True
)
The following code snippet is used to upload the dataset that is imported from Azure Blob Storage onto the canvas. The FILE_PATH option specifies where the file is located within the container.
project.addDataset(
    dataset_name="signup",
    dataset_description="signup golden",
    data_source_id=dataSource.id,
    data_source_options={
        AzureBlobConfig.FILE_PATH: "file-path"
    }
)
The following code snippet allows you to export the output dataset to the Azure Blob datasource.
dataset.update_sync_options(
    dataSource.id,
    {
        AzureBlobConfig.OUTPUT_FILE_DIRECTORY: "files/",
        AzureBlobConfig.OUTPUT_FILE_NAME: "dataset.parquet"
    }
)
dataset.sync()
When a scheduled job runs, the source dataset, updated with a fresh set of records, is used in the project's machine learning flow to generate a new output dataset. This output dataset is then exported to Azure Blob.
project_run = ProjectRun.create_project_run(
    project.id, "test-run-v1", "*/2 * * * *"
)
project_run.add_project_run_sync(
    dataset.id,
    dataSource.id,
    {
        AzureBlobConfig.OUTPUT_FILE_DIRECTORY: "files/",
        AzureBlobConfig.OUTPUT_FILE_NAME: "dataset-${RUN_ID}.parquet"
    }
)
MySQL/MsSQL
Use this code snippet in the Notebook to establish a connection with the MySQL data source.
dataSource = DataSource.createDataSource(
    "mysql-101",
    DataSourceType.MYSQL,
    {
        MySQLConfig.CONNECT_STRING: "mysql://root:password@34.170.43.138/azure"
    }
)
The following code snippet is used to create a project.
project = Project.create(
    name="Test MySQL/MsSQL",
    description="Testing MySQL/MsSQL",
    icon="https://rapidcanvas.ai/wp-content/uploads/2022/09/bitcoin_prediction_med.jpg",
    createEmpty=True
)
The following code snippet is used to upload the dataset that is imported from MySQL/MsSQL onto the canvas.
dataset = project.addDataset(
    dataset_name="titanic",
    dataset_description="titanic golden",
    data_source_id=dataSource.id,
    data_source_options={
        MySQLConfig.QUERY: "SELECT * FROM titanic limit 100"
    }
)
The following code snippet allows you to export the output dataset to the MySQL/MsSQL datasource.
dataset.update_sync_options(
    dataSource.id,
    {
        MySQLConfig.TABLE: "titanic"
    }
)
dataset.sync()
When a scheduled job runs, the source dataset, updated with a fresh set of records, is used in the project's machine learning flow to generate a new output dataset. This output dataset is then exported to MySQL/MsSQL.
project_run = ProjectRun.create_project_run(
    project.id, "test-run-v1", "*/2 * * * *"
)
project_run.add_project_run_sync(
    dataset.id,
    dataSource.id,
    {
        MySQLConfig.TABLE: "titanic"
    }
)
Redshift
Use this code snippet in the Notebook to establish a connection with the Redshift data source.
dataSource = DataSource.createDataSource(
    "redshift-101",
    DataSourceType.REDSHIFT,
    {
        # replace with the connection string for your Redshift cluster
        RedshiftConfig.CONNECT_STRING: "redshift-connection-string"
    }
)
The following code snippet is used to create a project.
project = Project.create(
    name="Test Redshift",
    description="Testing Redshift",
    icon="https://rapidcanvas.ai/wp-content/uploads/2022/09/bitcoin_prediction_med.jpg",
    createEmpty=True
)
The following code snippet is used to upload the dataset that is imported from Redshift onto the canvas.
dataset = project.addDataset(
    dataset_name="titanic",
    dataset_description="titanic golden",
    data_source_id=dataSource.id,
    data_source_options={
        RedshiftConfig.QUERY: "SELECT * FROM titanic limit 100"
    }
)
The following code snippet allows you to export the output dataset to the Redshift datasource.
dataset.update_sync_options(
    dataSource.id,
    {
        RedshiftConfig.TABLE: "titanic"
    }
)
dataset.sync()
When a scheduled job runs, the source dataset, updated with a fresh set of records, is used in the project's machine learning flow to generate a new output dataset. This output dataset is then exported to Redshift.
project_run = ProjectRun.create_project_run(
    project.id, "test-run-v1", "*/2 * * * *"
)
project_run.add_project_run_sync(
    dataset.id,
    dataSource.id,
    {
        RedshiftConfig.TABLE: "titanic"
    }
)
Redis
Use this code snippet in the Notebook to establish a connection with the Redis data source.
dataSource = DataSource.createDataSource(
    "redis-101",
    DataSourceType.REDIS_STORAGE,
    {
        RedisStorageConfig.HOST: "127.0.0.1",
        RedisStorageConfig.PORT: "6379"
    }
)
The following code snippet is used to create a project.
project = Project.create(
    name="Test Redis",
    description="Testing Redis",
    icon="https://rapidcanvas.ai/wp-content/uploads/2022/09/bitcoin_prediction_med.jpg",
    createEmpty=True
)
Note: You cannot import files from Redis into the platform; you can only export files to this data source.
The following code snippet allows you to export the output dataset to the Redis datasource.
dataset.update_sync_options(
    dataSource.id,
    {
        RedisStorageConfig.FEATURE_NAME: "titanic",
        RedisStorageConfig.FEATURE_KEY_COLUMN: "PassengerId",
        RedisStorageConfig.FEATURE_VALUE_COLUMNS: "Sex,Parch"
    }
)
dataset.sync()
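FEATURE_VALUE_COLUMNS is a comma-separated list, so additional columns from the dataset can be stored against the same key. A sketch extending the example above with the Age and Fare columns from the titanic dataset:
dataset.update_sync_options(
    dataSource.id,
    {
        RedisStorageConfig.FEATURE_NAME: "titanic",
        RedisStorageConfig.FEATURE_KEY_COLUMN: "PassengerId",
        # comma-separated list of columns exported against each key
        RedisStorageConfig.FEATURE_VALUE_COLUMNS: "Sex,Parch,Age,Fare"
    }
)
dataset.sync()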
When a scheduled job runs, the source dataset, updated with a fresh set of records, is used in the project's machine learning flow to generate a new output dataset. This output dataset is then exported to Redis.
project_run = ProjectRun.create_project_run(
    project.id, "test-run-v1", "*/2 * * * *"
)
project_run.add_project_run_sync(
    dataset.id,
    dataSource.id,
    {
        RedisStorageConfig.FEATURE_NAME: "titanic",
        RedisStorageConfig.FEATURE_KEY_COLUMN: "PassengerId",
        RedisStorageConfig.FEATURE_VALUE_COLUMNS: "Sex,Parch"
    }
)