Multiple Files
# Get the latest lib from Rapidcanvas
# !pip install --extra-index-url=https://us-central1-python.pkg.dev/rapidcanvas-361003/pypi/simple utils==0.12dev0
from utils.rc.client.requests import Requests
from utils.rc.client.auth import AuthClient
from utils.rc.dtos.project import Project
from utils.rc.dtos.dataset import Dataset
from utils.rc.dtos.recipe import Recipe
from utils.rc.dtos.transform import Transform
from utils.rc.client.files import FilesClient
from utils.rc.dtos.template_v2 import TemplateV2, TemplateTransformV2
import pandas as pd
import logging
from utils.utils.log_util import LogUtil
# Configure logging so records print as "LEVEL:message" (the format seen in the
# recorded outputs below), with the level set to INFO.
LogUtil.set_basic_config(format='%(levelname)s:%(message)s', level=logging.INFO)
# Optional host override, left disabled.
# NOTE(review): presumably points the client at the dev API — confirm before enabling.
# Requests.setRootHost("https://test.dev.rapidcanvas.net/api/")
# Called with no arguments.
# NOTE(review): where the token comes from is not visible here — confirm.
AuthClient.setToken()
# Create the project that the dataset, recipe and template below attach to.
# createEmpty=True is passed through as-is.
# NOTE(review): presumably skips seeding sample content — confirm against the client docs.
project = Project.create(
    name="sample_multi_files",
    description="Testing python lib",
    createEmpty=True,
)
INFO:Creating new project by name: sample_multi_files
{'id': '216edb96-1795-4561-940b-a04ae228062d', 'name': 'sample_multi_files', 'description': 'Testing python lib', 'icon': None, 'image': None, 'createdAt': 1672892953405, 'updatedAt': 1672892953405, 'creator': 'roshan@rapid.ai', 'industries': [], 'useCases': [], 'metadata': {}, 'envId': None, 'canvasEdgeStyle': 'SQUARED', 'display_name': None}
# Display the id of the newly created project (notebook cell output).
project.id
'216edb96-1795-4561-940b-a04ae228062d'
# One logical dataset backed by several CSV parts: dataset_file_path receives a
# list of paths rather than a single path.
titanic_parts = [
    "data/titanic.1.csv",
    "data/titanic.2.csv",
    "data/titanic.3.csv",
]
titanic = project.addDataset(
    dataset_name="titanic",
    dataset_description="titanic golden",
    dataset_file_path=titanic_parts,
)
INFO:Creating new dataset by name:titanic
INFO:Uploading file data/titanic.1.csv ....
INFO:Uploading file data/titanic.2.csv ....
INFO:Uploading file data/titanic.3.csv ....
INFO:Uploading Done
# Create a recipe named "recipe_v1" with the multi-file dataset as its only input.
recipe = project.addRecipe([titanic], name="recipe_v1")
INFO:Creating new recipe
# Display the id of the newly created recipe (notebook cell output).
recipe.id
'03efc51c-88e5-41cc-954b-2c2e906c6e47'
# Define a project-scoped custom template backed by a notebook, then publish it.
template = TemplateV2(
    name="MultiFileTransform",
    description="MultiFileTransform",
    project_id=project.id,
    source="CUSTOM",
    status="ACTIVE",
    tags=["Number", "datatype-long"],
)
# The template's single base transform is a python step that refers to the
# notebook by file name.
template_transform = TemplateTransformV2(
    type="python",
    params={"notebookName": "MultiFileTransform.ipynb"},
)
template.base_transforms = [template_transform]
# Publish using the notebook at this local path; template.id is read afterwards
# when the transform is built.
template.publish("transforms/MultiFileTransform.ipynb")
INFO:Publishing template | data=TemplateV2(name='MultiFileTransform', display_name=None, id=None, version='1.0', project_id='216edb96-1795-4561-940b-a04ae228062d', projectId='216edb96-1795-4561-940b-a04ae228062d', is_global=False, description='MultiFileTransform', tags=['Number', 'datatype-long'], baseTransforms=[TemplateTransformV2(type='python', params={'notebookName': 'MultiFileTransform.ipynb'})], base_transforms=[TemplateTransformV2(type='python', params={'notebookName': 'MultiFileTransform.ipynb'})], source='CUSTOM', status='ACTIVE', inputs=[]) INFO:Template Published INFO:Generating grammar tables from /Users/nikunj/miniconda3/lib/python3.8/site-packages/blib2to3/Grammar.txt INFO:Writing grammar tables to /Users/nikunj/Library/Caches/black/22.1.0/Grammar3.8.11.final.0.pickle INFO:Writing failed: [Errno 2] No such file or directory: '/Users/nikunj/Library/Caches/black/22.1.0/tmp9_w12z57' INFO:Generating grammar tables from /Users/nikunj/miniconda3/lib/python3.8/site-packages/blib2to3/PatternGrammar.txt INFO:Writing grammar tables to /Users/nikunj/Library/Caches/black/22.1.0/PatternGrammar3.8.11.final.0.pickle INFO:Writing failed: [Errno 2] No such file or directory: '/Users/nikunj/Library/Caches/black/22.1.0/tmpl_wczrn8' WARNING:Input notebook does not contain a cell with tag 'parameters' INFO:Executing notebook with kernel: python3 ************************************** ** CREATING INPUTS: outputDataset ** ************************************** Inputs created successfully | template_id=c4dc6394-f286-46fb-93ca-74feee40948c
# Build a transform instance from the published template and attach it to the recipe.
transform = Transform()
transform.templateId = template.id
transform.name = "transform"
# "outputDataset" matches the input name reported when the template was published.
transform.variables = {"outputDataset": "merged"}
# NOTE(review): the recorded run logs a deprecation warning for add_transform, but
# the warning names add_transform itself as the replacement. Confirm the intended
# replacement API before migrating this call.
recipe.add_transform(transform)
WARNING:
#############################################IMPORTANT#############################################
add_transform is going to deprecate soon. Please use add_transform instead
####################################################################################################
INFO:Adding new transform
INFO:Transform added Successfully
# Execute the recipe; the recorded output logs a UI link for following progress.
recipe.run()
INFO:Started running
INFO:You can look at the progress on UI at https://test.dev.rapidcanvas.net/#/projects/216edb96-1795-4561-940b-a04ae228062d
INFO:No errors found
# Display the recipe's output datasets; the result is a dict keyed by dataset name.
recipe.getChildrenDatasets()
{'merged_titanic': <utils.rc.dtos.dataset.Dataset at 0x143f726d0>}
# Check the merged output produced by the transform.
outputEntity = recipe.getChildrenDatasets()["merged_titanic"]
# NOTE(review): num_rows=300 is presumably an upper bound on rows fetched, and 90
# presumably the combined row count of the three input files — confirm both.
merged_row_count = outputEntity.getData(num_rows=300).shape[0]
assert merged_row_count == 90, "shape of merged dataset not match"