Quick Start
RapidCanvas offers three ways to build and run machine learning workflows, allowing you to choose the best fit for your development needs.
1. SDKs – Local Development
Install the SDK on your local machine to connect to RapidCanvas and use its syntax from your own development environment. This is a simple way to interact with the platform, enabling you to:
Create projects
Run recipes
Build DataApps
Execute various ML workflows
Refer to the SDK section.
2. Hosted Notebook – Ready-to-Use Environment
With the Hosted Notebook, everything is pre-installed, and authentication is handled automatically. You can start working immediately without worrying about dependencies or setup, making it a hassle-free way to interact with RapidCanvas using Python functions.
Refer to the Hosted Notebook section.
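Because setup and authentication are handled for you, the first cell of a Hosted Notebook can simply create the working context. This is the same boilerplate the examples below start with:

```python
from utils.notebookhelpers.helpers import Helpers

# No installation or credentials are needed in the Hosted Notebook; just create
# (or reuse) the context for this session
context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
```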
3. Code Recipe – In-Platform Coding
The Code Recipe option on the UI allows you to write, execute, and manage logic directly within the RapidCanvas platform.
Refer to the Code Recipe guide.
The same syntax applies across the SDK, Hosted Notebook, and Code Recipe, but the way you create and execute workflows will vary based on the chosen method.
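Regardless of where the code runs, the core pattern is the same: obtain a context, do your work in pandas, and register results through the Helpers API. Below is a minimal sketch of that shared pattern, using only the helper calls that appear in the full examples later in this guide (the dataset name is illustrative):

```python
import pandas as pd
from utils.notebookhelpers.helpers import Helpers

# Create (or reuse) the execution context for this recipe or notebook session
context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())

# Any pandas DataFrame can be registered as an output dataset on the canvas
df = pd.DataFrame({'id': [1, 2], 'name': ['alpha', 'beta']})
Helpers.save_output_dataset(context=context, output_name='my_first_dataset', data_frame=df)
```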
You can create and fetch datasets in RapidCanvas using the SDK, Hosted Notebook, or Code Recipe. Each method loads data onto the canvas with the same code syntax, as shown below:
```python
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Fetching Data from an API
# Required imports
from utils.notebookhelpers.helpers import Helpers
from utils.dtos.templateOutputCollection import TemplateOutputCollection
from utils.dtos.templateOutput import TemplateOutput, OutputType, ChartType, FileType
from utils.dtos.variable import Metadata
from utils.rcclient.commons.variable_datatype import VariableDatatype
from utils.dtos.rc_ml_model import RCMLModel
context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Importing necessary libraries: requests to make API calls and pandas for data manipulation
import requests
import pandas as pd
# Define the API URL from which to fetch user data
url = "https://jsonplaceholder.typicode.com/users"
# Make a GET request to the API to fetch the user data
response = requests.get(url)
# Check if the response from the API is successful (status code 200)
if response.status_code == 200:
    # Convert the API response from JSON format to a DataFrame
    data = response.json()
    users_df = pd.json_normalize(data)
    # Inform that the data retrieval was successful
    print("Successfully retrieved users.")
else:
    # If there's an error, print the error status code and message for troubleshooting
    print(f"Error: {response.status_code}")
    print(response.text)
    # Fall back to an empty DataFrame so downstream cells do not fail
    users_df = pd.DataFrame()
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Add dataset to output
Helpers.save_output_dataset(context=context, output_name='output_users_dataset11', data_frame=users_df)
```
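Once saved, the dataset appears on the canvas and can be read back in any downstream recipe. A minimal sketch, assuming a downstream recipe whose input is the output_users_dataset11 dataset saved above (getEntityData is the same helper used in the model example below):

```python
from utils.notebookhelpers.helpers import Helpers

context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())

# Read the dataset saved by the previous recipe back into a pandas DataFrame
users_df = Helpers.getEntityData(context, 'output_users_dataset11')
print(users_df.head())
```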
The second example builds a machine learning model: it defines a custom RCMLModel class, trains a Random Forest regressor on a car price dataset, and saves the trained model and its evaluation charts as outputs.

```python
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Build a machine learning model
# Required imports
from utils.notebookhelpers.helpers import Helpers
from utils.dtos.templateOutputCollection import TemplateOutputCollection
from utils.dtos.templateOutput import TemplateOutput, OutputType, ChartType, FileType
from utils.dtos.variable import Metadata
from utils.rcclient.commons.variable_datatype import VariableDatatype
from utils.dtos.rc_ml_model import RCMLModel
context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Define a custom class for the price prediction model
class PricePredictionModel(RCMLModel):
    import pickle  # Class-level import keeps the class self-contained when serialized

    # Load the saved model and encoders
    def load(self, artifacts):
        with open(artifacts['model_file'], 'rb') as model_file:
            self.ml_model = self.pickle.load(model_file)
        self.encoders = artifacts

    # Preprocess the input data to match the training data
    def pre_process(self, df_input):
        features_id = 'features.pkl'
        file_path = self.encoders[features_id]
        with open(file_path, 'rb') as handle:
            features_data = self.pickle.load(handle)
        # Handle dropped, numeric, and categorical columns
        for col in features_data['dropped_cols']:
            try:
                df_input = df_input.drop(columns=[col])
            except KeyError as e:
                print(f'Error dropping column {col}: {e}')
        for col in features_data.get('num_cols', []):
            try:
                if col in df_input.columns:
                    df_input[col].fillna(features_data[col], inplace=True)
            except KeyError:
                pass
        for col in features_data.get('cat_cols', []):
            try:
                if col in df_input.columns:
                    mode_value = features_data[col]
                    if pd.api.types.is_categorical_dtype(df_input[col]):
                        if mode_value not in df_input[col].cat.categories:
                            df_input[col] = df_input[col].cat.add_categories([mode_value])
                    df_input[col].fillna(mode_value, inplace=True)
            except KeyError:
                pass
        # One-hot encode categorical columns during prediction
        for ohe_col in features_data.get('cat_cols', []):
            try:
                if ohe_col in df_input.columns:
                    encoderId = ohe_col + '_ohe.pkl'
                    file_path = self.encoders[encoderId]
                    with open(file_path, 'rb') as handle:
                        ohe = self.pickle.load(handle)
                    df_input_ohe = pd.DataFrame(ohe.transform(df_input[[ohe_col]]).toarray(),
                                                columns=ohe.get_feature_names_out())
                    df_input = pd.concat([df_input, df_input_ohe], axis=1).drop(columns=[ohe_col])
            except Exception as e:
                print(f'Error during one-hot encoding for column {ohe_col}: {e}')
        return df_input

    # Make predictions on new data
    def predict(self, model_input):
        model_input = self.pre_process(model_input)
        predictions = self.ml_model.predict(model_input)
        return pd.DataFrame(predictions)
# Read the input dataset for processing
input_df_1 = Helpers.getEntityData(context, 'Car_Price_Prediction_data')  # Load the input dataset from the canvas
# Import necessary libraries
import pandas as pd # For data manipulation
import pickle # For saving and loading model and encoders
import os # For file path operations
import plotly.express as px # For data visualization
import numpy as np # For numerical operations
import plotly.io as pio # For controlling plotly display options
pio.templates.default = 'none' # Disable plotly templates
# Import machine learning libraries
from sklearn.model_selection import train_test_split as data_split # For splitting data
from sklearn.preprocessing import OneHotEncoder # For encoding categorical variables
from sklearn.ensemble import RandomForestRegressor # For training the model
from sklearn.metrics import mean_squared_error, r2_score # For evaluation metrics
# Import platform helper functions
from utils.notebookhelpers.helpers import Helpers
from utils.dtos.templateOutput import ModelOutput
from utils.dtos.rc_ml_model import RCMLModel
# Define which columns are numeric, categorical, dropped, and the target column
features_data = {}
features_data['num_cols'] = ['wheelbase', 'carlength', 'carwidth', 'carheight', 'curbweight',
'enginesize', 'boreratio', 'stroke', 'compressionratio', 'horsepower',
'peakrpm', 'citympg', 'highwaympg']
features_data['cat_cols'] = ['CarName', 'fueltype', 'aspiration', 'doornumber', 'carbody',
'drivewheel', 'enginelocation', 'enginetype', 'cylindernumber', 'fuelsystem']
features_data['dropped_cols'] = ['car_ID']
features_data['target_col'] = 'price'
# Drop unnecessary columns
input_df_1 = input_df_1.drop(columns=features_data['dropped_cols'])
# Handle missing numeric values by replacing them with the column mean
for col in features_data['num_cols']:
    if input_df_1[col].isnull().any():
        mean_val = int(round(input_df_1[col].mean()))
        input_df_1[col].fillna(mean_val, inplace=True)
        features_data[col] = mean_val
# Handle missing categorical values by replacing them with the mode (most frequent value)
for col in features_data['cat_cols']:
    if input_df_1[col].isnull().any():
        mode_value = input_df_1[col].mode()[0]
        input_df_1[col].fillna(mode_value, inplace=True)
        features_data[col] = mode_value
# One-hot encode the categorical variables and store the encoders for each column
features_data['ohe_cols'] = []
for col in features_data['cat_cols']:
    ohe = OneHotEncoder(handle_unknown='ignore', dtype=np.int64)
    ohe.fit(input_df_1[[col]])
    ohe_features = pd.DataFrame(ohe.transform(input_df_1[[col]]).toarray(),
                                columns=ohe.get_feature_names_out())
    input_df_1 = pd.concat([input_df_1, ohe_features], axis=1).drop(columns=[col])
    encoderId = col + '_ohe.pkl'
    with open(os.path.join(Helpers.getChildDir(context), encoderId), 'wb') as handle:
        pickle.dump(ohe, handle)
    features_data['ohe_cols'].append(col)
# Separate the features (X) and target (y)
X = input_df_1.drop(columns=[features_data['target_col']])
y = input_df_1[features_data['target_col']]
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = data_split(X, y, test_size=0.2, random_state=42)
# Train a Random Forest Regressor model
rf = RandomForestRegressor(random_state=42)
rf.fit(X_train, y_train)
# Save the trained model to the artifacts directory
artifacts = {}
model_path = os.path.join(Helpers.getChildDir(context), 'model_price_prediction.pkl')
with open(model_path, 'wb') as f:
    pickle.dump(rf, f)
artifacts['model_file'] = model_path
# Save the feature metadata
features_id = 'features.pkl'
features_path = os.path.join(Helpers.getChildDir(context), features_id)
with open(features_path, 'wb') as handle:
    pickle.dump(features_data, handle)
artifacts[features_id] = features_path
# Save each one-hot encoder for future use
for ohe_col in features_data['ohe_cols']:
    encoderId = ohe_col + '_ohe.pkl'
    artifacts[encoderId] = os.path.join(Helpers.getChildDir(context), encoderId)
# Output the trained model for future predictions
model = ModelOutput(PricePredictionModel, artifacts=artifacts)
# Make predictions on the test data and generate plots for evaluation
y_pred = rf.predict(X_test)
# Plot Actual vs Predicted Prices
fig_1 = px.scatter(x=y_test, y=y_pred, labels={'x': 'Actual Price', 'y': 'Predicted Price'},
title='Actual vs Predicted Prices')
fig_1.update_layout(autosize=False, height=450, width=950)
fig_1.show()
# Plot Residuals vs Predicted Prices
residuals = y_test - y_pred
fig_2 = px.scatter(x=y_pred, y=residuals, labels={'x': 'Predicted Price', 'y': 'Residuals'},
title='Residuals vs Predicted Prices')
fig_2.update_layout(autosize=False, height=450, width=950)
fig_2.show()
# Plot Histogram of Residuals
fig_3 = px.histogram(residuals, nbins=30, labels={'value': 'Residuals'},
title='Distribution of Residuals')
fig_3.update_layout(autosize=False, height=450, width=950)
fig_3.show()
# Plot Feature Importance
feature_importance = rf.feature_importances_
features = X.columns
fig_4 = px.bar(x=features, y=feature_importance, labels={'x': 'Features', 'y': 'Importance'},
title='Feature Importance')
fig_4.update_layout(autosize=False, height=450, width=950)
fig_4.show()
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Add charts and model to output
Helpers.save_output_plotly_chart_as_json(context=context, chart_title='actual-vs-predicted', plotly_fig=fig_1, group=None)
Helpers.save_output_plotly_chart_as_json(context=context, chart_title='residuals-vs-predicted', plotly_fig=fig_2, group=None)
Helpers.save_output_plotly_chart_as_json(context=context, chart_title='residuals-distribution', plotly_fig=fig_3, group=None)
Helpers.save_output_plotly_chart_as_json(context=context, chart_title='feature-importance', plotly_fig=fig_4, group=None)
Helpers.save_output_rc_ml_model(context=context, model_name='myModel', model_obj=PricePredictionModel, artifacts=artifacts)
Helpers.save(context)
```
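Once saved, the model is available on the canvas and can be loaded back through the load/predict interface defined above. A minimal sketch of that lifecycle, feeding raw (un-encoded) rows so that pre_process re-applies the stored imputation values and one-hot encoders (the five-row sample is illustrative):

```python
# Exercise the saved model the same way the serving layer would,
# via the RCMLModel interface defined above
model = PricePredictionModel()
model.load(artifacts)  # Restores the Random Forest and the saved encoders

# Take a few raw rows without the target column and score them;
# pre_process() drops car_ID, fills missing values, and one-hot encodes
raw_sample = Helpers.getEntityData(context, 'Car_Price_Prediction_data')
raw_sample = raw_sample.drop(columns=[features_data['target_col']]).head(5)
print(model.predict(raw_sample))
```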