Quick Start
RapidCanvas offers three ways to build and run machine learning workflows, allowing you to choose the best fit for your development needs.
1. SDKs – Local Development
Install the SDK on your local machine to connect to RapidCanvas and use its syntax from your own development environment. This is a simple way to interact with the platform, enabling you to:
Create projects
Run recipes
Build DataApps
Execute various ML workflows
Refer to the SDK section.
2. Hosted Notebook – Ready-to-Use Environment
With the Hosted Notebook, everything is pre-installed, and authentication is handled automatically. You can start working immediately without worrying about dependencies or setup, making it a hassle-free way to interact with RapidCanvas using Python functions.
Refer to the Hosted Notebook section.
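Because setup and authentication are handled for you, the first cell of a Hosted Notebook can simply create the working context. This is the same boilerplate the examples below start with:

```python
from utils.notebookhelpers.helpers import Helpers

# No installation or credentials are needed in the Hosted Notebook; just create
# (or reuse) the context for this session
context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
```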
3. Code Recipe – In-Platform Coding
The Code Recipe option on the UI allows you to write, execute, and manage logic directly within the RapidCanvas platform.
Refer to the Code Recipe guide.
The same syntax applies across the SDK, Hosted Notebook, and Code Recipe, but the way you create and execute workflows will vary based on the chosen method.
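Regardless of where the code runs, the core pattern is the same: obtain a context, do your work in pandas, and register results through the Helpers API. Below is a minimal sketch of that shared pattern, using only the helper calls that appear in the full examples later in this guide (the dataset name is illustrative):

```python
import pandas as pd
from utils.notebookhelpers.helpers import Helpers

# Create (or reuse) the execution context for this recipe or notebook session
context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())

# Any pandas DataFrame can be registered as an output dataset on the canvas
df = pd.DataFrame({'id': [1, 2], 'name': ['alpha', 'beta']})
Helpers.save_output_dataset(context=context, output_name='my_first_dataset', data_frame=df)
```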
You can create and fetch datasets in RapidCanvas using the SDK, Hosted Notebook, or Code Recipe. Each method loads data onto the canvas with the same code syntax, as shown below:
```python
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Fetching Data from an API
# Required imports
from utils.notebookhelpers.helpers import Helpers
from utils.dtos.templateOutputCollection import TemplateOutputCollection
from utils.dtos.templateOutput import TemplateOutput, OutputType, ChartType, FileType
from utils.dtos.variable import Metadata
from utils.rcclient.commons.variable_datatype import VariableDatatype
from utils.dtos.rc_ml_model import RCMLModel
context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Importing necessary libraries: requests to make API calls and pandas for data manipulation
import requests
import pandas as pd
# Define the API URL from which to fetch user data
url = "https://jsonplaceholder.typicode.com/users"
# Make a GET request to the API to fetch the user data
response = requests.get(url)
# Check if the response from the API is successful (status code 200)
if response.status_code == 200:
    # Convert the API response from JSON format to a DataFrame
    data = response.json()
    users_df = pd.json_normalize(data)
    # Inform that the data retrieval was successful
    print("Successfully retrieved users.")
else:
    # If there's an error, print the error status code and message for troubleshooting
    print(f"Error: {response.status_code}")
    print(response.text)
    # Fall back to an empty DataFrame so downstream cells do not fail
    users_df = pd.DataFrame()
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Add dataset to output
Helpers.save_output_dataset(context=context, output_name='output_users_dataset11', data_frame=users_df)
```
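Once saved, the dataset appears on the canvas and can be read back in any downstream recipe. A minimal sketch, assuming a downstream recipe whose input is the output_users_dataset11 dataset saved above (getEntityData is the same helper used in the model example below):

```python
from utils.notebookhelpers.helpers import Helpers

context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())

# Read the dataset saved by the previous recipe back into a pandas DataFrame
users_df = Helpers.getEntityData(context, 'output_users_dataset11')
print(users_df.head())
```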
The second example builds a machine learning model: it defines a custom RCMLModel class, trains a Random Forest regressor on a car price dataset, and saves the trained model and its evaluation charts as outputs.

```python
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Build a machine learning model
# Required imports
from utils.notebookhelpers.helpers import Helpers
from utils.dtos.templateOutputCollection import TemplateOutputCollection
from utils.dtos.templateOutput import TemplateOutput, OutputType, ChartType, FileType
from utils.dtos.variable import Metadata
from utils.rcclient.commons.variable_datatype import VariableDatatype
from utils.dtos.rc_ml_model import RCMLModel
context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Define a custom class for the price prediction model
class PricePredictionModel(RCMLModel):
    import pickle  # Class-level import keeps the class self-contained when serialized

    # Load the saved model and encoders
    def load(self, artifacts):
        with open(artifacts['model_file'], 'rb') as model_file:
            self.ml_model = self.pickle.load(model_file)
        self.encoders = artifacts

    # Preprocess the input data to match the training data
    def pre_process(self, df_input):
        features_id = 'features.pkl'
        file_path = self.encoders[features_id]
        with open(file_path, 'rb') as handle:
            features_data = self.pickle.load(handle)
        # Handle dropped, numeric, and categorical columns
        for col in features_data['dropped_cols']:
            try:
                df_input = df_input.drop(columns=[col])
            except KeyError as e:
                print(f'Error dropping column {col}: {e}')
        for col in features_data.get('num_cols', []):
            try:
                if col in df_input.columns:
                    df_input[col].fillna(features_data[col], inplace=True)
            except KeyError:
                pass
        for col in features_data.get('cat_cols', []):
            try:
                if col in df_input.columns:
                    mode_value = features_data[col]
                    if pd.api.types.is_categorical_dtype(df_input[col]):
                        if mode_value not in df_input[col].cat.categories:
                            df_input[col] = df_input[col].cat.add_categories([mode_value])
                    df_input[col].fillna(mode_value, inplace=True)
            except KeyError:
                pass
        # One-hot encode categorical columns during prediction
        for ohe_col in features_data.get('cat_cols', []):
            try:
                if ohe_col in df_input.columns:
                    encoderId = ohe_col + '_ohe.pkl'
                    file_path = self.encoders[encoderId]
                    with open(file_path, 'rb') as handle:
                        ohe = self.pickle.load(handle)
                    df_input_ohe = pd.DataFrame(ohe.transform(df_input[[ohe_col]]).toarray(),
                                                columns=ohe.get_feature_names_out())
                    df_input = pd.concat([df_input, df_input_ohe], axis=1).drop(columns=[ohe_col])
            except Exception as e:
                print(f'Error during one-hot encoding for column {ohe_col}: {e}')
        return df_input

    # Make predictions on new data
    def predict(self, model_input):
        model_input = self.pre_process(model_input)
        predictions = self.ml_model.predict(model_input)
        return pd.DataFrame(predictions)
# Read the input dataset for processing
input_df_1 = Helpers.getEntityData(context, 'Car_Price_Prediction_data')  # Load the input dataset from the canvas
# Import necessary libraries
import pandas as pd # For data manipulation
import pickle # For saving and loading model and encoders
import os # For file path operations
import plotly.express as px # For data visualization
import numpy as np # For numerical operations
import plotly.io as pio # For controlling plotly display options
pio.templates.default = 'none' # Disable plotly templates
# Import machine learning libraries
from sklearn.model_selection import train_test_split as data_split # For splitting data
from sklearn.preprocessing import OneHotEncoder # For encoding categorical variables
from sklearn.ensemble import RandomForestRegressor # For training the model
from sklearn.metrics import mean_squared_error, r2_score # For evaluation metrics
# Import platform helper functions
from utils.notebookhelpers.helpers import Helpers
from utils.dtos.templateOutput import ModelOutput
from utils.dtos.rc_ml_model import RCMLModel
# Define which columns are numeric, categorical, dropped, and the target column
features_data = {}
features_data['num_cols'] = ['wheelbase', 'carlength', 'carwidth', 'carheight', 'curbweight',
'enginesize', 'boreratio', 'stroke', 'compressionratio', 'horsepower',
'peakrpm', 'citympg', 'highwaympg']
features_data['cat_cols'] = ['CarName', 'fueltype', 'aspiration', 'doornumber', 'carbody',
'drivewheel', 'enginelocation', 'enginetype', 'cylindernumber', 'fuelsystem']
features_data['dropped_cols'] = ['car_ID']
features_data['target_col'] = 'price'
# Drop unnecessary columns
input_df_1 = input_df_1.drop(columns=features_data['dropped_cols'])
# Handle missing numeric values by replacing them with the column mean
for col in features_data['num_cols']:
    if input_df_1[col].isnull().any():
        mean_val = int(round(input_df_1[col].mean()))
        input_df_1[col].fillna(mean_val, inplace=True)
        features_data[col] = mean_val
# Handle missing categorical values by replacing them with the mode (most frequent value)
for col in features_data['cat_cols']:
    if input_df_1[col].isnull().any():
        mode_value = input_df_1[col].mode()[0]
        input_df_1[col].fillna(mode_value, inplace=True)
        features_data[col] = mode_value
# One-hot encode the categorical variables and store the encoders for each column
features_data['ohe_cols'] = []
for col in features_data['cat_cols']:
    ohe = OneHotEncoder(handle_unknown='ignore', dtype=np.int64)
    ohe.fit(input_df_1[[col]])
    ohe_features = pd.DataFrame(ohe.transform(input_df_1[[col]]).toarray(),
                                columns=ohe.get_feature_names_out())
    input_df_1 = pd.concat([input_df_1, ohe_features], axis=1).drop(columns=[col])
    encoderId = col + '_ohe.pkl'
    with open(os.path.join(Helpers.getChildDir(context), encoderId), 'wb') as handle:
        pickle.dump(ohe, handle)
    features_data['ohe_cols'].append(col)
# Separate the features (X) and target (y)
X = input_df_1.drop(columns=[features_data['target_col']])
y = input_df_1[features_data['target_col']]
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = data_split(X, y, test_size=0.2, random_state=42)
# Train a Random Forest Regressor model
rf = RandomForestRegressor(random_state=42)
rf.fit(X_train, y_train)
# Save the trained model to the artifacts directory
artifacts = {}
model_path = os.path.join(Helpers.getChildDir(context), 'model_price_prediction.pkl')
with open(model_path, 'wb') as f:
    pickle.dump(rf, f)
artifacts['model_file'] = model_path
# Save the feature metadata
features_id = 'features.pkl'
features_path = os.path.join(Helpers.getChildDir(context), features_id)
with open(features_path, 'wb') as handle:
    pickle.dump(features_data, handle)
artifacts[features_id] = features_path
# Save each one-hot encoder for future use
for ohe_col in features_data['ohe_cols']:
    encoderId = ohe_col + '_ohe.pkl'
    artifacts[encoderId] = os.path.join(Helpers.getChildDir(context), encoderId)
# Output the trained model for future predictions
model = ModelOutput(PricePredictionModel, artifacts=artifacts)
# Make predictions on the test data and generate plots for evaluation
y_pred = rf.predict(X_test)
# Plot Actual vs Predicted Prices
fig_1 = px.scatter(x=y_test, y=y_pred, labels={'x': 'Actual Price', 'y': 'Predicted Price'},
title='Actual vs Predicted Prices')
fig_1.update_layout(autosize=False, height=450, width=950)
fig_1.show()
# Plot Residuals vs Predicted Prices
residuals = y_test - y_pred
fig_2 = px.scatter(x=y_pred, y=residuals, labels={'x': 'Predicted Price', 'y': 'Residuals'},
title='Residuals vs Predicted Prices')
fig_2.update_layout(autosize=False, height=450, width=950)
fig_2.show()
# Plot Histogram of Residuals
fig_3 = px.histogram(residuals, nbins=30, labels={'value': 'Residuals'},
title='Distribution of Residuals')
fig_3.update_layout(autosize=False, height=450, width=950)
fig_3.show()
# Plot Feature Importance
feature_importance = rf.feature_importances_
features = X.columns
fig_4 = px.bar(x=features, y=feature_importance, labels={'x': 'Features', 'y': 'Importance'},
title='Feature Importance')
fig_4.update_layout(autosize=False, height=450, width=950)
fig_4.show()
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Add charts and model to output
Helpers.save_output_plotly_chart_as_json(context=context, chart_title='actual-vs-predicted', plotly_fig=fig_1, group=None)
Helpers.save_output_plotly_chart_as_json(context=context, chart_title='residuals-vs-predicted', plotly_fig=fig_2, group=None)
Helpers.save_output_plotly_chart_as_json(context=context, chart_title='residuals-distribution', plotly_fig=fig_3, group=None)
Helpers.save_output_plotly_chart_as_json(context=context, chart_title='feature-importance', plotly_fig=fig_4, group=None)
Helpers.save_output_rc_ml_model(context=context, model_name='myModel', model_obj=PricePredictionModel, artifacts=artifacts)
Helpers.save(context)
```
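Once saved, the model is available on the canvas and can be loaded back through the load/predict interface defined above. A minimal sketch of that lifecycle, feeding raw (un-encoded) rows so that pre_process re-applies the stored imputation values and one-hot encoders (the five-row sample is illustrative):

```python
# Exercise the saved model the same way the serving layer would,
# via the RCMLModel interface defined above
model = PricePredictionModel()
model.load(artifacts)  # Restores the Random Forest and the saved encoders

# Take a few raw rows without the target column and score them;
# pre_process() drops car_ID, fills missing values, and one-hot encodes
raw_sample = Helpers.getEntityData(context, 'Car_Price_Prediction_data')
raw_sample = raw_sample.drop(columns=[features_data['target_col']]).head(5)
print(model.predict(raw_sample))
```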