CI/CD for Machine Learning
Ravi Bhadauria
Machine Learning Engineer
max_depth = 2, n_estimators = 50
def target_encode_categorical_features(
    df: pd.DataFrame, categorical_columns: List[str], target_column: str
) -> pd.DataFrame:
    """Replace each categorical column with the mean target of its category.

    Args:
        df: Input frame holding both the categorical columns and the target.
        categorical_columns: Names of the columns to encode.
        target_column: Name of the column whose per-category mean is used
            as the encoding.

    Returns:
        A copy of ``df`` in which every listed column holds the mean of
        ``target_column`` for the row's category. ``df`` itself is not
        modified.
    """
    # Work on a copy so the caller's frame keeps its raw categories.
    encoded_data = df.copy()

    for col in categorical_columns:
        # The means are always computed from the untouched input frame, so
        # encoding one column cannot influence the encoding of another.
        encoding_map = df.groupby(col)[target_column].mean().to_dict()
        encoded_data[col] = encoded_data[col].map(encoding_map)

    return encoded_data
def impute_and_scale_data(df_features: pd.DataFrame) -> pd.DataFrame:
    """Mean-impute missing values, then standardize every feature column.

    Args:
        df_features: Feature frame to preprocess.

    Returns:
        A new DataFrame with the same column names and the same row index
        as ``df_features``, holding the imputed and scaled values.
    """
    # Impute data with mean strategy
    imputer = SimpleImputer(strategy="mean")
    X_preprocessed = imputer.fit_transform(df_features.values)

    # Scale and fit with zero mean and unit variance
    scaler = StandardScaler()
    X_preprocessed = scaler.fit_transform(X_preprocessed)

    # Carry the original index over: without it the result gets a fresh
    # RangeIndex and no longer lines up with a target series that still
    # uses the caller's row labels.
    return pd.DataFrame(
        X_preprocessed,
        columns=df_features.columns,
        index=df_features.index,
    )
from sklearn.model_selection import train_test_split
# Hold out a test split; the fixed seed keeps the split reproducible.
# DataFrame.drop removes *row* labels by default, so the target has to be
# dropped explicitly by column to obtain the feature matrix.
X_train, X_test, y_train, y_test = train_test_split(
    data.drop(columns=TARGET_COLUMN), data[TARGET_COLUMN], random_state=1993
)
from sklearn.ensemble import RandomForestClassifier
# Build the random forest with a fixed seed and fit it on the training
# split in one expression (fit returns the estimator itself).
clf = RandomForestClassifier(
    max_depth=2,
    n_estimators=50,
    random_state=1993,
).fit(X_train, y_train)
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# NOTE(review): `model` is assumed to be the fitted estimator (it is named
# `clf` where the classifier is trained) - confirm the intended name.
# Calculate predictions
y_pred = model.predict(X_test)
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
# Calculate precision
precision = precision_score(y_test, y_pred)
# Calculate recall
recall = recall_score(y_test, y_pred)
# Calculate f1 score
f1 = f1_score(y_test, y_pred)
from sklearn.metrics import ConfusionMatrixDisplay
# Draw the confusion matrix of the estimator's predictions on the test
# split, using the blue colormap.
ConfusionMatrixDisplay.from_estimator(
    estimator=model,
    X=X_test,
    y=y_test,
    cmap=plt.cm.Blues,
)
# Enable setup-cml action to be used later
- uses: iterative/setup-cml@v1
# Install the dependencies and run the training script.
- name: Train model
  run: |
    # Your ML workflow goes here
    pip install -r requirements.txt
    python3 train.py
# Assemble the markdown report and post it as a comment via CML.
- name: Write CML report
  run: |
    # Add results and plots to markdown
    cat results.txt >> report.md
    echo "" >> report.md
    # Create comment from markdown report
    cml comment create report.md
  env:
    # Token passed to the cml command through the environment
    REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
CI/CD for Machine Learning