# ml_record_evaluation_results.dig

# Parameters exported to every task in this workflow.
_export:
  # Shared settings are loaded first. The ml.* values referenced throughout
  # this workflow are presumably defined there -- confirm against
  # config/params.yaml.
  !include : config/params.yaml
  # Defaults for the td> tasks: queries use Presto against the ML output
  # database.
  td:
    engine: presto
    database: ${ml.output_database}

# Look up the model to score with. The query result is kept in
# ${td.last_results} so that +predict can read its shared_model value.
+get_latest_model:
  td>: queries/get_latest_model.sql
  store_last_results: true
  # Passed to the query as ${table}.
  table: ${ml.output_database}.automl_experiments

# Note: If input_table contains target labels, ml_predict shows evaluation results
# Run the gluon_predict notebook on the test table, using the model picked
# by +get_latest_model.
+predict:
  ml_predict>:
    notebook: gluon_predict
    shared_model: ${td.last_results.shared_model}
    input_table: ${ml.input_database}.${ml.test_data_table}
    # The session id in the name gives each run its own predictions table.
    output_table: ${ml.output_database}.predicted_${ml.test_data_table}_${session_id}
    # +get_eval_score reads its metrics from this leaderboard table.
    export_leaderboard: ${ml.output_database}.leaderboard_${ml.test_data_table}
    export_feature_importance: ${ml.output_database}.feature_importance_${ml.test_data_table}
    docker:
      # Choices: 64g/128g/256g/384g/512g
      task_mem: 64g

# Fetch the evaluation metrics from the leaderboard table written by +predict.
# The result is stored in ${td.last_results}; the tasks below read roc_auc,
# score_test, f1, etc. from it.
+get_eval_score:
  td>: queries/get_eval_score_from_leaderboard.sql
  store_last_results: true
  # Passed to the query as ${table}; same name as export_leaderboard in +predict.
  table: ${ml.output_database}.leaderboard_${ml.test_data_table}

# Echo the AUC, and flag it as drift when it is below ml.drift_metric_threshold.
+alert_if_drift_detected:
  # Any of these td.last_results fields can be used in the condition:
  # score_test, balanced_accuracy, f1, accuracy, roc_auc, log_loss.
  if>: ${td.last_results.roc_auc < ml.drift_metric_threshold}
  _do:
    echo>: Detect drift in model performance. auc was ${td.last_results.roc_auc}.
    # To send an email alert instead, replace the echo> above with:
    # mail>:
    #   data: Detect drift in model performance. AUC was ${td.last_results.roc_auc}.
    # subject: Drift detected
    # to: [me+alerts@example.com]
    # bcc: [foo@example.com,bar@example.com]
  _else:
    echo>: AUC was ${td.last_results.roc_auc}.

# Append this run's evaluation metrics to the automl_eval_results table.
+record_evaluation:
  td>: queries/record_evaluation.sql
  engine: presto
  insert_into: ${ml.output_database}.automl_eval_results

  # Identification of the run being recorded.
  session_time: ${session_local_time}
  model_name: gluon_model_${session_id}
  # NOTE(review): automl.shared_model is not defined anywhere in this file
  # (+predict uses td.last_results.shared_model instead) -- confirm where it
  # is set.
  shared_model: ${automl.shared_model}
  test_table: ${ml.input_database}.${ml.test_data_table}

  # Metrics taken from the last stored query result (+get_eval_score).
  model: ${td.last_results.model}
  score_test: ${td.last_results.score_test}
  roc_auc: ${td.last_results.roc_auc}
  accuracy: ${td.last_results.accuracy}
  balanced_accuracy: ${td.last_results.balanced_accuracy}
  f1: ${td.last_results.f1}
  log_loss: ${td.last_results.log_loss}