"""Train a calibrated gradient-boosting classifier and persist it with joblib.

Pipeline, in order:

1. Read the dataset from ``CSV``.
2. Parse each ``features_json`` cell with ``ast.literal_eval`` and stack the
   rows into a 2-D feature matrix; use ``tp_first`` (cast to int) as target.
3. Hold out a stratified 20 % validation split.
4. Fit a ``GradientBoostingClassifier`` wrapped in 3-fold isotonic
   calibration on the remaining 80 %.
5. Print the validation ROC AUC and the mean predicted probability of the
   positive class.
6. Dump ``{"model", "n_features"}`` to ``OUT``.

NOTE(review): everything runs at module top level, so importing this file
re-trains and overwrites ``OUT``. Kept as-is to preserve behaviour; consider
a ``main()`` + ``if __name__ == "__main__":`` guard if it is ever imported.
"""

import ast

import joblib
import numpy as np
import pandas as pd
from sklearn.calibration import CalibratedClassifierCV
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

# Input dataset and output artifact paths (relative to the working directory).
CSV = "nn_dataset_signalready.csv"
OUT = "qt_quality_model.joblib"

df = pd.read_csv(CSV)

# Each cell is parsed as a Python literal (presumably a flat list of numbers
# per row -- verify against the dataset writer). np.vstack needs every parsed
# row to have the same length and raises ValueError otherwise.
X = np.vstack(df["features_json"].apply(ast.literal_eval).values)
y = df["tp_first"].astype(int).values

# Stratify on the label so both splits keep the same class proportions;
# the fixed seed makes the split reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Calibration is cross-validated (cv=3) inside the training split only, so
# the validation split stays untouched for the metrics below.
base = GradientBoostingClassifier(random_state=42)
cal = CalibratedClassifierCV(base, method="isotonic", cv=3)
cal.fit(X_train, y_train)

# Column 1 of predict_proba is the probability of the second class in
# cal.classes_ (label 1 when the target is 0/1).
p = cal.predict_proba(X_val)[:, 1]
print("AUC:", roc_auc_score(y_val, p))
print("Mean P(tp_first):", p.mean())

# Store the feature width next to the model -- presumably so consumers can
# check input dimensionality before predicting (confirm with the loader).
joblib.dump({"model": cal, "n_features": X.shape[1]}, OUT)
print("Saved:", OUT)