# Test_02_Label_Definition.py
#
# OBJECTIVE:
#   This script tests the hypothesis that the model's failure is due to the
#   definition of a trade's outcome (the labels). It runs two back-to-back tests
#   on a known failing strategy, changing only the Take-Profit and Stop-Loss
#   multipliers to see if a different risk-reward structure is more predictable.
#
#   - Test A ("Big Move"): Uses a wide 4:2 Risk/Reward ratio.
#   - Test B ("Scalper"): Uses a tight 1:1 Risk/Reward ratio.
#
#   A significant F1-score improvement in either test would indicate that the
#   labeling definition is a critical variable.
#
# --- SCRIPT VERSION ---
VERSION = "Test.2.0"
# ---------------------

import os
import re
import json
import time
import warnings
import logging
import sys
import random
from datetime import datetime, date, timedelta
from logging.handlers import RotatingFileHandler
from typing import List, Dict, Any, Optional, Tuple, Union, Callable
from collections import defaultdict
import pathlib

# --- LOAD ENVIRONMENT VARIABLES ---
from dotenv import load_dotenv
load_dotenv()
# --- END ---

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import shap
import xgboost as xgb
import optuna
import requests
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import f1_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import RobustScaler, MinMaxScaler, StandardScaler
from sklearn.utils.class_weight import compute_class_weight
from pydantic import BaseModel, DirectoryPath, confloat, conint, Field, ValidationError
from sklearn.ensemble import IsolationForest
from sklearn.decomposition import PCA
import yfinance as yf
from hurst import compute_Hc

# --- DIAGNOSTICS & LOGGING SETUP ---
logger = logging.getLogger("ML_Trading_Framework")

# --- GNN Specific Imports (requires PyTorch, PyG) ---
try:
    # Optional deep-learning stack; every name below is rebound in the
    # fallback branch so later references do not raise NameError.
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    from torch_geometric.data import Data
    from torch_geometric.nn import GCNConv
    from torch.optim import Adam
    GNN_AVAILABLE = True
except ImportError:
    # Any of the imports above failing disables GNN support for the run.
    GNN_AVAILABLE = False
    class _dummy_module_container:
        # Stand-in exposing a `Module` attribute (plain `object`) so that
        # class statements written as `class X(nn.Module)` still evaluate.
        Module = object
        def __init__(self):
            self.Module = object
    torch = _dummy_module_container()
    torch.nn = _dummy_module_container()
    nn = _dummy_module_container()
    # The remaining names are placeholders only; they are None when the
    # libraries are missing.
    F = None
    Data = None
    GCNConv = None
    Adam = None

# Optional Optuna pruning integration. Kept as a harmless placeholder so the
# callback can be reintroduced later; this block can also be removed entirely.
try:
    from optuna.integration import XGBoostPruningCallback
    PRUNING_AVAILABLE = True
except ModuleNotFoundError:
    PRUNING_AVAILABLE = False
    # No-op stand-in with the same call shape, so code that instantiates or
    # invokes the callback keeps working when the integration is missing.
    class XGBoostPruningCallback:
        def __init__(self, trial, observation_key): pass
        def __call__(self, env): pass

# --- LOGGING SWITCHES ---
# Module-level boolean flags. Their consumers are not visible in this part of
# the file; presumably they gate verbose log output for skipped anomalies and
# partial profit-taking events -- TODO confirm against the backtester.
LOG_ANOMALY_SKIPS = False
LOG_PARTIAL_PROFITS = True
# -----------------------------

def flush_loggers():
    """Flush every handler attached to the root logger and to the framework logger.

    The root logger is processed first, followed by the logger named
    "ML_Trading_Framework". Returns nothing.
    """
    # logging.getLogger(None) is the root logger, matching a bare getLogger().
    for logger_name in (None, "ML_Trading_Framework"):
        for attached in logging.getLogger(logger_name).handlers:
            attached.flush()

def setup_logging() -> logging.Logger:
    """(Re)configure the module-level framework logger and return it.

    Existing handlers are discarded, the logger level is set to DEBUG, and a
    single stdout handler filtering at INFO is attached. Finally one message
    is emitted reporting whether the optional GNN stack was importable.
    """
    # Start from a clean slate so repeated calls do not duplicate output.
    if logger.hasHandlers():
        logger.handlers.clear()
    logger.setLevel(logging.DEBUG)

    console = logging.StreamHandler(sys.stdout)
    console.setLevel(logging.INFO)
    console.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
    logger.addHandler(console)

    if not GNN_AVAILABLE:
        logger.warning("PyTorch or PyTorch Geometric not found. GNN-based strategies will be unavailable.")
    else:
        logger.info("PyTorch and PyG loaded successfully. GNN module is available.")
    return logger

# Configure the framework logger once at import time and rebind the
# module-level name to the configured instance.
logger = setup_logging()
# Only surface Optuna messages at WARNING level or above.
optuna.logging.set_verbosity(optuna.logging.WARNING)
# --- END DIAGNOSTICS & LOGGING ---

# Silence these warning categories process-wide for the rest of the run.
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)
warnings.filterwarnings('ignore', category=pd.errors.PerformanceWarning)

# =============================================================================
# CONFIGURATION & VALIDATION
# (Full class definitions from the main V208 script are included here)
# =============================================================================

class EarlyInterventionConfig(BaseModel):
    """Configuration for the adaptive early intervention system."""
    # Master on/off switch; enabled by default.
    enabled: bool = True
    # Integer validated as >= 2 (default 2). Presumably the number of failed
    # training attempts before intervention kicks in -- TODO confirm with caller.
    attempt_threshold: conint(ge=2) = 2
    # Non-negative float (default 3.0); the consumer is not visible here.
    min_profitability_for_f1_override: confloat(ge=0) = 3.0
    # Float constrained to the range [0.4, 0.6] (default 0.50).
    max_f1_override_value: confloat(ge=0.4, le=0.6) = 0.50

class ConfigModel(BaseModel):
    """
    The central configuration model for the trading framework.
    It holds all parameters that define a run, from data paths and capital
    to risk management, AI behavior, and backtesting realism settings.

    Fields without a default are required at construction time. The
    ``*_PATH`` fields in the final section are not meant to be supplied by
    the caller: ``__init__`` overwrites them with paths derived from
    ``BASE_PATH``, ``REPORT_LABEL``, ``nickname``, ``strategy_name`` and
    ``run_timestamp``.
    """
    # --- Core Run & Capital Parameters ---
    # BASE_PATH is validated by pydantic's DirectoryPath type.
    BASE_PATH: DirectoryPath
    REPORT_LABEL: str
    INITIAL_CAPITAL: confloat(gt=0)
    HARD_STOP_EQUITY_PCT: confloat(ge=0.0, le=1.0) = 0.5

    # --- AI & Optimization Parameters ---
    OPTUNA_TRIALS: conint(gt=0)
    MAX_TRAINING_RETRIES_PER_CYCLE: conint(ge=0) = 3
    CALCULATE_SHAP_VALUES: bool = True
    MIN_F1_SCORE_GATE: confloat(ge=0.0, le=1.0) = 0.60
    MIN_VALIDATION_CALMAR: confloat(ge=-10.0) = 0.1

    # --- NEW: Early Intervention System Configuration ---
    # default_factory builds a fresh EarlyInterventionConfig per instance.
    early_intervention: EarlyInterventionConfig = Field(default_factory=EarlyInterventionConfig)

    # --- Dynamic Labeling & Trade Definition ---
    # Both ATR multipliers are validated to the half-open range (0.5, 10.0].
    TP_ATR_MULTIPLIER: confloat(gt=0.5, le=10.0) = 2.0
    SL_ATR_MULTIPLIER: confloat(gt=0.5, le=10.0) = 1.5
    LOOKAHEAD_CANDLES: conint(gt=0)
    LABEL_MIN_RETURN_PCT: confloat(ge=0.0) = 0.001
    LABEL_MIN_EVENT_PCT: confloat(ge=0.01, le=0.5) = 0.02

    # --- Walk-Forward & Data Parameters ---
    # Plain strings; their expected format is not validated here.
    TRAINING_WINDOW: str
    RETRAINING_FREQUENCY: str
    FORWARD_TEST_GAP: str

    # --- Risk & Portfolio Management ---
    MAX_DD_PER_CYCLE: confloat(ge=0.05, lt=1.0) = 0.25
    RISK_CAP_PER_TRADE_USD: confloat(gt=0)
    BASE_RISK_PER_TRADE_PCT: confloat(gt=0, lt=1)
    MAX_CONCURRENT_TRADES: conint(ge=1, le=20) = 3
    CONFIDENCE_TIERS: Dict[str, Dict[str, Any]]
    USE_TP_LADDER: bool = True
    TP_LADDER_LEVELS_PCT: List[confloat(gt=0, lt=1)] = Field(default_factory=lambda: [0.25, 0.25, 0.25, 0.25])
    TP_LADDER_RISK_MULTIPLIERS: List[confloat(gt=0)] = Field(default_factory=lambda: [1.0, 2.0, 3.0, 4.0])
    USE_TIERED_RISK: bool = False
    RISK_PROFILE: str = 'Medium'
    TIERED_RISK_CONFIG: Dict[int, Dict[str, Dict[str, Union[float, int]]]] = {}

    # --- Broker & Execution Simulation ---
    COMMISSION_PER_LOT: confloat(ge=0.0) = 3.5
    USE_REALISTIC_EXECUTION: bool = True
    SIMULATE_LATENCY: bool = True
    EXECUTION_LATENCY_MS: conint(ge=50, le=500) = 150
    USE_VARIABLE_SLIPPAGE: bool = True
    SLIPPAGE_VOLATILITY_FACTOR: confloat(ge=0.0, le=5.0) = 1.5
    # Per-symbol spread table with a 'default' entry; each entry carries
    # 'normal_pips' and 'volatile_pips' values.
    SPREAD_CONFIG: Dict[str, Dict[str, float]] = Field(default_factory=lambda: {
        'default': {'normal_pips': 1.8, 'volatile_pips': 5.5},
        'EURUSD':  {'normal_pips': 1.2, 'volatile_pips': 4.0},
        'GBPUSD':  {'normal_pips': 1.6, 'volatile_pips': 5.0},
    })
    CONTRACT_SIZE: confloat(gt=0) = 100000.0
    LEVERAGE: conint(gt=0) = 30
    MIN_LOT_SIZE: confloat(gt=0) = 0.01
    LOT_STEP: confloat(gt=0) = 0.01

    # --- Feature Engineering Parameters ---
    TREND_FILTER_THRESHOLD: confloat(gt=0) = 25.0
    BOLLINGER_PERIOD: conint(gt=0) = 20
    STOCHASTIC_PERIOD: conint(gt=0) = 14
    MIN_VOLATILITY_RANK: confloat(ge=0.0, le=1.0) = 0.1
    MAX_VOLATILITY_RANK: confloat(ge=0.0, le=1.0) = 0.9
    HAWKES_KAPPA: confloat(gt=0) = 0.5
    anomaly_contamination_factor: confloat(ge=0.001, le=0.1) = 0.01
    USE_PCA_REDUCTION: bool = True
    PCA_N_COMPONENTS: conint(gt=1, le=10) = 3
    RSI_PERIODS_FOR_PCA: List[conint(gt=1)] = Field(default_factory=lambda: [5, 10, 15, 20, 25])

    # --- GNN Specific Parameters ---
    GNN_EMBEDDING_DIM: conint(gt=0) = 8
    GNN_EPOCHS: conint(gt=0) = 50

    # --- State & Info Parameters ---
    selected_features: List[str]
    run_timestamp: str
    strategy_name: str
    nickname: str = ""
    analysis_notes: str = ""

    # --- File Path Management (Internal) ---
    # All of these default to "" and are populated by __init__ below.
    MODEL_SAVE_PATH: str = Field(default="", repr=False)
    PLOT_SAVE_PATH: str = Field(default="", repr=False)
    REPORT_SAVE_PATH: str = Field(default="", repr=False)
    SHAP_PLOT_PATH: str = Field(default="", repr=False)
    LOG_FILE_PATH: str = Field(default="", repr=False)
    CHAMPION_FILE_PATH: str = Field(default="", repr=False)
    HISTORY_FILE_PATH: str = Field(default="", repr=False)
    PLAYBOOK_FILE_PATH: str = Field(default="", repr=False)
    DIRECTIVES_FILE_PATH: str = Field(default="", repr=False)
    NICKNAME_LEDGER_PATH: str = Field(default="", repr=False)
    REGIME_CHAMPIONS_FILE_PATH: str = Field(default="", repr=False)

    def __init__(self, **data: Any):
        """Validate ``data`` via pydantic, then derive all output file paths.

        Per-run artefacts (model, plots, report, log) go into
        ``<BASE_PATH>/Results/<folder_name>/`` and are prefixed with a run id
        built from the folder name, strategy name and run timestamp. Shared
        artefacts (champion, history, playbook, directives, nickname ledger,
        regime champions) live directly in ``<BASE_PATH>/Results``.

        Side effect: the per-run folder is created on disk when ``nickname``
        is non-empty and not equal to ``"init"``.
        """
        super().__init__(**data)
        results_dir = os.path.join(self.BASE_PATH, "Results")
        # Pull a numeric version such as "V208" out of the report label, if any.
        version_match = re.search(r'V(\d+)', self.REPORT_LABEL)
        version_str = f"_V{version_match.group(1)}" if version_match else ""
        # Use "<nickname>_V<n>" only when both parts exist; otherwise fall
        # back to the raw report label.
        folder_name = f"{self.nickname}{version_str}" if self.nickname and version_str else self.REPORT_LABEL
        run_id = f"{folder_name}_{self.strategy_name}_{self.run_timestamp}"
        result_folder_path = os.path.join(results_dir, folder_name)

        # Skip directory creation for nickname-less configs and for the
        # special "init" nickname.
        if self.nickname and self.nickname != "init":
            os.makedirs(result_folder_path, exist_ok=True)

        # Per-run artefacts.
        self.MODEL_SAVE_PATH = os.path.join(result_folder_path, f"{run_id}_model.json")
        self.PLOT_SAVE_PATH = os.path.join(result_folder_path, f"{run_id}_equity_curve.png")
        self.REPORT_SAVE_PATH = os.path.join(result_folder_path, f"{run_id}_report.txt")
        self.SHAP_PLOT_PATH = os.path.join(result_folder_path, f"{run_id}_shap_summary.png")
        self.LOG_FILE_PATH = os.path.join(result_folder_path, f"{run_id}_run.log")

        # Artefacts shared across runs.
        self.CHAMPION_FILE_PATH = os.path.join(results_dir, "champion.json")
        self.HISTORY_FILE_PATH = os.path.join(results_dir, "historical_runs.jsonl")
        self.PLAYBOOK_FILE_PATH = os.path.join(results_dir, "strategy_playbook.json")
        self.DIRECTIVES_FILE_PATH = os.path.join(results_dir, "framework_directives.json")
        self.NICKNAME_LEDGER_PATH = os.path.join(results_dir, "nickname_ledger.json")
        self.REGIME_CHAMPIONS_FILE_PATH = os.path.join(results_dir, "regime_champions.json")

# =============================================================================
# ALL OTHER CLASSES & FUNCTIONS FROM V208
# =============================================================================

class APITimer:
    """Spaces out API calls so consecutive calls are at least ``interval`` apart.

    The interval is measured from the moment the previous call was started.
    A non-positive interval disables waiting altogether.
    """

    def __init__(self, interval_seconds: int = 61):
        """Store the minimum spacing between calls and log the chosen mode."""
        self.interval = timedelta(seconds=interval_seconds)
        self.last_call_time: Optional[datetime] = None
        if not self.interval.total_seconds() > 0:
            logger.info("API Timer initialized with a 0-second interval (timer is effectively disabled).")
        else:
            logger.info(f"API Timer initialized with a {self.interval.total_seconds():.0f}-second interval.")

    def _wait_if_needed(self):
        """Sleep for whatever remains of the interval since the last call."""
        timer_disabled = self.interval.total_seconds() <= 0
        if timer_disabled or self.last_call_time is None:
            # Nothing to throttle: disabled timer, or this is the first call.
            return

        elapsed = datetime.now() - self.last_call_time
        wait_seconds = (self.interval - elapsed).total_seconds()

        if wait_seconds <= 0:
            logger.info(f"  - Time since last API call ({elapsed.total_seconds():.1f}s) exceeds interval. No wait needed.")
            return

        logger.info(f"  - Time since last API call: {elapsed.total_seconds():.1f} seconds.")
        logger.info(f"  - Waiting for {wait_seconds:.1f} seconds to respect the {self.interval.total_seconds():.0f}s interval...")
        # Push pending log output out before blocking.
        flush_loggers()
        time.sleep(wait_seconds)

    def call(self, api_function: Callable, *args, **kwargs) -> Any:
        """Run ``api_function(*args, **kwargs)`` once the interval has elapsed.

        The call timestamp is recorded before the function is invoked, and
        the function's return value is passed straight back to the caller.
        """
        self._wait_if_needed()
        self.last_call_time = datetime.now()
        logger.info(f"  - Making API call to '{api_function.__name__}' at {self.last_call_time.strftime('%H:%M:%S')}...")
        outcome = api_function(*args, **kwargs)
        logger.info(f"  - API call to '{api_function.__name__}' complete.")
        return outcome

class GeminiAnalyzer:
    """Minimal client for the Gemini ``generateContent`` REST endpoint.

    Responsibilities:
      * resolve an API key (environment first, interactive prompt second);
      * convert payload values to JSON-safe primitives;
      * post a prompt with retries and a backup model;
      * pull a JSON object out of the free-text model response.

    When no usable key is available ``api_key_valid`` is False and
    ``_call_gemini`` short-circuits to ``"{}"``.
    """

    def __init__(self):
        """Load ``GEMINI_API_KEY`` from the environment or prompt for it."""
        self.api_key = os.getenv("GEMINI_API_KEY")
        if not self.api_key or "YOUR" in self.api_key or "PASTE" in self.api_key:
            logger.warning("!CRITICAL! GEMINI_API_KEY not found in environment or is a placeholder.")
            try:
                self.api_key = input(">>> Please paste your Gemini API Key and press Enter, or press Enter to skip: ").strip()
                if not self.api_key:
                    logger.warning("No API Key provided. AI analysis will be skipped.")
                    self.api_key_valid = False
                else:
                    logger.info("Using API Key provided via manual input.")
                    self.api_key_valid = True
            except Exception:
                # Deliberately best-effort: input() fails in non-interactive
                # runs (e.g. EOFError) and the analysis is simply skipped.
                logger.warning("Could not read input (non-interactive environment?). AI analysis will be skipped.")
                self.api_key_valid = False
                self.api_key = None
        else:
            logger.info("Successfully loaded GEMINI_API_KEY from environment.")
            self.api_key_valid = True

        self.headers = {"Content-Type": "application/json"}
        self.primary_model = "gemini-2.0-flash"
        self.backup_model = "gemini-1.5-flash"

    def _sanitize_value(self, value: Any) -> Any:
        """Convert a single value into something ``json.dumps`` can encode.

        Handles ``Decimal``, ``pathlib.Path``, every numpy boolean/integer/
        floating scalar (not just the 32/64-bit variants), plain floats and
        date-like objects. Non-finite floats (NaN, +/-inf) become ``None``
        because they have no valid JSON representation. Any other value is
        returned unchanged.
        """
        from decimal import Decimal
        if isinstance(value, Decimal):
            # Fall through to the float branch so Decimal('NaN') is caught too.
            value = float(value)
        if isinstance(value, pathlib.Path):
            return str(value)
        if isinstance(value, np.bool_):
            return bool(value)
        if isinstance(value, np.integer):
            return int(value)
        if isinstance(value, (float, np.floating)):
            if not np.isfinite(value):
                return None
            return float(value)
        if isinstance(value, (pd.Timestamp, datetime, date)):
            return value.isoformat()
        return value

    def _sanitize_dict(self, data: Any) -> Any:
        """Recursively sanitize dicts and lists; leaves go through ``_sanitize_value``."""
        if isinstance(data, dict):
            return {key: self._sanitize_dict(value) for key, value in data.items()}
        if isinstance(data, list):
            return [self._sanitize_dict(item) for item in data]
        return self._sanitize_value(data)

    def _call_gemini(self, prompt: str) -> str:
        """Send ``prompt`` to Gemini and return the text of the first candidate.

        The primary model is tried first, then the backup model. Each model
        gets one immediate attempt plus one retry per entry in the retry
        schedule. Returns ``"{}"`` when no key is configured or when every
        attempt on every model fails.
        """
        if not self.api_key_valid:
            return "{}"

        if len(prompt) > 30000:
            logger.warning("Prompt is very large, may risk exceeding token limits.")

        payload = {"contents": [{"parts": [{"text": prompt}]}]}
        request_body = json.dumps(self._sanitize_dict(payload))
        # The key travels in a header rather than the URL query string:
        # requests embeds the full URL in its exception messages, which are
        # logged below, so a key in the URL would leak into the log output.
        request_headers = {**self.headers, "x-goog-api-key": self.api_key}

        models_to_try = [self.primary_model, self.backup_model]
        retry_delays = [5, 15, 30]

        for model in models_to_try:
            logger.info(f"Attempting to call Gemini API with model: {model}")
            api_url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"

            # Attempt 0 runs immediately; later attempts wait for their delay.
            for attempt, delay in enumerate([0] + retry_delays):
                if delay > 0:
                    logger.warning(f"API connection failed. Retrying in {delay} seconds... (Attempt {attempt}/{len(retry_delays)})")
                    flush_loggers()
                    time.sleep(delay)

                try:
                    response = requests.post(api_url, headers=request_headers, data=request_body, timeout=120)
                    response.raise_for_status()

                    result = response.json()
                    if "candidates" in result and result["candidates"] and "content" in result["candidates"][0] and "parts" in result["candidates"][0]["content"]:
                        logger.info(f"Successfully received response from model: {model}")
                        return result["candidates"][0]["content"]["parts"][0]["text"]
                    else:
                        logger.error(f"Invalid Gemini response structure from {model}: {result}")
                        continue

                except requests.exceptions.RequestException as e:
                    logger.error(f"Gemini API request failed for model {model} on attempt {attempt + 1}: {e}")
                    if attempt == len(retry_delays):
                        logger.critical(f"All retries for model {model} failed.")
                except json.JSONDecodeError as e:
                    logger.error(f"Failed to decode Gemini response JSON from {model}: {e} - Response: {response.text}")
                    continue
                except (KeyError, IndexError) as e:
                    logger.error(f"Failed to extract text from Gemini response from {model}: {e} - Response: {response.text}")
                    continue

            logger.warning(f"Failed to get a response from model {model} after all retries.")

        logger.critical("API connection failed for all primary and backup models after all retries. Stopping.")
        return "{}"

    def _extract_json_from_response(self, response_text: str) -> Dict:
        """Parse the JSON object embedded in a model response.

        Extraction order: text between ``BEGIN_JSON``/``END_JSON`` anchors,
        then a fenced ```` ```json ```` block, then the raw text. A nested
        ``current_params`` dict is flattened into the top level. Returns an
        empty dict when the payload is ``null``/``{}``, is not a JSON object,
        or cannot be parsed.
        """
        try:
            anchor_match = re.search(r"BEGIN_JSON\s*(.*?)\s*END_JSON", response_text, re.DOTALL)
            if anchor_match:
                json_text = anchor_match.group(1).strip()
                logger.info("  - Extracted JSON using BEGIN_JSON/END_JSON anchors.")
            else:
                match = re.search(r"```json\s*(.*?)\s*```", response_text, re.DOTALL)
                json_text = match.group(1) if match else response_text

            if json_text.strip().lower() in ['null', '{}']:
                return {}

            suggestions = json.loads(json_text.strip())

            if not isinstance(suggestions, dict):
                logger.error(f"Parsed JSON is not a dictionary. Response text: {response_text}")
                return {}

            if 'model_confidence_score' in suggestions:
                logger.info(f"  - AI indicated a confidence score of: {suggestions['model_confidence_score']}/10")

            if 'current_params' in suggestions and isinstance(suggestions.get('current_params'), dict):
                # Hoist nested parameters so callers see one flat mapping.
                nested_params = suggestions.pop('current_params')
                suggestions.update(nested_params)
            return suggestions
        except (json.JSONDecodeError, AttributeError) as e:
            logger.error(f"Could not parse JSON from response: {e}\nResponse text: {response_text}")
            return {}

class DataLoader:
    """Loads ``<SYMBOL>_<TIMEFRAME>_*.csv`` price files into per-timeframe frames."""

    def __init__(self, config: 'ConfigModel'):
        """Keep a reference to the run configuration (``BASE_PATH`` is read from it)."""
        self.config = config

    def _parse_single_file(self, file_path: str, filename: str) -> Optional[pd.DataFrame]:
        """Parse one price file into a timestamp-indexed DataFrame.

        Args:
            file_path: Full path of the file to read.
            filename: Bare file name; the text before the first underscore is
                used as the symbol, and a second underscore-separated part
                (the timeframe) must exist.

        Returns:
            A DataFrame indexed by ``Timestamp`` with a ``Symbol`` column,
            an int32 ``RealVolume`` column and float32 OHLC columns (where
            present), or ``None`` if the file has no date column or any step
            of the parsing fails (the failure is logged).
        """
        try:
            parts = filename.split('_')
            # Indexing parts[1] also rejects names without a timeframe part.
            symbol, tf = parts[0], parts[1]

            # Sniff the delimiter from the header line. The context manager
            # guarantees the handle is closed before pandas reopens the file.
            with open(file_path, encoding='utf-8') as header_file:
                first_line = header_file.readline()
            delimiter = '\t' if '\t' in first_line else ','
            df = pd.read_csv(file_path, delimiter=delimiter)

            # Normalise headers such as "<DATE>" to "DATE".
            df.columns = [c.upper().replace('<', '').replace('>', '') for c in df.columns]
            date_col = next((c for c in df.columns if 'DATE' in c), None)
            time_col = next((c for c in df.columns if 'TIME' in c), None)
            if date_col and time_col:
                df['Timestamp'] = pd.to_datetime(df[date_col] + ' ' + df[time_col], errors='coerce')
            elif date_col:
                df['Timestamp'] = pd.to_datetime(df[date_col], errors='coerce')
            else:
                logger.error(f"  - No date/time columns found in {filename}.")
                return None
            df.dropna(subset=['Timestamp'], inplace=True)
            df.set_index('Timestamp', inplace=True)

            col_map = {c: c.capitalize() for c in df.columns if c.lower() in ['open', 'high', 'low', 'close', 'tickvol', 'volume', 'spread']}
            df.rename(columns=col_map, inplace=True)
            # Prefer real volume; fall back to tick volume when it is absent.
            vol_col = 'Volume' if 'Volume' in df.columns else 'Tickvol'
            df.rename(columns={vol_col: 'RealVolume'}, inplace=True, errors='ignore')

            df['Symbol'] = symbol

            # Any remaining text column is coerced to numbers (unparseable -> NaN).
            for col in df.columns:
                if df[col].dtype == 'object' and col != 'Symbol':
                    df[col] = pd.to_numeric(df[col], errors='coerce')

            if 'RealVolume' not in df.columns:
                df['RealVolume'] = 0
            df['RealVolume'] = pd.to_numeric(df['RealVolume'], errors='coerce').fillna(0).astype('int32')
            for col in ['Open', 'High', 'Low', 'Close']:
                if col in df.columns:
                    df[col] = pd.to_numeric(df[col], errors='coerce').astype('float32')

            return df
        except Exception as e:
            # Best-effort loader: one bad file must not abort the whole run.
            logger.error(f"  - Failed to load {filename}: {e}", exc_info=True)
            return None

    def load_and_parse_data(self, filenames: List[str]) -> Tuple[Optional[Dict[str, pd.DataFrame]], List[str]]:
        """Load every listed file under ``BASE_PATH`` and group rows by timeframe.

        Missing files and files that fail to parse are skipped. Frames that
        share a timeframe are concatenated and sorted by timestamp.

        Returns:
            ``(frames_by_timeframe, detected_timeframes)``, or ``(None, [])``
            when no file could be loaded.
        """
        logger.info("-> Stage 1: Loading and Preparing Multi-Timeframe Data...")
        data_by_tf = defaultdict(list)
        for filename in filenames:
            file_path = os.path.join(self.config.BASE_PATH, filename)
            if not os.path.exists(file_path):
                logger.warning(f"  - File not found, skipping: {file_path}")
                continue
            df = self._parse_single_file(file_path, filename)
            if df is not None:
                tf = filename.split('_')[1]
                data_by_tf[tf].append(df)

        processed_dfs: Dict[str, pd.DataFrame] = {}
        for tf, dfs in data_by_tf.items():
            if dfs:
                combined = pd.concat(dfs)
                # Ensure data is sorted by timestamp before returning
                final_combined = combined.sort_index()
                processed_dfs[tf] = final_combined
                logger.info(f"  - Processed {tf}: {len(final_combined):,} rows for {len(final_combined['Symbol'].unique())} symbols.")

        detected_timeframes = list(processed_dfs.keys())
        if not processed_dfs:
            logger.critical("  - Data loading failed for all files.")
            return None, []
        logger.info(f"[SUCCESS] Data loading complete. Detected timeframes: {detected_timeframes}")
        return processed_dfs, detected_timeframes

class FeatureEngineer:
    """
    Enhanced feature engineering with additional simple features.
    This class includes standard technical indicators, advanced market structure analysis, volatility regime detection,
    and feature interaction/normalization to improve model performance.
    """
    TIMEFRAME_MAP = {'M1': 1, 'M5': 5, 'M15': 15, 'M30': 30, 'H1': 60, 'H4': 240, 'D1': 1440, 'DAILY': 1440}
    ANOMALY_FEATURES = [
        'ATR', 'bollinger_bandwidth', 'RSI', 'RealVolume', 'candle_body_size',
        'pct_change', 'candle_body_size_vs_atr', 'atr_vs_daily_atr', 'MACD_hist',
        'wick_to_body_ratio', 'overnight_gap_pct', 'RSI_zscore', 'volume_ma_ratio', 'volatility_hawkes'
    ]

    def __init__(self, config: 'ConfigModel', timeframe_roles: Dict[str, str], playbook: Dict):
        """Keep references to the run configuration, timeframe roles and playbook.

        The three arguments are stored as-is (no copies) on ``self.config``,
        ``self.roles`` and ``self.playbook`` respectively.
        """
        self.config, self.roles, self.playbook = config, timeframe_roles, playbook

    def _get_weights_ffd(self, d: float, thres: float) -> np.ndarray:
        w, k = [1.], 1
        while True:
            w_ = -w[-1] / k * (d - k + 1)
            if abs(w_) < thres: break
            w.append(w_)
            k += 1
        return np.array(w[::-1]).reshape(-1, 1)

    def _fractional_differentiation(self, series: pd.Series, d: float, thres: float = 1e-5) -> pd.Series:
        weights = self._get_weights_ffd(d, thres)
        width = len(weights)
        if width > len(series): return pd.Series(index=series.index)
        diff_series = series.rolling(width).apply(lambda x: np.dot(weights.T, x)[0], raw=True)
        diff_series.name = f"{series.name}_fracdiff_{d}"
        return diff_series

    def _get_anomaly_scores(self, df: pd.DataFrame, contamination: float) -> pd.Series:
        features_to_check = [f for f in self.ANOMALY_FEATURES if f in df.columns]
        df_clean = df[features_to_check].dropna()
        if df_clean.empty:
            return pd.Series(1, index=df.index, name='anomaly_score')
        model = IsolationForest(contamination=contamination, random_state=42, n_estimators=100)
        model.fit(df_clean)
        scores = pd.Series(model.predict(df[features_to_check].fillna(0)), index=df.index)
        scores.name = 'anomaly_score'
        return scores

    def hawkes_process(self, data: pd.Series, kappa: float) -> pd.Series:
        if not isinstance(data, pd.Series) or data.isnull().all():
            logger.warning("Hawkes process received invalid data; returning zeros.")
            return pd.Series(np.zeros(len(data)), index=data.index)
        assert kappa > 0.0
        alpha = np.exp(-kappa)
        arr = data.to_numpy()
        output = np.zeros(len(data))
        output[:] = np.nan
        for i in range(1, len(data)):
            if np.isnan(output[i - 1]): output[i] = arr[i]
            else: output[i] = output[i - 1] * alpha + arr[i]
        return pd.Series(output, index=data.index) * kappa

    def _apply_pca_to_features(self, df: pd.DataFrame, feature_prefix: str, n_components: int) -> pd.DataFrame:
        """Compress all columns starting with ``feature_prefix`` via PCA.

        Rows with any missing value among the selected columns are dropped,
        the remainder is standardised and projected onto ``n_components``
        principal components.

        Returns:
            A DataFrame with columns ``PCA_<prefix>_<i>`` indexed by the rows
            that survived NaN removal, or an empty-column DataFrame on
            ``df.index`` when PCA is skipped.
        """
        candidates = df.filter(regex=f'^{feature_prefix}')
        column_count = candidates.shape[1]
        if column_count < n_components:
            logger.warning(f"    - Not enough features ({column_count}) for PCA with n_components={n_components}. Skipping.")
            return pd.DataFrame(index=df.index)

        complete_rows = candidates.dropna()
        if complete_rows.empty or complete_rows.shape[1] < n_components:
            logger.warning("    - Feature set for PCA is empty or has too few columns after dropping NaNs. Skipping PCA.")
            return pd.DataFrame(index=df.index)

        standardised = StandardScaler().fit_transform(complete_rows)
        projected = PCA(n_components=n_components).fit_transform(standardised)
        component_names = [f'PCA_{feature_prefix}_{i}' for i in range(n_components)]
        return pd.DataFrame(data=projected, columns=component_names, index=complete_rows.index)

    def _calculate_rsi_divergence(self, g: pd.DataFrame, lookback: int = 14) -> pd.DataFrame:
        low_prices = g['Low'].rolling(window=lookback, center=False).min()
        rsi_at_low = g['RSI'][g['Low'] == low_prices]
        high_prices = g['High'].rolling(window=lookback, center=False).max()
        rsi_at_high = g['RSI'][g['High'] == high_prices]
        price_makes_lower_low = (low_prices < low_prices.shift(1)).astype(int)
        rsi_makes_higher_low = (rsi_at_low > rsi_at_low.shift(1)).reindex(g.index).fillna(0).astype(int)
        g['rsi_bullish_divergence'] = (price_makes_lower_low & rsi_makes_higher_low)
        price_makes_higher_high = (high_prices > high_prices.shift(1)).astype(int)
        rsi_makes_lower_high = (rsi_at_high < rsi_at_high.shift(1)).reindex(g.index).fillna(0).astype(int)
        return g

    def _calculate_hoffman_features(self, g: pd.DataFrame) -> pd.DataFrame:
        ema20 = g['Close'].ewm(span=20, adjust=False).mean()
        g['EMA_20_slope'] = ema20.diff()
        candle_range = g['High'] - g['Low']
        candle_range = candle_range.replace(0, np.nan)
        is_strong_uptrend = g['EMA_20_slope'] > g['EMA_20_slope'].rolling(10).mean()
        is_strong_downtrend = g['EMA_20_slope'] < g['EMA_20_slope'].rolling(10).mean()
        g['is_hoffman_irb_bullish'] = (is_strong_uptrend & (((g['Close'] - g['Low']) / candle_range.replace(0,1)) < 0.45) & (((g['Open'] - g['Low']) / candle_range.replace(0,1)) < 0.45)).astype(int)
        g['is_hoffman_irb_bearish'] = (is_strong_downtrend & (((g['High'] - g['Close']) / candle_range.replace(0,1)) < 0.45) & (((g['High'] - g['Open']) / candle_range.replace(0,1)) < 0.45)).astype(int)
        return g

    def _calculate_ict_features(self, g: pd.DataFrame, swing_lookback: int = 10) -> pd.DataFrame:
        bullish_fvg_condition = g['High'].shift(2) < g['Low']
        bearish_fvg_condition = g['Low'].shift(2) > g['High']
        g['fvg_bullish_exists'] = bullish_fvg_condition.astype(int)
        g['fvg_bearish_exists'] = bearish_fvg_condition.astype(int)
        swing_highs = g['High'].rolling(swing_lookback*2+1, center=True).max()
        swing_lows = g['Low'].rolling(swing_lookback*2+1, center=True).min()
        g['liquidity_grab_up'] = ((g['High'] > swing_highs.shift(1)) & (g['Close'] < swing_highs.shift(1))).astype(int)
        g['liquidity_grab_down'] = ((g['Low'] < swing_lows.shift(1)) & (g['Close'] > swing_lows.shift(1))).astype(int)
        g['choch_up_signal'] = (g['Close'] > swing_highs.shift(1)).astype(int)
        g['choch_down_signal'] = (g['Close'] < swing_lows.shift(1)).astype(int)
        return g

    def _calculate_market_structure(self, g: pd.DataFrame, swing_lookback: int = 10) -> pd.DataFrame:
        window = swing_lookback * 2 + 1
        local_highs = g['High'].rolling(window, center=True, min_periods=window).max()
        local_lows = g['Low'].rolling(window, center=True, min_periods=window).min()
        swing_high_points = g['High'][g['High'] == local_highs]
        swing_low_points = g['Low'][g['Low'] == local_lows]
        g['swing_high'] = swing_high_points.ffill()
        g['swing_low'] = swing_low_points.ffill()
        g['bos_up_signal'] = (g['Close'] > g['swing_high'].shift(1)).astype(int)
        g['bos_down_signal'] = (g['Close'] < g['swing_low'].shift(1)).astype(int)
        g['bos_up_since'] = g.groupby((g['bos_up_signal'] == 1).cumsum()).cumcount()
        g['bos_down_since'] = g.groupby((g['bos_down_signal'] == 1).cumsum()).cumcount()
        g.drop(columns=['swing_high', 'swing_low'], inplace=True, errors='ignore')
        return g

    def _calculate_volatility_regime(self, g:pd.DataFrame, hurst_window:int=100) -> pd.DataFrame:
        g['hurst_exponent'] = g['Close'].rolling(window=hurst_window).apply(lambda x: compute_Hc(x, kind='price', simplified=True)[0] if len(x)==hurst_window else np.nan, raw=False)
        g['market_mode'] = pd.cut(g['hurst_exponent'], bins=[0, 0.4, 0.6, 1], labels=[-1, 0, 1], right=False)
        bb_width_rank = g['bollinger_bandwidth'].rolling(hurst_window).rank(pct=True)
        g['bollinger_squeeze'] = (bb_width_rank < 0.1).astype(int)
        return g

    def _calculate_zscores_and_interactions(self, g:pd.DataFrame, z_window:int=50) -> pd.DataFrame:
        for col in ['RSI', 'momentum_20', 'MACD_hist']:
             if col in g.columns:
                mean = g[col].rolling(window=z_window).mean()
                std = g[col].rolling(window=z_window).std().replace(0, np.nan)
                g[f'{col}_zscore'] = (g[col] - mean) / std
        g['momentum_20_norm_atr'] = g['momentum_20'] / g['ATR'].replace(0, np.nan)
        g['adx_x_rsi'] = (g['ADX'] / 50.0) * (g['RSI'] / 100.0)
        if 'hurst_exponent' in g.columns:
            g['hurst_x_adx'] = g['hurst_exponent'] * g['ADX']
        return g

    def _calculate_support_resistance(self, g: pd.DataFrame, period: int = 20) -> pd.DataFrame:
        g[f'support_level_{period}'] = g['Low'].rolling(window=period).min()
        g[f'resistance_level_{period}'] = g['High'].rolling(window=period).max()
        return g

    def _enhance_volume_features(self, g: pd.DataFrame, spike_multiplier: float = 2.0, spike_window: int = 50) -> pd.DataFrame:
        if 'volume' in g.columns:
            g['volume_ma'] = g['volume'].rolling(window=spike_window).mean()
            g['volume_spike'] = (g['volume'] > g['volume_ma'] * spike_multiplier).astype(int)
            g.drop(columns=['volume_ma'], inplace=True, errors='ignore')
        return g

    def _calculate_adx(self, g:pd.DataFrame, period:int) -> pd.DataFrame:
        df=g.copy();alpha=1/period;df['tr']=pd.concat([df['High']-df['Low'],abs(df['High']-df['Close'].shift()),abs(df['Low']-df['Close'].shift())],axis=1).max(axis=1)
        df['dm_plus']=((df['High']-df['High'].shift())>(df['Low'].shift()-df['Low'])).astype(int)*(df['High']-df['High'].shift()).clip(lower=0)
        df['dm_minus']=((df['Low'].shift()-df['Low'])>(df['High']-df['High'].shift())).astype(int)*(df['Low'].shift()-df['Low']).clip(lower=0)
        atr_adx=df['tr'].ewm(alpha=alpha,adjust=False).mean();di_plus=100*(df['dm_plus'].ewm(alpha=alpha,adjust=False).mean()/atr_adx.replace(0,1e-9))
        di_minus=100*(df['dm_minus'].ewm(alpha=alpha,adjust=False).mean()/atr_adx.replace(0,1e-9));dx=100*(abs(di_plus-di_minus)/(di_plus+di_minus).replace(0,1e-9))
        g['ADX']=dx.ewm(alpha=alpha,adjust=False).mean();return g

    def _calculate_bollinger_bands(self, g:pd.DataFrame, period:int) -> pd.DataFrame:
        rolling_close=g['Close'].rolling(window=period);middle_band=rolling_close.mean();std_dev=rolling_close.std()
        g['bollinger_upper'] = middle_band + (std_dev * 2); g['bollinger_lower'] = middle_band - (std_dev * 2)
        g['bollinger_middle'] = middle_band
        g['bollinger_bandwidth'] = (g['bollinger_upper'] - g['bollinger_lower']) / middle_band.replace(0,np.nan); return g

    def _calculate_stochastic(self, g:pd.DataFrame, period:int) -> pd.DataFrame:
        low_min=g['Low'].rolling(window=period).min();high_max=g['High'].rolling(window=period).max()
        g['stoch_k']=100*(g['Close']-low_min)/(high_max-low_min).replace(0,np.nan);g['stoch_d']=g['stoch_k'].rolling(window=3).mean();return g

    def _calculate_momentum(self, g:pd.DataFrame) -> pd.DataFrame:
        g['momentum_10'] = g['Close'].diff(10)
        g['momentum_20'] = g['Close'].diff(20)
        g['pct_change'] = g['Close'].pct_change()
        g['log_returns'] = np.log(g['Close'] / g['Close'].shift(1))
        return g

    def _calculate_seasonality(self, g: pd.DataFrame) -> pd.DataFrame:
        g['month'] = g.index.month
        g['week_of_year'] = g.index.isocalendar().week.astype(int)
        g['day_of_month'] = g.index.day
        return g

    def _calculate_candle_microstructure(self, g: pd.DataFrame) -> pd.DataFrame:
        g['candle_body_size'] = abs(g['Close'] - g['Open'])
        g['upper_wick'] = g['High'] - g[['Open', 'Close']].max(axis=1)
        g['lower_wick'] = g[['Open', 'Close']].min(axis=1) - g['Low']
        candle_range = (g['High'] - g['Low']).replace(0, np.nan)
        g['wick_to_body_ratio'] = (g['upper_wick'] + g['lower_wick']) / g['candle_body_size'].replace(0, 1e-9)
        g['is_doji'] = (g['candle_body_size'] / g['ATR'].replace(0,1)).lt(0.1).astype(int)
        g['is_engulfing'] = ((g['candle_body_size'] > abs(g['Close'].shift() - g['Open'].shift())) & (np.sign(g['Close']-g['Open']) != np.sign(g['Close'].shift()-g['Open'].shift()))).astype(int)
        g['candle_body_size_vs_atr'] = g['candle_body_size'] / g['ATR'].replace(0, 1)
        g['candle_body_to_range_ratio'] = g['candle_body_size'] / candle_range
        return g

    def _calculate_indicator_dynamics(self, g: pd.DataFrame, period: int = 5) -> pd.DataFrame:
        def get_slope(series):
            if len(series) < 2 or series.isnull().all(): return np.nan
            series_float = series.fillna(method='ffill').fillna(method='bfill').astype(float)
            if series_float.isnull().all(): return np.nan
            return np.polyfit(np.arange(len(series_float)), series_float, 1)[0]
        g['RSI_slope'] = g['RSI'].rolling(window=period).apply(get_slope, raw=False)
        g['momentum_10_slope'] = g['momentum_10'].rolling(window=period).apply(get_slope, raw=False)
        if 'MACD_hist' in g.columns:
            g['MACD_hist_slope'] = g['MACD_hist'].rolling(window=period).apply(get_slope, raw=False)
        g['RSI_slope_acceleration'] = g['RSI_slope'].diff()
        g['momentum_10_slope_acceleration'] = g['momentum_10_slope'].diff()
        return g

    def _calculate_markov_features(self, g: pd.DataFrame) -> pd.DataFrame:
        candle_color = np.sign(g['Close'] - g['Open']).fillna(0)
        blocks = (candle_color != candle_color.shift()).cumsum()
        streaks = candle_color.groupby(blocks).cumsum()
        g['markov_streak'] = streaks
        return g

    def _calculate_htf_features(self,df:pd.DataFrame,p:str,s:int,a:int)->pd.DataFrame:
        tf_id = p.upper()
        results=[]
        def get_rolling_slope(series, window):
            if series.notna().sum() < 2: return np.nan
            series_clean = series.dropna()
            if len(series_clean) < 2: return np.nan
            return np.polyfit(series_clean.index.astype(np.int64) // 10**9, series_clean.values, 1)[0]
        for symbol,group in df.groupby('Symbol'):
            g=group.copy()
            sma=g['Close'].rolling(s,min_periods=s).mean()
            atr=(g['High']-g['Low']).rolling(a,min_periods=a).mean()
            trend=np.sign(g['Close']-sma)
            lin_reg_slope = g['Close'].rolling(window=s).apply(get_rolling_slope, raw=False, args=(s,))
            temp_df=pd.DataFrame(index=g.index)
            temp_df[f'{tf_id}_ctx_SMA']=sma
            temp_df[f'{tf_id}_ctx_ATR']=atr
            temp_df[f'{tf_id}_ctx_Trend']=trend
            temp_df[f'{tf_id}_ctx_LinRegSlope'] = lin_reg_slope
            shifted_df=temp_df.shift(1);shifted_df['Symbol']=symbol;results.append(shifted_df)
        if not results: return pd.DataFrame()
        return pd.concat(results).reset_index()

    def _calculate_base_tf_native(self, g:pd.DataFrame)->pd.DataFrame:
        """
        Builds the base-timeframe feature set for one frame of OHLC rows.

        Works on a copy of `g` (the argument is not modified) and returns that
        copy with indicator, candle, volume, regime and VWAP columns appended.

        Requires 'Open', 'High', 'Low' and 'Close' columns and a datetime-like
        index (calendar fields and `.date` are read from it). 'RealVolume' is
        optional; without it 'volume' is 0 and the volume/VWAP features fall back
        to constants or NaN.

        Config fields read: BOLLINGER_PERIOD, STOCHASTIC_PERIOD,
        TREND_FILTER_THRESHOLD, HAWKES_KAPPA, and optionally USE_PCA_REDUCTION
        together with RSI_PERIODS_FOR_PCA.
        """
        g_out = g.copy()
        lookback=14

        # --- BLOCK 1: CORE INDICATORS (Dependencies for other features) ---
        g_out['ATR']=(g_out['High']-g_out['Low']).rolling(lookback).mean()
        delta=g_out['Close'].diff()
        gain=delta.where(delta > 0,0).ewm(com=lookback-1,adjust=False).mean()
        loss=-delta.where(delta < 0,0).ewm(com=lookback-1,adjust=False).mean()
        rs = gain / loss.replace(0, 1e-9)
        g_out['RSI']=100-(100/(1+rs))

        # --- Generate multiple RSI periods for PCA ---
        if getattr(self.config, 'USE_PCA_REDUCTION', False) and hasattr(self.config, 'RSI_PERIODS_FOR_PCA'):
            for period in self.config.RSI_PERIODS_FOR_PCA:
                # `delta` does not depend on the period, so it is computed once above.
                gain_pca = delta.where(delta > 0, 0).ewm(com=period - 1, adjust=False).mean()
                loss_pca = -delta.where(delta < 0, 0).ewm(com=period - 1, adjust=False).mean()
                rs_pca = gain_pca / loss_pca.replace(0, 1e-9)
                g_out[f'rsi_{period}'] = 100 - (100 / (1 + rs_pca))

        g_out=self._calculate_adx(g_out,lookback)
        g_out=self._calculate_bollinger_bands(g_out,self.config.BOLLINGER_PERIOD)
        g_out=self._calculate_stochastic(g_out,self.config.STOCHASTIC_PERIOD)
        g_out = self._calculate_momentum(g_out)

        g_out['EMA_20'] = g_out['Close'].ewm(span=20, adjust=False).mean()
        g_out['EMA_50'] = g_out['Close'].ewm(span=50, adjust=False).mean()
        g_out['EMA_100'] = g_out['Close'].ewm(span=100, adjust=False).mean()
        g_out['EMA_200'] = g_out['Close'].ewm(span=200, adjust=False).mean()

        ema_12 = g_out['Close'].ewm(span=12, adjust=False).mean()
        ema_26 = g_out['Close'].ewm(span=26, adjust=False).mean()
        g_out['MACD_line'] = ema_12 - ema_26
        g_out['MACD_signal'] = g_out['MACD_line'].ewm(span=9, adjust=False).mean()
        g_out['MACD_hist'] = g_out['MACD_line'] - g_out['MACD_signal']

        # --- BLOCK 2: CANDLE & VOLUME FEATURES (Depend on Block 1) ---
        g_out = self._calculate_candle_microstructure(g_out)

        if 'RealVolume' in g_out.columns:
            g_out['volume'] = g_out['RealVolume']
        else:
            g_out['volume'] = 0

        # --- BLOCK 3: NEW SIMPLE FEATURES (Depend on Block 1 & 2) ---
        g_out['overnight_gap_pct'] = (g_out['Open'] - g_out['Close'].shift(1)) / g_out['Close'].shift(1)
        g_out['intraday_range_pct'] = (g_out['High'] - g_out['Low']) / g_out['Open'].replace(0, np.nan)
        g_out['close_vs_open_pct'] = (g_out['Close'] - g_out['Open']) / g_out['Open'].replace(0, np.nan)

        g_out['ema_20_vs_50'] = (g_out['EMA_20'] - g_out['EMA_50']) / g_out['EMA_50'].replace(0, np.nan)
        g_out['ema_50_vs_200'] = (g_out['EMA_50'] - g_out['EMA_200']) / g_out['EMA_200'].replace(0, np.nan)

        g_out['is_bullish_hammer'] = ((g_out['lower_wick'] > 2 * g_out['candle_body_size']) & (g_out['upper_wick'] < g_out['candle_body_size']) & (g_out['Close'] > g_out['Open'])).astype(int)
        g_out['is_bearish_shooting_star'] = ((g_out['upper_wick'] > 2 * g_out['candle_body_size']) & (g_out['lower_wick'] < g_out['candle_body_size']) & (g_out['Close'] < g_out['Open'])).astype(int)

        if g_out['volume'].sum() > 0:
            g_out['volume_ma_ratio'] = g_out['volume'] / g_out['volume'].rolling(20).mean().replace(0, np.nan)
            g_out['volume_trend'] = g_out['volume'].rolling(5).apply(lambda x: np.polyfit(np.arange(len(x)), x, 1)[0] if x.notna().all() else np.nan, raw=False)
        else:
            g_out['volume_ma_ratio'] = 0
            g_out['volume_trend'] = 0

        g_out['momentum_5'] = g_out['Close'].pct_change(5)
        g_out['momentum_10_vs_20'] = g_out['momentum_10'] - g_out['momentum_20']

        g_out['atr_ratio'] = g_out['ATR'] / g_out['ATR'].rolling(20).mean().replace(0, np.nan)
        g_out['volatility_change'] = g_out['ATR'].pct_change()

        # --- BLOCK 4: DERIVED & ADVANCED FEATURES (Depend on previous blocks) ---
        g_out = self._calculate_indicator_dynamics(g_out)
        g_out = self._calculate_markov_features(g_out)
        g_out = self._calculate_seasonality(g_out)

        g_out['market_regime']=np.where(g_out['ADX']>self.config.TREND_FILTER_THRESHOLD,1,0)
        sma_fast = g_out['Close'].rolling(window=20).mean()
        sma_slow = g_out['Close'].rolling(window=50).mean()
        signal_series = pd.Series(np.where(sma_fast > sma_slow, 1.0, -1.0), index=g_out.index)
        # Non-zero only on rows where the fast/slow SMA relation flips.
        g_out['primary_model_signal'] = signal_series.diff().fillna(0)

        g_out['market_volatility_index'] = g_out['ATR'].rolling(100).rank(pct=True)
        g_out['close_fracdiff'] = self._fractional_differentiation(g_out['Close'], d=0.5)
        g_out['abs_log_returns'] = g_out['log_returns'].abs().fillna(0)
        g_out['volatility_hawkes'] = self.hawkes_process(g_out['abs_log_returns'], kappa=self.config.HAWKES_KAPPA)
        g_out['returns_autocorr_10'] = g_out['log_returns'].rolling(10).corr(g_out['log_returns'].shift(1))

        g_out['donchian_upper'] = g_out['High'].rolling(20).max()
        g_out['donchian_lower'] = g_out['Low'].rolling(20).min()
        g_out['donchian_channel'] = g_out['donchian_upper'] - g_out['donchian_lower']
        g_out['linear_regression'] = g_out['Close'].rolling(window=14).apply(lambda x: np.polyfit(np.arange(len(x)), x, 1)[0], raw=False)
        g_out['SMA_30_weekly'] = g_out['Close'].rolling(window=30*5).mean()

        ha_close = (g_out['Open'] + g_out['High'] + g_out['Low'] + g_out['Close']) / 4
        ha_open = ((g_out['Open'].shift(1) + g_out['Close'].shift(1)) / 2).bfill()
        g_out['ha_body_size'] = abs(ha_close - ha_open)
        g_out['ha_color'] = np.sign(ha_close - ha_open)
        ha_blocks = (g_out['ha_color'] != g_out['ha_color'].shift()).cumsum()
        g_out['ha_streak'] = g_out.groupby(ha_blocks)['ha_color'].cumsum().abs()

        # NOTE(review): shift(-1)/shift(-2) read the following two rows, so each
        # fractal flag depends on bars after the one it is stored on. Confirm
        # that whatever consumes these columns lags them enough to avoid
        # look-ahead; left unchanged here because other methods may rely on the
        # flag sitting on the pivot bar.
        g_out['fractal_up'] = ((g_out['High'] > g_out['High'].shift(1)) & (g_out['High'] > g_out['High'].shift(2)) & (g_out['High'] > g_out['High'].shift(-1)) & (g_out['High'] > g_out['High'].shift(-2))).astype(int)
        g_out['fractal_down'] = ((g_out['Low'] < g_out['Low'].shift(1)) & (g_out['Low'] < g_out['Low'].shift(2)) & (g_out['Low'] < g_out['Low'].shift(-1)) & (g_out['Low'] < g_out['Low'].shift(-2))).astype(int)

        if g_out['volume'].sum() > 0:
            g_out = self._enhance_volume_features(g_out)
            if g_out.index.nlevels > 1:
                 g_out['relative_strength'] = (g_out['pct_change'] - g_out.groupby(level=0)['pct_change'].transform('mean'))
            else: g_out['relative_strength'] = 0
        else:
            g_out['relative_strength'] = 0; g_out['volume_spike'] = 0

        g_out = self._calculate_rsi_divergence(g_out, lookback=lookback)
        g_out = self._calculate_hoffman_features(g_out)
        g_out = self._calculate_ict_features(g_out, swing_lookback=10)
        g_out = self._calculate_market_structure(g_out, swing_lookback=10)
        g_out = self._calculate_volatility_regime(g_out, hurst_window=100)
        g_out = self._calculate_support_resistance(g_out, period=20)
        g_out = self._calculate_zscores_and_interactions(g_out, z_window=50)

        if g_out['volume'].sum() > 0:
            # VWAP restarts on every calendar date of the index.
            session_dates = g_out.index.date
            tpv = ((g_out['High'] + g_out['Low'] + g_out['Close']) / 3) * g_out['volume']
            cum_volume = g_out.groupby(session_dates)['volume'].transform('cumsum')
            # Grouped cumsum keeps the original row index. The previous
            # groupby(...).apply(...) form returned a wide DataFrame when the
            # frame covered a single date, which broke the division below.
            cum_tpv = tpv.groupby(session_dates).cumsum()
            g_out['VWAP'] = cum_tpv / cum_volume.replace(0, np.nan)
            g_out['VWAP'] = g_out['VWAP'].ffill()
            g_out['price_to_vwap'] = (g_out['Close'] - g_out['VWAP']) / g_out['ATR'].replace(0, np.nan)
            g_out['price_vs_vwap_sign'] = np.sign(g_out['Close'] - g_out['VWAP'])
            g_out['vwap_slope'] = g_out['VWAP'].diff()
        else:
            g_out['VWAP']=np.nan; g_out['price_to_vwap']=np.nan; g_out['price_vs_vwap_sign']=np.nan; g_out['vwap_slope']=np.nan

        return g_out

    def _calculate_relative_performance(self, df: pd.DataFrame) -> pd.DataFrame:
        if 'pct_change' not in df.columns:
            logger.warning("  - 'pct_change' not found, cannot calculate relative performance.")
            return df
        if 'Symbol' in df.columns and df['Symbol'].nunique() > 1:
            df['avg_market_pct_change'] = df.groupby(level=0)['pct_change'].transform('mean')
            df['relative_performance'] = df['pct_change'] - df['avg_market_pct_change']
        else:
            df['relative_performance'] = 0
        return df

    def _process_single_symbol_stack(self, data_by_tf_single_symbol: Dict[str, pd.DataFrame]) -> pd.DataFrame:
        base_tf, medium_tf, high_tf = self.roles['base'], self.roles['medium'], self.roles['high']
        df_base = data_by_tf_single_symbol[base_tf]

        df_base_featured = self._calculate_base_tf_native(df_base)
        df_merged = df_base_featured.reset_index()

        if medium_tf and medium_tf in data_by_tf_single_symbol and not data_by_tf_single_symbol[medium_tf].empty:
            df_medium_ctx = self._calculate_htf_features(data_by_tf_single_symbol[medium_tf], medium_tf, 50, 14)
            if not df_medium_ctx.empty:
                df_merged = pd.merge_asof(df_merged.sort_values('Timestamp'), df_medium_ctx.sort_values('Timestamp'), on='Timestamp', by='Symbol', direction='backward')

        if high_tf and high_tf in data_by_tf_single_symbol and not data_by_tf_single_symbol[high_tf].empty:
            df_high_ctx = self._calculate_htf_features(data_by_tf_single_symbol[high_tf], high_tf, 20, 14)
            if not df_high_ctx.empty:
                df_merged = pd.merge_asof(df_merged.sort_values('Timestamp'), df_high_ctx.sort_values('Timestamp'), on='Timestamp', by='Symbol', direction='backward')

        df_final = df_merged.set_index('Timestamp').copy()
        del df_merged, df_base_featured

        if medium_tf:
            tf_id = medium_tf.upper()
            df_final[f'adx_x_{tf_id}_trend'] = df_final['ADX'] * df_final.get(f'{tf_id}_ctx_Trend', 0)
        if high_tf:
            tf_id = high_tf.upper()
            df_final[f'atr_x_{tf_id}_trend'] = df_final['ATR'] * df_final.get(f'{tf_id}_ctx_Trend', 0)
            df_final['atr_vs_daily_atr'] = df_final['ATR'] / df_final.get(f'{tf_id}_ctx_ATR', 1).replace(0, 1)

        strategy_details = self.playbook.get(self.config.strategy_name, {})
        complexity = strategy_details.get('complexity', 'medium')
        
        if complexity in ['high', 'specialized'] and self.config.USE_PCA_REDUCTION:
            logger.info(f"    - Applying PCA for '{self.config.strategy_name}' (Complexity: {complexity}).")
            rsi_pc_df = self._apply_pca_to_features(df_final, 'rsi_', self.config.PCA_N_COMPONENTS)
            if not rsi_pc_df.empty:
                df_final = df_final.join(rsi_pc_df)
                cols_to_drop = [c for c in df_final.columns if c.startswith('rsi_')]
                df_final.drop(columns=cols_to_drop, inplace=True, errors='ignore')
        
        df_final['anomaly_score'] = self._get_anomaly_scores(df_final, self.config.anomaly_contamination_factor)
        return df_final

    def create_feature_stack(self, data_by_tf: Dict[str, pd.DataFrame]) -> pd.DataFrame:
        """
        Runs feature engineering for every symbol and returns one combined frame.

        Each symbol found in the base timeframe is processed on its own, the
        results are concatenated and sorted by index, cross-symbol relative
        performance is added, and every non-price column is shifted down one row
        per symbol. Infinite values become NaN and rows lacking 'ATR', 'RSI' or
        'ADX' are dropped.

        Returns an empty DataFrame when the base timeframe is missing or no
        symbol produced any rows.
        """
        logger.info("-> Stage 2: Engineering Features...")
        base_tf = self.roles['base']
        if base_tf not in data_by_tf:
            logger.critical(f"Base timeframe '{base_tf}' data is missing. Cannot proceed.")
            return pd.DataFrame()

        symbols = data_by_tf[base_tf]['Symbol'].unique()
        symbol_count = len(symbols)
        per_symbol_frames = []

        for position, symbol in enumerate(symbols, start=1):
            logger.info(f"  - ({position}/{symbol_count}) Processing features for symbol: {symbol}")
            frames_for_symbol = {}
            for tf, tf_df in data_by_tf.items():
                frames_for_symbol[tf] = tf_df[tf_df['Symbol'] == symbol].copy()
            symbol_features = self._process_single_symbol_stack(frames_for_symbol)
            del frames_for_symbol
            if symbol_features.empty:
                continue
            per_symbol_frames.append(symbol_features)

        if not per_symbol_frames:
            logger.critical("Feature engineering resulted in no processable data across all symbols.")
            return pd.DataFrame()

        logger.info("  - Concatenating data for all symbols...")
        final_df = pd.concat(per_symbol_frames, sort=False).sort_index()
        del per_symbol_frames

        logger.info("  - Calculating cross-symbol features (relative performance)...")
        final_df = self._calculate_relative_performance(final_df)

        logger.info("  - Applying final data shift and cleaning...")
        # Raw price/volume/identifier columns are left unshifted.
        passthrough = ('Open', 'High', 'Low', 'Close', 'RealVolume', 'Symbol')
        feature_cols = [c for c in final_df.columns if c not in passthrough]
        lagged_features = final_df.groupby('Symbol', sort=False)[feature_cols].shift(1)
        final_df[feature_cols] = lagged_features
        final_df.replace([np.inf, -np.inf], np.nan, inplace=True)
        # Rows without the essential core features are not usable downstream.
        final_df.dropna(subset=['ATR', 'RSI', 'ADX'], inplace=True)

        logger.info(f"  - Merged data and created features. Final dataset shape: {final_df.shape}")
        logger.info("[SUCCESS] Feature engineering complete.")
        return final_df

    def label_outcomes(self,df:pd.DataFrame,lookahead:int)->pd.DataFrame:
        """
        Labels every symbol's rows with `_label_group` and concatenates them.

        Returns an empty DataFrame when `df` yields no 'Symbol' groups, matching
        `label_meta_outcomes`, instead of letting `pd.concat` raise on an empty
        list.
        """
        logger.info("  - Generating trade labels with VOLATILITY-ADJUSTED DYNAMIC BARRIERS...")
        labeled_dfs = [self._label_group(group, lookahead) for _, group in df.groupby('Symbol')]
        if not labeled_dfs:
            return pd.DataFrame()
        return pd.concat(labeled_dfs)

    def _label_group(self, group: pd.DataFrame, lookahead: int) -> pd.DataFrame:
        """
        Calculates trade outcomes (1 for long win, -1 for short win, 0 for no outcome)
        based on dynamic, volatility-adjusted take-profit and stop-loss levels.
        This method is now controlled by parameters in the ConfigModel for AI-driven optimization.
        """
        group = group.copy()
        if 'ATR' not in group.columns or len(group) < lookahead + 1:
            logger.warning(f"ATR not found or insufficient data for labeling in group. Skipping.")
            group['target'] = 0
            return group

        # Use dynamic parameters from the main configuration, enabling AI control
        tp_multiplier = self.config.TP_ATR_MULTIPLIER
        sl_multiplier = self.config.SL_ATR_MULTIPLIER

        profit_target_points = group['ATR'] * tp_multiplier
        stop_loss_points = group['ATR'] * sl_multiplier
        
        outcomes = np.zeros(len(group))
        prices = group['Close'].values
        highs = group['High'].values
        lows = group['Low'].values

        for i in range(len(group) - lookahead):
            sl_dist = stop_loss_points.iloc[i]
            tp_dist = profit_target_points.iloc[i]

            if pd.isna(sl_dist) or sl_dist <= 1e-9:
                continue

            # Define levels for both long and short scenarios
            tp_long_level = prices[i] + tp_dist
            sl_long_level = prices[i] - sl_dist
            tp_short_level = prices[i] - tp_dist
            sl_short_level = prices[i] + sl_dist

            # Slice future price action
            future_highs = highs[i+1 : i+1+lookahead]
            future_lows = lows[i+1 : i+1+lookahead]

            # Find first time hitting TP/SL for a long trade
            hit_tp_long_idx = np.where(future_highs >= tp_long_level)[0]
            hit_sl_long_idx = np.where(future_lows <= sl_long_level)[0]
            first_tp_long = hit_tp_long_idx[0] if len(hit_tp_long_idx) > 0 else np.inf
            first_sl_long = hit_sl_long_idx[0] if len(hit_sl_long_idx) > 0 else np.inf

            # Find first time hitting TP/SL for a short trade
            hit_tp_short_idx = np.where(future_lows <= tp_short_level)[0]
            hit_sl_short_idx = np.where(future_highs >= sl_short_level)[0]
            first_tp_short = hit_tp_short_idx[0] if len(hit_tp_short_idx) > 0 else np.inf
            first_sl_short = hit_sl_short_idx[0] if len(hit_sl_short_idx) > 0 else np.inf

            # Pessimistic assignment: only assign a label if one barrier is hit before the other
            if first_tp_long < first_sl_long:
                outcomes[i] = 1  # Long trade won
            if first_tp_short < first_sl_short:
                outcomes[i] = -1 # Short trade won
        
        group['target'] = outcomes
        return group

    def _label_meta_group(self, group: pd.DataFrame, lookahead: int) -> pd.DataFrame:
        group = group.copy()
        if 'primary_model_signal' not in group.columns or len(group) < lookahead + 1:
            group['target'] = 0; return group
        
        # Use dynamic parameters from the main configuration for meta-labeling as well
        tp_multiplier = self.config.TP_ATR_MULTIPLIER
        sl_multiplier = self.config.SL_ATR_MULTIPLIER
        
        sl_atr_dynamic = group['ATR'] * sl_multiplier
        tp_atr_dynamic = group['ATR'] * tp_multiplier
        
        outcomes = np.zeros(len(group))
        prices, lows, highs = group['Close'].values, group['Low'].values, group['High'].values
        primary_signals = group['primary_model_signal'].values
        min_return = self.config.LABEL_MIN_RETURN_PCT

        for i in range(len(group) - lookahead):
            signal = primary_signals[i]
            if signal == 0: continue

            sl_dist, tp_dist = sl_atr_dynamic[i], tp_atr_dynamic[i]
            if pd.isna(sl_dist) or sl_dist <= 1e-9: continue
            
            future_highs, future_lows = highs[i + 1:i + 1 + lookahead], lows[i + 1:i + 1 + lookahead]
            
            if signal > 0: # Primary model signals a long
                tp_level, sl_level = prices[i] + tp_dist, prices[i] - sl_dist
                if (tp_level / prices[i] - 1) <= min_return: continue
                time_to_tp = np.where(future_highs >= tp_level)[0]
                time_to_sl = np.where(future_lows <= sl_level)[0]
                if len(time_to_tp) > 0 and (len(time_to_sl) == 0 or time_to_tp[0] < time_to_sl[0]):
                    outcomes[i] = 1 # Primary signal was correct
            
            elif signal < 0: # Primary model signals a short
                tp_level, sl_level = prices[i] - tp_dist, prices[i] + sl_dist
                if (prices[i] / tp_level - 1) <= min_return: continue
                time_to_tp = np.where(future_lows <= tp_level)[0]
                time_to_sl = np.where(future_highs >= sl_level)[0]
                if len(time_to_tp) > 0 and (len(time_to_sl) == 0 or time_to_tp[0] < time_to_sl[0]):
                    outcomes[i] = 1 # Primary signal was correct
        
        group['target'] = outcomes
        return group

    def label_meta_outcomes(self, df: pd.DataFrame, lookahead: int) -> pd.DataFrame:
        """
        Applies `_label_meta_group` to each 'Symbol' group and concatenates the
        results; returns an empty DataFrame when there are no groups.
        """
        logger.info("  - Generating BINARY meta-labels (1=correct, 0=incorrect)...")
        labeled_dfs = []
        for _, symbol_rows in df.groupby('Symbol'):
            labeled_dfs.append(self._label_meta_group(symbol_rows, lookahead))
        return pd.concat(labeled_dfs) if labeled_dfs else pd.DataFrame()

def check_label_quality(df_train_labeled: pd.DataFrame, min_label_pct: float = 0.02) -> bool:
    """
    Returns True when the 'target' column holds enough trade labels to train on.

    Fails (with a logged warning) when 'target' is missing or all zero, or when
    the combined share of 1 and -1 labels is below `min_label_pct`.
    """
    has_target = 'target' in df_train_labeled.columns
    if not has_target or df_train_labeled['target'].abs().sum() == 0:
        logger.warning("  - LABEL SANITY CHECK FAILED: No non-zero labels were generated.")
        return False

    label_shares = df_train_labeled['target'].value_counts(normalize=True)
    long_pct, short_pct = label_shares.get(1.0, 0), label_shares.get(-1.0, 0)

    enough_labels = (long_pct + short_pct) >= min_label_pct
    if not enough_labels:
        logger.warning(f"  - LABEL SANITY CHECK FAILED: Total trade labels ({long_pct+short_pct:.2%}) is below threshold ({min_label_pct:.2%}).")
        return False

    logger.info(f"  - Label Sanity Check Passed. Distribution: Longs={long_pct:.2%}, Shorts={short_pct:.2%}")
    return True

class GNNModel(torch.nn.Module if GNN_AVAILABLE else object):
    """
    Two-layer graph convolution network: GCNConv -> ReLU -> dropout(0.5) -> GCNConv.

    Falls back to a plain `object` base when GNN_AVAILABLE is false.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Runs both convolutions over `data.x` using `data.edge_index`."""
        edge_index = data.edge_index
        hidden = F.relu(self.conv1(data.x, edge_index))
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, p=0.5, training=self.training)
        return self.conv2(hidden, edge_index)

class TimeSeriesTransformer(nn.Module if GNN_AVAILABLE else object):
    """
    Transformer encoder that maps a feature sequence to `prediction_length` outputs.

    Inputs are projected to `d_model`, a learned positional embedding
    (initialised to zeros, up to `seq_length` positions) is added, the sequence
    is encoded, and the last time step is passed through a linear head.
    Falls back to a plain `object` base when GNN_AVAILABLE is false.
    """

    def __init__(
        self,
        feature_size=9,
        num_layers=2,
        d_model=64,
        nhead=8,
        dim_feedforward=256,
        dropout=0.1,
        seq_length=30,
        prediction_length=1
    ):
        super().__init__()
        self.input_fc = nn.Linear(feature_size, d_model)
        self.pos_embedding = nn.Parameter(torch.zeros(1, seq_length, d_model))
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=d_model,
                nhead=nhead,
                dim_feedforward=dim_feedforward,
                dropout=dropout,
                activation="relu",
            ),
            num_layers=num_layers,
        )
        self.fc_out = nn.Linear(d_model, prediction_length)

    def forward(self, src):
        """Encodes `src` of shape (batch, seq, feature) and returns (batch, prediction_length)."""
        _, seq_len, _ = src.shape
        embedded = self.input_fc(src) + self.pos_embedding[:, :seq_len, :]
        # The encoder is fed sequence-first, so swap the batch and sequence axes.
        encoded = self.transformer_encoder(embedded.permute(1, 0, 2))
        final_step = encoded[-1, :, :]
        return self.fc_out(final_step)

class ModelTrainer:
    GNN_BASE_FEATURES = ['ATR', 'RSI', 'ADX', 'bollinger_bandwidth', 'stoch_k', 'momentum_10', 'hour', 'day_of_week']
    def __init__(self, config: ConfigModel):
        """Keep the run configuration and reset all per-training state."""
        self.config = config

        # Results of the most recent training run.
        self.study: Optional[optuna.study.Study] = None
        self.shap_summary: Optional[pd.DataFrame] = None
        self.class_weights: Optional[Dict[int, float]] = None
        self.best_threshold = 0.5

        # Strategy flags; train() overwrites them from the strategy details.
        self.is_gnn_model = self.is_meta_model = self.is_transformer_model = False

        # State used only by the GNN embedding path.
        self.gnn_model: Optional[GNNModel] = None
        self.gnn_scaler = MinMaxScaler()
        self.asset_map: Dict[str, int] = {}

    def train(self, df_train: pd.DataFrame, feature_list: List[str], strategy_details: Dict) -> Optional[Tuple[Pipeline, float, float]]:
        """Train a model for one cycle and return it with its threshold and F1.

        The flags ``requires_transformer``, ``requires_gnn`` and
        ``requires_meta_labeling`` in ``strategy_details`` select the path:

        * Transformer: a next-close regressor is trained and returned with a
          placeholder threshold (0.0) and placeholder F1 (1.0).
        * GNN: ``feature_list`` is replaced by graph embeddings, then the
          3-class XGBoost pipeline is trained.
        * Otherwise: XGBoost on ``feature_list``; binary when meta-labeling is
          requested, 3-class (-1/0/1 mapped to 0/1/2) when not.

        Args:
            df_train: Labelled training frame. The classifier paths read the
                ``target`` column; the Transformer path reads ``Close``.
            feature_list: Feature column names to train on.
            strategy_details: Strategy playbook entry for the active strategy.

        Returns:
            ``(model, best_threshold, f1)`` on success, ``None`` when training
            is skipped or fails (the reason is logged).
        """
        logger.info(f"  - Starting model training using strategy: '{strategy_details.get('description', 'N/A')}'")
        self.is_gnn_model = strategy_details.get("requires_gnn", False)
        self.is_meta_model = strategy_details.get("requires_meta_labeling", False)
        self.is_transformer_model = strategy_details.get("requires_transformer", False)

        if self.is_transformer_model:
            return self._train_transformer(df_train, feature_list)

        if self.is_gnn_model:
            if not GNN_AVAILABLE:
                logger.error("  - Skipping GNN model training: PyTorch/PyG libraries not found.")
                return None
            logger.info("  - GNN strategy detected. Generating graph embeddings as features...")
            gnn_embeddings = self._train_gnn(df_train)
            if gnn_embeddings.empty:
                logger.error("  - GNN embedding generation failed. Aborting cycle.")
                return None
            X = gnn_embeddings
            feature_list = list(X.columns)
            logger.info(f"  - Feature set replaced by {len(feature_list)} GNN embeddings.")
            y = df_train['target'].map({-1: 0, 0: 1, 1: 2}).astype(int)
            num_classes = 3
        else:
            if not feature_list:
                logger.error(f"  - Training aborted for strategy '{strategy_details.get('description', 'N/A')}': The 'selected_features' list is empty.")
                return None

            # No defensive cleansing here: the data is expected to arrive
            # pre-cleaned from the DataLoader; residual NaNs are zero-filled.
            X = df_train[feature_list].copy().fillna(0)

            if self.is_meta_model:
                logger.info("  - Meta-Labeling strategy detected. Training secondary filter model.")
                y = df_train['target'].astype(int)
                num_classes = 2
            else:
                y = df_train['target'].map({-1: 0, 0: 1, 1: 2}).astype(int)
                num_classes = 3

        if X.empty or len(y.unique()) < num_classes:
            logger.error("  - Training data (X) is empty or not enough classes for the model. Aborting.")
            return None

        observed_classes = np.unique(y)
        self.class_weights = dict(zip(
            observed_classes,
            compute_class_weight(class_weight='balanced', classes=observed_classes, y=y),
        ))

        # Chronological splits (shuffle=False): the last 10% is set aside, and
        # the remainder is split 80/20 into train / validation.
        X_train_val, _, y_train_val, _ = train_test_split(X, y, test_size=0.1, shuffle=False)
        X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.2, shuffle=False)

        if X_train.empty or X_val.empty:
            logger.error(f"  - Training aborted: Data split resulted in an empty training or validation set. (Train shape: {X_train.shape}, Val shape: {X_val.shape})")
            return None

        # NOTE(review): the search runs on the full X/y, i.e. it also sees the
        # validation rows later used for threshold tuning — confirm intended.
        self.study = self._optimize_hyperparameters(df_train, X, y, num_classes)
        if not self.study or not self.study.best_trials:
            logger.error("  - Training aborted: Hyperparameter optimization failed.")
            return None

        logger.info(f"    - Optimization complete. Best Objective Score: {self.study.best_value:.4f}")
        logger.info(f"    - Best params: {self.study.best_params}")

        # Named val_f1 so the sklearn ``f1_score`` import is not shadowed.
        self.best_threshold, val_f1 = self._find_best_threshold(self.study.best_params, X_train, y_train, X_val, y_val, num_classes)
        final_pipeline = self._train_final_model(self.study.best_params, X_train_val, y_train_val, feature_list, num_classes)

        if final_pipeline is None:
            logger.error("  - Training aborted: Final model training failed.")
            return None

        logger.info("  - [SUCCESS] Model training complete.")
        return final_pipeline, self.best_threshold, val_f1

    def _train_transformer(self, df_train: pd.DataFrame, feature_list: List[str], seq_len: int = 30, epochs: int = 20):
        """Train the next-close Transformer regressor on sliding windows.

        Each sample is ``seq_len`` consecutive feature rows; its target is the
        close of the row immediately after the window.

        Returns:
            ``(model, 0.0, 1.0)`` — the model with a dummy threshold and dummy
            F1 score — or ``None`` when PyTorch is unavailable or there are
            too few rows to build a single window.
        """
        if not GNN_AVAILABLE:
            logger.error("  - Skipping Transformer model training: PyTorch libraries not found.")
            return None
        logger.info("  - Transformer strategy detected. Training regression model.")

        df_train = df_train.copy()
        df_train['target_price'] = df_train['Close'].shift(-1)
        df_train.dropna(subset=['target_price'], inplace=True)
        X = df_train[feature_list].copy().fillna(0)
        y = df_train['target_price']

        # The window starting at len(X) - seq_len still has a valid target
        # (index len(X) - 1), hence the "+ 1".
        n_windows = len(X) - seq_len + 1
        if n_windows <= 0:
            logger.error(f"  - Skipping Transformer model training: need at least {seq_len} rows after target alignment, got {len(X)}.")
            return None

        feature_values = X.values
        target_values = y.values
        X_seq = torch.tensor(
            np.array([feature_values[i:i + seq_len] for i in range(n_windows)]),
            dtype=torch.float32,
        )
        y_seq = torch.tensor(
            np.array([target_values[i + seq_len - 1] for i in range(n_windows)]),
            dtype=torch.float32,
        ).unsqueeze(1)

        dataset = torch.utils.data.TensorDataset(X_seq, y_seq)
        train_loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)
        model = TimeSeriesTransformer(feature_size=len(feature_list), seq_length=seq_len, prediction_length=1)
        criterion = nn.MSELoss()
        optimizer = Adam(model.parameters(), lr=0.001)
        for _ in range(epochs):
            for x_batch, y_batch in train_loader:
                optimizer.zero_grad()
                loss = criterion(model(x_batch), y_batch)
                loss.backward()
                optimizer.step()
        logger.info("  - [SUCCESS] Transformer training complete.")
        return model, 0.0, 1.0  # Return model, dummy threshold, and dummy F1 score

    def _create_graph_data(self, df: pd.DataFrame) -> Tuple[Optional[Data], Dict[str, int]]:
        """Build a correlation graph over the symbols found in ``df``.

        Nodes are symbols. Two symbols are linked (in both directions) when
        the absolute correlation of their forward-filled closes exceeds 0.3;
        if no pair qualifies, every pair is linked. Node features are the
        per-symbol means of the available ``GNN_BASE_FEATURES`` columns,
        scaled with ``self.gnn_scaler`` — which is refitted on every call.

        Returns:
            ``(graph, asset_map)`` where ``asset_map`` maps symbol to node
            index, or ``(None, {})`` when fewer than two symbols are present.
        """
        logger.info("    - Creating graph structure from asset correlations...")
        closes_by_symbol = df.pivot(columns='Symbol', values='Close').ffill().dropna(how='all', axis=1)
        if closes_by_symbol.shape[1] < 2:
            logger.warning("    - Not enough assets to build a correlation graph. Skipping GNN.")
            return None, {}

        corr_matrix = closes_by_symbol.corr()
        assets = corr_matrix.index.tolist()
        asset_map = {symbol: node_id for node_id, symbol in enumerate(assets)}
        n_assets = len(assets)

        # Node ids equal positions in ``assets``, so positions are used directly.
        strong_link = np.abs(corr_matrix.values) > 0.3
        edge_list = [
            pair
            for a in range(n_assets)
            for b in range(a + 1, n_assets)
            if strong_link[a, b]
            for pair in ([a, b], [b, a])
        ]
        if not edge_list:
            logger.warning("    - No strong correlations found. Creating a fully connected graph as fallback.")
            edge_list = [[a, b] for a in range(n_assets) for b in range(n_assets) if a != b]
        edge_index = torch.tensor(edge_list, dtype=torch.long).t().contiguous()

        feature_cols = [name for name in self.GNN_BASE_FEATURES if name in df.columns]
        node_features = df.groupby('Symbol')[feature_cols].mean().reindex(assets).fillna(0)
        scaled_values = self.gnn_scaler.fit_transform(node_features)
        node_features_scaled = pd.DataFrame(scaled_values, index=node_features.index)
        x = torch.tensor(node_features_scaled.values, dtype=torch.float)
        return Data(x=x, edge_index=edge_index), asset_map

    def _train_gnn(self, df: pd.DataFrame) -> pd.DataFrame:
        """Fit the GNN on ``df`` and return one embedding row per row of ``df``.

        Stores the fitted model in ``self.gnn_model`` and the symbol-to-node
        mapping in ``self.asset_map``. Returns an empty frame when no graph
        could be built. Output columns are ``gnn_0 .. gnn_{dim-1}``.
        """
        graph_data, self.asset_map = self._create_graph_data(df)
        if graph_data is None:
            return pd.DataFrame()

        embedding_dim = self.config.GNN_EMBEDDING_DIM
        self.gnn_model = GNNModel(
            in_channels=graph_data.num_node_features,
            hidden_channels=embedding_dim * 2,
            out_channels=embedding_dim,
        )
        optimizer = Adam(self.gnn_model.parameters(), lr=0.01, weight_decay=5e-4)

        self.gnn_model.train()
        for _ in range(self.config.GNN_EPOCHS):
            optimizer.zero_grad()
            # NOTE(review): the loss is simply the mean of the network output;
            # no labels are involved — confirm this objective is intended.
            loss = self.gnn_model(graph_data).mean()
            loss.backward()
            optimizer.step()

        self.gnn_model.eval()
        with torch.no_grad():
            embeddings = self.gnn_model(graph_data).numpy()

        column_names = [f"gnn_{i}" for i in range(embedding_dim)]
        embedding_df = pd.DataFrame(embeddings, index=self.asset_map.keys(), columns=column_names)

        # Broadcast each symbol's embedding to every row carrying that symbol.
        per_symbol = embedding_df.to_dict('index')
        full_embeddings = df['Symbol'].map(per_symbol).apply(pd.Series)
        full_embeddings.index = df.index
        return full_embeddings

    def _get_gnn_embeddings_for_test(self, df_test: pd.DataFrame) -> pd.DataFrame:
        """Compute GNN embeddings for ``df_test`` using the already-trained model.

        Node features are the per-symbol means of the available
        ``GNN_BASE_FEATURES`` in ``df_test``, ordered by the training
        ``asset_map`` and scaled with the scaler fitted during training. Edges
        come from the correlation graph of ``df_test``.

        Returns:
            One embedding row per row of ``df_test`` (columns ``gnn_0 ..``),
            or an empty frame when no trained GNN is available or no test
            graph can be built.
        """
        import copy

        if not self.is_gnn_model or self.gnn_model is None or not self.asset_map:
            return pd.DataFrame()

        train_assets = list(self.asset_map)
        feature_cols = [f for f in self.GNN_BASE_FEATURES if f in df_test.columns]
        test_node_features = df_test.groupby('Symbol')[feature_cols].mean()
        aligned_features = test_node_features.reindex(train_assets).fillna(0)
        test_node_features_scaled = pd.DataFrame(self.gnn_scaler.transform(aligned_features), index=aligned_features.index)
        x = torch.tensor(test_node_features_scaled.values, dtype=torch.float)

        # _create_graph_data() refits self.gnn_scaler on whatever frame it is
        # given. Restore the training-time scaler afterwards so later calls
        # keep scaling test data with training statistics.
        scaler_snapshot = copy.deepcopy(self.gnn_scaler)
        try:
            graph_data, test_asset_map = self._create_graph_data(df_test)
        finally:
            self.gnn_scaler = scaler_snapshot
        if graph_data is None:
            return pd.DataFrame()

        edge_index = graph_data.edge_index
        if test_asset_map != self.asset_map:
            # The test graph numbers its nodes by the symbols present in
            # df_test, while ``x`` is ordered by the training asset_map.
            # Translate edges into training node ids and drop edges touching
            # symbols the model was not trained on.
            symbol_by_test_id = {node_id: symbol for symbol, node_id in test_asset_map.items()}
            remapped = []
            for src, dst in edge_index.t().tolist():
                src_symbol, dst_symbol = symbol_by_test_id[src], symbol_by_test_id[dst]
                if src_symbol in self.asset_map and dst_symbol in self.asset_map:
                    remapped.append([self.asset_map[src_symbol], self.asset_map[dst_symbol]])
            if remapped:
                edge_index = torch.tensor(remapped, dtype=torch.long).t().contiguous()
            else:
                edge_index = torch.empty((2, 0), dtype=torch.long)

        graph_data = Data(x=x, edge_index=edge_index)
        self.gnn_model.eval()
        with torch.no_grad():
            embeddings = self.gnn_model(graph_data).numpy()

        embedding_df = pd.DataFrame(embeddings, index=train_assets, columns=[f"gnn_{i}" for i in range(self.config.GNN_EMBEDDING_DIM)])
        full_embeddings = df_test['Symbol'].map(embedding_df.to_dict('index')).apply(pd.Series)
        full_embeddings.index = df_test.index
        return full_embeddings

    def _find_best_threshold(self, best_params, X_train, y_train, X_val, y_val, num_classes) -> Tuple[float, float]:
        """Pick the confidence threshold that maximises macro-F1 on validation data.

        A temporary pipeline (RobustScaler + XGBoost with ``best_params``) is
        fitted on the training split. Thresholds from 0.30 up to, but not
        including, 0.70 are scanned in steps of 0.01. With more than two
        classes, predictions whose top probability does not exceed the
        threshold fall back to class 1; with two classes, class 1 is predicted
        when its probability exceeds the threshold.

        Returns:
            ``(best_threshold, best_f1)``.
        """
        logger.info("    - Tuning classification threshold for F1 score...")
        multiclass = num_classes > 2

        temp_params = {
            'objective': 'multi:softprob' if multiclass else 'binary:logistic',
            'booster': 'gbtree',
            'tree_method': 'hist',
            **best_params,
        }
        if multiclass:
            temp_params['num_class'] = num_classes
        temp_params.pop('early_stopping_rounds', None)

        temp_pipeline = Pipeline([('scaler', RobustScaler()), ('model', xgb.XGBClassifier(**temp_params))])
        temp_pipeline.fit(X_train, y_train, model__sample_weight=y_train.map(self.class_weights))
        probs = temp_pipeline.predict_proba(X_val)

        # The probability summaries do not depend on the threshold, so they
        # are computed once up front.
        if multiclass:
            top_prob = np.max(probs, axis=1)
            top_class = np.argmax(probs, axis=1)

            def predict_at(cutoff):
                return np.where(top_prob > cutoff, top_class, 1)
        else:
            positive_prob = probs[:, 1]

            def predict_at(cutoff):
                return (positive_prob > cutoff).astype(int)

        best_f1, best_thresh = -1, 0.5
        for threshold in np.arange(0.3, 0.7, 0.01):
            f1 = f1_score(y_val, predict_at(threshold), average='macro', zero_division=0)
            if f1 > best_f1:
                best_f1, best_thresh = f1, threshold

        logger.info(f"    - Best threshold found: {best_thresh:.2f} (F1: {best_f1:.4f})")
        return best_thresh, best_f1

    def _optimize_hyperparameters(self, df_full_train: pd.DataFrame, X: pd.DataFrame, y: pd.Series, num_classes: int) -> Optional[optuna.study.Study]:
        """Search XGBoost hyperparameters with Optuna using a simulated Calmar proxy.

        Each trial runs 5-fold stratified CV. In every fold the model's
        validation predictions are turned into simulated trades (ATR-based
        take-profit / stop-loss over ``LOOKAHEAD_CANDLES`` rows), and the fold
        score is total PnL divided by maximum drawdown. The trial score is the
        mean fold score divided by a complexity penalty derived from
        ``max_depth`` and ``n_estimators``.

        Args:
            df_full_train: Frame positionally aligned with ``X``; must contain
                ``Close``, ``ATR``, ``High`` and ``Low``.
            X: Feature matrix.
            y: Encoded class labels.
            num_classes: Number of target classes (2 or 3).

        Returns:
            The finished study, or ``None`` if Optuna itself raised.
        """
        logger.info(f"    - Starting hyperparameter optimization with 5-Fold CV (Objective: Realistic Calmar Ratio, {self.config.OPTUNA_TRIALS} trials)...")

        def dynamic_progress_callback(study: optuna.study.Study, trial: optuna.trial.FrozenTrial):
            """Rewrite a single console line with the current trial count and best score."""
            n_trials = self.config.OPTUNA_TRIALS
            trial_number = trial.number + 1
            try:
                best_value = study.best_value
            except ValueError:
                # Raised by Optuna while no trial has completed successfully.
                best_value = float('nan')
            progress_str = f"> Optuna Optimization: Trial {trial_number}/{n_trials} | Best Score: {best_value:.4f}"
            sys.stdout.write(f"\r{progress_str.ljust(80)}")
            sys.stdout.flush()

        objective = 'multi:softprob' if num_classes > 2 else 'binary:logistic'
        eval_metric = 'mlogloss' if num_classes > 2 else 'logloss'

        def simulate_fold_pnl(preds_val, df_val: pd.DataFrame) -> pd.Series:
            """Return one PnL value per validation row for the predicted signals."""
            lookahead = self.config.LOOKAHEAD_CANDLES
            tp_multiplier = self.config.TP_ATR_MULTIPLIER
            sl_multiplier = self.config.SL_ATR_MULTIPLIER

            # Pull the price columns out once instead of building a row Series
            # for every prediction.
            closes = df_val['Close'].values
            atrs = df_val['ATR'].values
            highs = df_val['High'].values
            lows = df_val['Low'].values
            n_rows = len(df_val)

            # NOTE(review): the folds are shuffled, so consecutive rows of
            # df_val are not consecutive candles; the "future" window below
            # spans more market time than LOOKAHEAD_CANDLES — confirm intended.
            pnl_results = []
            for i, signal in enumerate(preds_val):
                # NOTE(review): this mapping assumes the 3-class encoding
                # (0=short, 1=hold, 2=long). In binary mode class 0 is also
                # treated as a short — verify against the meta-label meaning.
                direction = 1 if signal == 2 else -1 if signal == 0 else 0

                if direction == 0 or (i + lookahead) >= n_rows:
                    pnl_results.append(0)
                    continue

                atr = atrs[i]
                if pd.isna(atr) or atr <= 0:
                    pnl_results.append(0)
                    continue

                entry_price = closes[i]
                tp_dist = atr * tp_multiplier
                sl_dist = atr * sl_multiplier
                tp_level = entry_price + (tp_dist * direction)
                sl_level = entry_price - (sl_dist * direction)

                future_highs = highs[i + 1: i + 1 + lookahead]
                future_lows = lows[i + 1: i + 1 + lookahead]

                if direction == 1:
                    hit_tp_idx = np.where(future_highs >= tp_level)[0]
                    hit_sl_idx = np.where(future_lows <= sl_level)[0]
                else:
                    hit_tp_idx = np.where(future_lows <= tp_level)[0]
                    hit_sl_idx = np.where(future_highs >= sl_level)[0]

                first_tp = hit_tp_idx[0] if len(hit_tp_idx) > 0 else np.inf
                first_sl = hit_sl_idx[0] if len(hit_sl_idx) > 0 else np.inf

                # tp_dist / sl_dist are unsigned distances, so reaching the
                # take-profit is a gain and reaching the stop is a loss for
                # longs and shorts alike. (Multiplying by ``direction`` here
                # would book winning shorts as losses and vice versa.)
                if first_tp < first_sl:
                    pnl_results.append(tp_dist)
                elif first_sl < first_tp:
                    pnl_results.append(-sl_dist)
                else:
                    # Neither level hit, or both first hit on the same row.
                    pnl_results.append(0)

            return pd.Series(pnl_results)

        def calmar_proxy_score(pnl_series: pd.Series) -> float:
            """Total PnL over max drawdown; 0.0 when no trade produced any PnL."""
            if pnl_series.abs().sum() == 0:
                return 0.0
            equity_curve = pnl_series.cumsum()
            max_drawdown = (equity_curve.cummax() - equity_curve).max()
            total_pnl = equity_curve.iloc[-1]
            if max_drawdown > 0:
                return total_pnl / max_drawdown
            return total_pnl if total_pnl > 0 else 0.0

        def custom_objective(trial: optuna.Trial) -> float:
            params = {
                'objective': objective, 'eval_metric': eval_metric, 'booster': 'gbtree',
                'tree_method': 'hist', 'seed': 42,
                'n_estimators': trial.suggest_int('n_estimators', 100, 800, step=50),
                'max_depth': trial.suggest_int('max_depth', 3, 7),
                'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.2, log=True),
                'subsample': trial.suggest_float('subsample', 0.6, 1.0),
                'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
                'gamma': trial.suggest_float('gamma', 0, 5),
                'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 5.0, log=True),
                'alpha': trial.suggest_float('alpha', 1e-8, 5.0, log=True),
                'early_stopping_rounds': 50
            }
            if num_classes > 2:
                params['num_class'] = num_classes

            # NOTE(review): dividing by the penalty shrinks negative scores
            # towards zero too, which favours complex models when the average
            # score is negative — confirm this is acceptable.
            complexity_penalty = 1.0 + (params['max_depth'] / 10.0) * 0.5 + (params['n_estimators'] / 1000.0) * 0.5
            skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
            fold_scores = []

            for train_idx, val_idx in skf.split(X, y):
                X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
                y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]
                df_val = df_full_train.iloc[val_idx]

                try:
                    scaler = RobustScaler()
                    X_train_scaled = scaler.fit_transform(X_train)
                    X_val_scaled = scaler.transform(X_val)

                    model = xgb.XGBClassifier(**params)
                    fit_params = {'sample_weight': y_train.map(self.class_weights)}
                    model.fit(X_train_scaled, y_train, eval_set=[(X_val_scaled, y_val)], verbose=False, **fit_params)

                    preds_val = model.predict(X_val_scaled)
                    fold_scores.append(calmar_proxy_score(simulate_fold_pnl(preds_val, df_val)))

                except Exception as e:
                    # Best-effort: a failing fold is penalised, not fatal.
                    sys.stdout.write("\n")
                    logger.warning(f"Fold in trial {trial.number} failed with error: {e}")
                    fold_scores.append(-5.0)

            avg_score = np.mean(fold_scores)
            final_score = avg_score / complexity_penalty
            return final_score

        try:
            study = optuna.create_study(direction='maximize')
            study.optimize(custom_objective, n_trials=self.config.OPTUNA_TRIALS, timeout=3600, n_jobs=-1, callbacks=[dynamic_progress_callback])
            sys.stdout.write("\n")
            return study
        except Exception as e:
            sys.stdout.write("\n")
            logger.error(f"    - Optuna study failed catastrophically: {e}", exc_info=True)
            return None

    def _train_final_model(self, best_params: Dict, X: pd.DataFrame, y: pd.Series, feature_names: List[str], num_classes: int) -> Optional[Pipeline]:
        """Fit the final RobustScaler + XGBoost pipeline with the tuned parameters.

        Args:
            best_params: Tuned hyperparameters. Not modified by this method.
            X: Feature matrix to fit on.
            y: Encoded class labels; also used to look up per-sample weights
                in ``self.class_weights``.
            feature_names: Column names used to label the SHAP summary.
            num_classes: Number of target classes (2 or 3).

        Returns:
            The fitted pipeline, or ``None`` if fitting raised (the error is
            logged with its traceback).
        """
        logger.info("    - Training final model on all available data...")
        try:
            # Work on a filtered copy rather than popping from the caller's
            # dict; early stopping is unusable here because no eval set is
            # passed to fit().
            tuned_params = {name: value for name, value in best_params.items() if name != 'early_stopping_rounds'}

            objective = 'multi:softprob' if num_classes > 2 else 'binary:logistic'
            final_params = {'objective': objective, 'booster': 'gbtree', 'tree_method': 'hist', 'seed': 42, **tuned_params}
            if num_classes > 2:
                final_params['num_class'] = num_classes

            final_pipeline = Pipeline([('scaler', RobustScaler()), ('model', xgb.XGBClassifier(**final_params))])
            final_pipeline.fit(X, y, model__sample_weight=y.map(self.class_weights))

            if self.config.CALCULATE_SHAP_VALUES:
                self._generate_shap_summary(
                    final_pipeline.named_steps['model'],
                    final_pipeline.named_steps['scaler'].transform(X),
                    feature_names,
                    num_classes,
                )

            return final_pipeline
        except Exception as e:
            logger.error(f"    - Error during final model training: {e}", exc_info=True)
            return None

    def _generate_shap_summary(self, model: xgb.XGBClassifier, X_scaled: np.ndarray, feature_names: List[str], num_classes: int):
        """Compute mean-|SHAP| feature importances and store them on the trainer.

        At most 2000 rows of ``X_scaled`` are explained. On success
        ``self.shap_summary`` is a frame indexed by feature name with a single
        ``SHAP_Importance`` column, sorted descending; on any failure it is
        set to ``None`` and the error is logged.

        Args:
            model: Fitted XGBoost classifier.
            X_scaled: Already-scaled feature matrix the model was fitted on.
            feature_names: Names for the columns of ``X_scaled``.
            num_classes: Number of target classes (2 or 3).
        """
        logger.info("    - Generating SHAP feature importance summary...")
        try:
            if len(X_scaled) > 2000:
                logger.info(f"    - Subsampling data for SHAP from {len(X_scaled)} to 2000 rows.")
                # A private RandomState(42) draws exactly the rows that
                # np.random.seed(42) + np.random.choice would, without
                # resetting the process-wide NumPy RNG as a side effect.
                rng = np.random.RandomState(42)
                sample_indices = rng.choice(X_scaled.shape[0], 2000, replace=False)
                X_sample = X_scaled[sample_indices]
            else:
                X_sample = X_scaled

            explainer = shap.TreeExplainer(model)
            shap_explanation = explainer(X_sample)
            if num_classes > 2:
                mean_abs_shap_per_class = shap_explanation.abs.mean(0).values
                # With one column per class, average across classes; an
                # already 1-D result is used as-is.
                overall_importance = mean_abs_shap_per_class.mean(axis=1) if mean_abs_shap_per_class.ndim == 2 else mean_abs_shap_per_class
            else:
                overall_importance = np.abs(shap_explanation.values).mean(axis=0)

            summary = pd.DataFrame(overall_importance, index=feature_names, columns=['SHAP_Importance']).sort_values(by='SHAP_Importance', ascending=False)
            self.shap_summary = summary
            logger.info("    - SHAP summary generated successfully.")
        except Exception as e:
            logger.error(f"    - Failed to generate SHAP summary: {e}", exc_info=True)
            self.shap_summary = None

class Backtester:
    """Holds backtest settings and validates the take-profit ladder config.

    This stripped-down version defines only the constructor.
    """

    def __init__(self, config: ConfigModel):
        self.config = config
        self.is_meta_model = False
        self.is_transformer_model = False
        self.use_tp_ladder = self.config.USE_TP_LADDER

        if not self.use_tp_ladder:
            return

        # The ladder needs one risk multiplier per level, and the level
        # percentages must add up to 1.0; otherwise it is switched off.
        levels = self.config.TP_LADDER_LEVELS_PCT
        multipliers = self.config.TP_LADDER_RISK_MULTIPLIERS
        if len(levels) != len(multipliers):
            logger.error("TP Ladder config error: 'TP_LADDER_LEVELS_PCT' and 'TP_LADDER_RISK_MULTIPLIERS' must have the same length. Disabling ladder.")
            self.use_tp_ladder = False
            return

        levels_total = sum(levels)
        if not np.isclose(levels_total, 1.0):
            logger.error(f"TP Ladder config error: 'TP_LADDER_LEVELS_PCT' sum ({levels_total}) is not 1.0. Disabling ladder.")
            self.use_tp_ladder = False
            return

        logger.info("Take-Profit Ladder is ENABLED. Standard partial profit logic will be skipped.")

class PerformanceAnalyzer:
    """Stores the configuration it is constructed with.

    This stripped-down version defines no analysis methods.
    """

    def __init__(self, config: ConfigModel):
        self.config = config

def determine_timeframe_roles(detected_tfs: List[str]) -> Dict[str, Optional[str]]:
    """Assign ``base`` / ``medium`` / ``high`` roles to the detected timeframes.

    Timeframes are ordered by their value in ``FeatureEngineer.TIMEFRAME_MAP``
    (unknown names sort last with value 99999). The first becomes ``base``.
    With exactly two timeframes the second becomes ``high``; with three or
    more, the second and third become ``medium`` and ``high``.

    Raises:
        ValueError: If ``detected_tfs`` is empty.
    """
    if not detected_tfs:
        raise ValueError("No timeframes were detected from data files.")

    def timeframe_rank(tf):
        return FeatureEngineer.TIMEFRAME_MAP.get(tf.upper(), 99999)

    # sorted() is stable, so ties keep their detection order.
    sorted_tfs = sorted(detected_tfs, key=timeframe_rank)

    roles = {'base': sorted_tfs[0], 'medium': None, 'high': None}
    count = len(sorted_tfs)
    if count >= 3:
        roles['medium'] = sorted_tfs[1]
        roles['high'] = sorted_tfs[2]
    elif count == 2:
        roles['high'] = sorted_tfs[1]

    logger.info(f"Dynamically determined timeframe roles: {roles}")
    return roles

# =============================================================================
# FRAMEWORK ORCHESTRATION & MEMORY
# =============================================================================
def _run_diagnostic_cycle(config: "ConfigModel") -> None:
    """Load data, label and train for the first cycle only, then log the verdict."""
    # Standard data loading and feature engineering
    data_loader = DataLoader(config)
    all_files = [f for f in os.listdir(config.BASE_PATH) if f.endswith(('.csv', '.txt')) and re.match(r'^[A-Z0-9]+_[A-Z0-9]+', f)]
    if not all_files:
        logger.critical("No data files found. Exiting.")
        return
    data_by_tf, detected_timeframes = data_loader.load_and_parse_data(all_files)
    if not data_by_tf:
        return

    # Dummy playbook for test purposes
    playbook = {config.strategy_name: {"description": "Test Strategy"}}
    tf_roles = determine_timeframe_roles(detected_timeframes)
    fe = FeatureEngineer(config, tf_roles, playbook)
    full_df = fe.create_feature_stack(data_by_tf)
    if full_df.empty:
        logger.critical("Feature engineering resulted in an empty dataframe. Exiting.")
        return

    # We will only run the first cycle for this diagnostic test.
    train_window, forward_gap = pd.to_timedelta(config.TRAINING_WINDOW), pd.to_timedelta(config.FORWARD_TEST_GAP)
    test_start_date = full_df.index.min() + train_window + forward_gap

    logger.info("\n--- Starting Diagnostic Cycle ---")
    train_end = test_start_date - forward_gap
    train_start = train_end - pd.to_timedelta(config.TRAINING_WINDOW)

    df_train_raw = full_df.loc[train_start:train_end].copy()
    if df_train_raw.empty:
        logger.error("Empty training dataframe for the selected period. Aborting test.")
        return

    # Labeling and Training
    strategy_details = playbook.get(config.strategy_name, {})
    fe.config = config
    df_train_labeled = fe.label_outcomes(df_train_raw, config.LOOKAHEAD_CANDLES)

    if not check_label_quality(df_train_labeled, config.LABEL_MIN_EVENT_PCT):
        logger.critical("Failed: Untrainable labels generated.")
        return

    trainer = ModelTrainer(config)
    train_result = trainer.train(df_train_labeled, config.selected_features, strategy_details)

    # Reporting the outcome of the test
    if not train_result:
        logger.error("--- DIAGNOSTIC TEST RESULT: COMPLETE FAILURE. Model training process failed. ---")
        return

    # Named f1_achieved so the sklearn ``f1_score`` import is not shadowed.
    _, _, f1_achieved = train_result
    best_objective_score = trainer.study.best_value if trainer.study and trainer.study.best_value is not None else -1.0
    logger.info("--- DIAGNOSTIC TEST RESULT ---")
    logger.info(f"Strategy: {config.strategy_name}")
    logger.info(f"Labeling: TP_MULT={config.TP_ATR_MULTIPLIER}, SL_MULT={config.SL_ATR_MULTIPLIER}")
    logger.info(f"F1 Score Achieved: {f1_achieved:.4f}")
    logger.info(f"Profitability Score (Calmar): {best_objective_score:.4f}")
    if f1_achieved >= config.MIN_F1_SCORE_GATE:
        logger.warning(f"CONCLUSION: SUCCESS. The model PASSED the F1 gate of {config.MIN_F1_SCORE_GATE}.")
    else:
        logger.warning(f"CONCLUSION: FAILURE. The model FAILED the F1 gate of {config.MIN_F1_SCORE_GATE}.")


def run_diagnostic_instance(test_config: Dict):
    """
    A stripped-down orchestration function specifically for running a single diagnostic test.
    It bypasses AI calls and uses the exact configuration provided.

    Adds a ``run_timestamp`` key to ``test_config``, validates it into a
    ``ConfigModel``, and writes a per-run log file under ``<BASE_PATH>/Results``.
    The log file handler is always detached and closed when the run ends,
    including on early exits and exceptions, so one test's records never spill
    into another test's log file.
    """
    run_timestamp_str = datetime.now().strftime("%Y%m%d-%H%M%S")
    test_config['run_timestamp'] = run_timestamp_str

    logger.warning(f"--- EXECUTING DIAGNOSTIC: {test_config.get('REPORT_LABEL', 'Untitled Test')} ---")

    try:
        config = ConfigModel(**test_config)
    except ValidationError as e:
        logger.critical(f"--- FATAL CONFIGURATION ERROR ---\n{e}")
        return

    # Simplified logging for tests
    # Ensure Results directory exists
    results_dir = os.path.join(config.BASE_PATH, "Results")
    os.makedirs(results_dir, exist_ok=True)

    log_file_path = os.path.join(results_dir, f"{config.REPORT_LABEL}_{run_timestamp_str}.log")
    file_handler = RotatingFileHandler(log_file_path, maxBytes=5*1024*1024, backupCount=2)
    file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
    logger.addHandler(file_handler)
    try:
        logger.info(f"--- Run Initialized: {config.nickname} | Strategy: {config.strategy_name} ---")
        _run_diagnostic_cycle(config)
    finally:
        # Runs on every exit path so the handler cannot leak into later runs.
        logger.removeHandler(file_handler)
        file_handler.close()

def main():
    """
    Configure and run the two labeling diagnostics back to back.

    Both runs share one base configuration and differ only in report label,
    nickname and the take-profit / stop-loss ATR multipliers.
    """
    # Shared settings. Many parameters are present only to satisfy the
    # ConfigModel validation, even if they aren't directly used by the
    # diagnostic training run.
    base_config = {
        # Use a failing strategy from previous logs
        "strategy_name": "CHoCH_Orderblock_Entry",
        "selected_features": [
            "choch_up_signal", "choch_down_signal", "fvg_bullish_exists",
            "fvg_bearish_exists", "volume_spike", "DAILY_ctx_Trend"
        ],
        "BASE_PATH": os.getcwd(),
        "INITIAL_CAPITAL": 10000.0,
        "OPTUNA_TRIALS": 50,
        "MAX_TRAINING_RETRIES_PER_CYCLE": 0,  # No retries for diagnostic tests
        "TRAINING_WINDOW": '365D',
        "RETRAINING_FREQUENCY": '90D',  # Irrelevant as we only run one cycle
        "FORWARD_TEST_GAP": "1D",
        "LOOKAHEAD_CANDLES": 150,
        "MIN_F1_SCORE_GATE": 0.55,  # Using a realistic but still challenging gate for the test
        "CONFIDENCE_TIERS": {
            'ultra_high': {'min': 0.85, 'risk_mult': 1.2, 'rr': 2.5},
            'high':       {'min': 0.75, 'risk_mult': 1.0, 'rr': 2.0},
            'standard':   {'min': 0.60, 'risk_mult': 0.8, 'rr': 1.5}
        },
        "BASE_RISK_PER_TRADE_PCT": 0.01,
        "RISK_CAP_PER_TRADE_USD": 500.0,
        # Add other required fields with default values
        "MAX_DD_PER_CYCLE": 0.25,
        "MAX_CONCURRENT_TRADES": 3,
        "USE_TP_LADDER": False,
        "COMMISSION_PER_LOT": 3.5
    }

    test_variants = [
        # --- Test A: The "Big Move" Hypothesis ---
        {
            "REPORT_LABEL": "Test_02a_Big_Move_Labels",
            "nickname": "BigMoveTest",
            "TP_ATR_MULTIPLIER": 4.0,
            "SL_ATR_MULTIPLIER": 2.0
        },
        # --- Test B: The "Scalper" Hypothesis ---
        {
            "REPORT_LABEL": "Test_02b_Scalper_Labels",
            "nickname": "ScalperTest",
            "TP_ATR_MULTIPLIER": 1.0,
            "SL_ATR_MULTIPLIER": 1.0
        },
    ]

    for position, overrides in enumerate(test_variants):
        if position:
            # Add a small delay to separate the log files clearly
            time.sleep(5)
        run_diagnostic_instance({**base_config, **overrides})

if __name__ == '__main__':
    # NOTE: This script assumes all class definitions (ConfigModel, GeminiAnalyzer, etc.)
    # from the main V208 script have been copied into the top of this file.
    # The diagnostics run only when the file is executed as a script, not on import.
    main()

# Test_02_Label_Definition.py