class FeatureEngineer:
    """
    A comprehensive feature engineering engine designed to be perfectly aligned with the master playbook.
    It calculates all required features, from simple price-action metrics to advanced market structure and
    volatility regime indicators, ensuring any strategy in the playbook can be executed.
    """
    TIMEFRAME_MAP = {'M1': 1, 'M5': 5, 'M15': 15, 'M30': 30, 'H1': 60, 'H4': 240, 'D1': 1440, 'DAILY': 1440}
    ANOMALY_FEATURES = [
        'ATR', 'bollinger_bandwidth', 'RSI', 'RealVolume', 'candle_body_size',
        'pct_change', 'candle_body_size_vs_atr', 'atr_vs_daily_atr', 'MACD_hist'
    ]

    def __init__(self, config: 'ConfigModel', timeframe_roles: Dict[str, str]):
        self.config = config
        self.roles = timeframe_roles

    def _get_weights_ffd(self, d: float, thres: float) -> np.ndarray:
        w, k = [1.], 1
        while True:
            w_ = -w[-1] / k * (d - k + 1)
            if abs(w_) < thres: break
            w.append(w_)
            k += 1
        return np.array(w[::-1]).reshape(-1, 1)

    def _fractional_differentiation(self, series: pd.Series, d: float, thres: float = 1e-5) -> pd.Series:
        weights = self._get_weights_ffd(d, thres)
        width = len(weights)
        if width > len(series): return pd.Series(index=series.index)
        diff_series = series.rolling(width).apply(lambda x: np.dot(weights.T, x)[0], raw=True)
        diff_series.name = f"{series.name}_fracdiff_{d}"
        return diff_series

    def _get_anomaly_scores(self, df: pd.DataFrame, contamination: float) -> pd.Series:
        features_to_check = [f for f in self.ANOMALY_FEATURES if f in df.columns]
        df_clean = df[features_to_check].dropna()
        if df_clean.empty:
            return pd.Series(1, index=df.index, name='anomaly_score')
        model = IsolationForest(contamination=contamination, random_state=42, n_estimators=100)
        model.fit(df_clean)
        scores = pd.Series(model.predict(df[features_to_check].fillna(0)), index=df.index)
        scores.name = 'anomaly_score'
        return scores

    def hawkes_process(self, data: pd.Series, kappa: float) -> pd.Series:
        if not isinstance(data, pd.Series) or data.isnull().all():
            logger.warning("Hawkes process received invalid data; returning zeros.")
            return pd.Series(np.zeros(len(data)), index=data.index)
        assert kappa > 0.0
        alpha = np.exp(-kappa)
        arr = data.to_numpy()
        output = np.zeros(len(data))
        output[:] = np.nan
        for i in range(1, len(data)):
            if np.isnan(output[i - 1]): output[i] = arr[i]
            else: output[i] = output[i - 1] * alpha + arr[i]
        return pd.Series(output, index=data.index) * kappa

    def _apply_pca_to_features(self, df: pd.DataFrame, feature_prefix: str, n_components: int) -> pd.DataFrame:
        pca_features = df.filter(regex=f'^{feature_prefix}').copy()
        if pca_features.shape[1] < n_components:
            return pd.DataFrame(index=df.index)
        pca_features.dropna(inplace=True)
        if pca_features.empty or pca_features.shape[1] < n_components:
            return pd.DataFrame(index=df.index)
        scaler = StandardScaler()
        scaled_features = scaler.fit_transform(pca_features)
        pca = PCA(n_components=n_components)
        principal_components = pca.fit_transform(scaled_features)
        pc_df = pd.DataFrame(data=principal_components, columns=[f'PCA_{feature_prefix}_{i}' for i in range(n_components)], index=pca_features.index)
        return pc_df

    def _calculate_rsi_divergence(self, g: pd.DataFrame, lookback: int = 14) -> pd.DataFrame:
        low_prices = g['Low'].rolling(window=lookback, center=False).min()
        rsi_at_low = g['RSI'][g['Low'] == low_prices]
        high_prices = g['High'].rolling(window=lookback, center=False).max()
        rsi_at_high = g['RSI'][g['High'] == high_prices]
        price_makes_lower_low = (low_prices < low_prices.shift(1)).astype(int)
        rsi_makes_higher_low = (rsi_at_low > rsi_at_low.shift(1)).reindex(g.index).fillna(0).astype(int)
        g['rsi_bullish_divergence'] = (price_makes_lower_low & rsi_makes_higher_low)
        price_makes_higher_high = (high_prices > high_prices.shift(1)).astype(int)
        rsi_makes_lower_high = (rsi_at_high < rsi_at_high.shift(1)).reindex(g.index).fillna(0).astype(int)
        return g

    def _calculate_hoffman_features(self, g: pd.DataFrame) -> pd.DataFrame:
        ema20 = g['Close'].ewm(span=20, adjust=False).mean()
        g['EMA_20_slope'] = ema20.diff()
        candle_range = g['High'] - g['Low']
        candle_range = candle_range.replace(0, np.nan)
        is_strong_uptrend = g['EMA_20_slope'] > g['EMA_20_slope'].rolling(10).mean()
        is_strong_downtrend = g['EMA_20_slope'] < g['EMA_20_slope'].rolling(10).mean()
        g['is_hoffman_irb_bullish'] = (is_strong_uptrend & (((g['Close'] - g['Low']) / candle_range.replace(0,1)) < 0.45) & (((g['Open'] - g['Low']) / candle_range.replace(0,1)) < 0.45)).astype(int)
        g['is_hoffman_irb_bearish'] = (is_strong_downtrend & (((g['High'] - g['Close']) / candle_range.replace(0,1)) < 0.45) & (((g['High'] - g['Open']) / candle_range.replace(0,1)) < 0.45)).astype(int)
        return g

    def _calculate_ict_features(self, g: pd.DataFrame, swing_lookback: int = 10) -> pd.DataFrame:
        bullish_fvg_condition = g['High'].shift(2) < g['Low']
        bearish_fvg_condition = g['Low'].shift(2) > g['High']
        g['fvg_bullish_exists'] = bullish_fvg_condition.astype(int)
        g['fvg_bearish_exists'] = bearish_fvg_condition.astype(int)
        swing_highs = g['High'].rolling(swing_lookback*2+1, center=True).max()
        swing_lows = g['Low'].rolling(swing_lookback*2+1, center=True).min()
        g['liquidity_grab_up'] = ((g['High'] > swing_highs.shift(1)) & (g['Close'] < swing_highs.shift(1))).astype(int)
        g['liquidity_grab_down'] = ((g['Low'] < swing_lows.shift(1)) & (g['Close'] > swing_lows.shift(1))).astype(int)
        g['choch_up_signal'] = (g['Close'] > swing_highs.shift(1)).astype(int)
        g['choch_down_signal'] = (g['Close'] < swing_lows.shift(1)).astype(int)
        return g

    def _calculate_market_structure(self, g: pd.DataFrame, swing_lookback: int = 10) -> pd.DataFrame:
        window = swing_lookback * 2 + 1
        local_highs = g['High'].rolling(window, center=True, min_periods=window).max()
        local_lows = g['Low'].rolling(window, center=True, min_periods=window).min()
        swing_high_points = g['High'][g['High'] == local_highs]
        swing_low_points = g['Low'][g['Low'] == local_lows]
        g['swing_high'] = swing_high_points.ffill()
        g['swing_low'] = swing_low_points.ffill()
        g['bos_up_signal'] = (g['Close'] > g['swing_high'].shift(1)).astype(int)
        g['bos_down_signal'] = (g['Close'] < g['swing_low'].shift(1)).astype(int)
        g['bos_up_since'] = g.groupby((g['bos_up_signal'] == 1).cumsum()).cumcount()
        g['bos_down_since'] = g.groupby((g['bos_down_signal'] == 1).cumsum()).cumcount()
        g.drop(columns=['swing_high', 'swing_low'], inplace=True, errors='ignore')
        return g

    def _calculate_volatility_regime(self, g:pd.DataFrame, hurst_window:int=100) -> pd.DataFrame:
        g['hurst_exponent'] = g['Close'].rolling(window=hurst_window).apply(lambda x: compute_Hc(x, kind='price', simplified=True)[0] if len(x)==hurst_window else np.nan, raw=False)
        g['market_mode'] = pd.cut(g['hurst_exponent'], bins=[0, 0.4, 0.6, 1], labels=[-1, 0, 1], right=False)
        bb_width_rank = g['bollinger_bandwidth'].rolling(hurst_window).rank(pct=True)
        g['bollinger_squeeze'] = (bb_width_rank < 0.1).astype(int)
        return g

    def _calculate_zscores_and_interactions(self, g:pd.DataFrame, z_window:int=50) -> pd.DataFrame:
        for col in ['RSI', 'momentum_20', 'MACD_hist']:
             if col in g.columns:
                mean = g[col].rolling(window=z_window).mean()
                std = g[col].rolling(window=z_window).std().replace(0, np.nan)
                g[f'{col}_zscore'] = (g[col] - mean) / std
        g['momentum_20_norm_atr'] = g['momentum_20'] / g['ATR'].replace(0, np.nan)
        g['adx_x_rsi'] = (g['ADX'] / 50.0) * (g['RSI'] / 100.0)
        if 'hurst_exponent' in g.columns:
            g['hurst_x_adx'] = g['hurst_exponent'] * g['ADX']
        return g

    def _calculate_support_resistance(self, g: pd.DataFrame, period: int = 20) -> pd.DataFrame:
        g[f'support_level_{period}'] = g['Low'].rolling(window=period).min()
        g[f'resistance_level_{period}'] = g['High'].rolling(window=period).max()
        return g

    def _enhance_volume_features(self, g: pd.DataFrame, spike_multiplier: float = 2.0, spike_window: int = 50) -> pd.DataFrame:
        if 'volume' in g.columns and g['volume'].sum() > 0:
            volume_ma = g['volume'].rolling(window=spike_window).mean()
            g['volume_spike'] = (g['volume'] > volume_ma * spike_multiplier).astype(int)
        else:
            g['volume_spike'] = 0
        return g

    def _calculate_adx(self, g:pd.DataFrame, period:int) -> pd.DataFrame:
        df=g.copy();alpha=1/period;df['tr']=pd.concat([df['High']-df['Low'],abs(df['High']-df['Close'].shift()),abs(df['Low']-df['Close'].shift())],axis=1).max(axis=1)
        df['dm_plus']=((df['High']-df['High'].shift())>(df['Low'].shift()-df['Low'])).astype(int)*(df['High']-df['High'].shift()).clip(lower=0)
        df['dm_minus']=((df['Low'].shift()-df['Low'])>(df['High']-df['High'].shift())).astype(int)*(df['Low'].shift()-df['Low']).clip(lower=0)
        atr_adx=df['tr'].ewm(alpha=alpha,adjust=False).mean();di_plus=100*(df['dm_plus'].ewm(alpha=alpha,adjust=False).mean()/atr_adx.replace(0,1e-9))
        di_minus=100*(df['dm_minus'].ewm(alpha=alpha,adjust=False).mean()/atr_adx.replace(0,1e-9));dx=100*(abs(di_plus-di_minus)/(di_plus+di_minus).replace(0,1e-9))
        g['ADX']=dx.ewm(alpha=alpha,adjust=False).mean();return g

    def _calculate_bollinger_bands(self, g:pd.DataFrame, period:int) -> pd.DataFrame:
        rolling_close=g['Close'].rolling(window=period);middle_band=rolling_close.mean();std_dev=rolling_close.std()
        g['bollinger_upper'] = middle_band + (std_dev * 2); g['bollinger_lower'] = middle_band - (std_dev * 2)
        g['bollinger_middle'] = middle_band
        g['bollinger_bandwidth'] = (g['bollinger_upper'] - g['bollinger_lower']) / middle_band.replace(0,np.nan); return g

    def _calculate_stochastic(self, g:pd.DataFrame, period:int) -> pd.DataFrame:
        low_min=g['Low'].rolling(window=period).min();high_max=g['High'].rolling(window=period).max()
        g['stoch_k']=100*(g['Close']-low_min)/(high_max-low_min).replace(0,np.nan);g['stoch_d']=g['stoch_k'].rolling(window=3).mean();return g

    def _calculate_momentum(self, g:pd.DataFrame) -> pd.DataFrame:
        g['momentum_10'] = g['Close'].diff(10)
        g['momentum_20'] = g['Close'].diff(20)
        g['pct_change'] = g['Close'].pct_change()
        g['log_returns'] = np.log(g['Close'] / g['Close'].shift(1))
        return g

    def _calculate_seasonality(self, g: pd.DataFrame) -> pd.DataFrame:
        g['month'] = g.index.month
        g['week_of_year'] = g.index.isocalendar().week.astype(int)
        g['day_of_month'] = g.index.day
        g['hour'] = g.index.hour
        g['day_of_week'] = g.index.dayofweek
        return g

    def _calculate_candle_microstructure(self, g: pd.DataFrame) -> pd.DataFrame:
        g['candle_body_size'] = abs(g['Close'] - g['Open'])
        g['upper_wick'] = g['High'] - g[['Open', 'Close']].max(axis=1)
        g['lower_wick'] = g[['Open', 'Close']].min(axis=1) - g['Low']
        candle_range = (g['High'] - g['Low']).replace(0, np.nan)
        g['wick_to_body_ratio'] = (g['upper_wick'] + g['lower_wick']) / g['candle_body_size'].replace(0, 1e-9)
        g['is_doji'] = (g['candle_body_size'] / g['ATR'].replace(0,1)).lt(0.1).astype(int)
        g['is_engulfing'] = ((g['candle_body_size'] > abs(g['Close'].shift() - g['Open'].shift())) & (np.sign(g['Close']-g['Open']) != np.sign(g['Close'].shift()-g['Open'].shift()))).astype(int)
        g['candle_body_size_vs_atr'] = g['candle_body_size'] / g['ATR'].replace(0, 1)
        g['candle_body_to_range_ratio'] = g['candle_body_size'] / candle_range
        return g

    def _calculate_indicator_dynamics(self, g: pd.DataFrame, period: int = 5) -> pd.DataFrame:
        def get_slope(series):
            if len(series) < 2 or series.isnull().all(): return np.nan
            series_float = series.fillna(method='ffill').fillna(method='bfill').astype(float)
            if series_float.isnull().all(): return np.nan
            return np.polyfit(np.arange(len(series_float)), series_float, 1)[0]
        g['RSI_slope'] = g['RSI'].rolling(window=period).apply(get_slope, raw=False)
        g['momentum_10_slope'] = g['momentum_10'].rolling(window=period).apply(get_slope, raw=False)
        if 'MACD_hist' in g.columns:
            g['MACD_hist_slope'] = g['MACD_hist'].rolling(window=period).apply(get_slope, raw=False)
        if 'ADX' in g.columns:
            g['ADX_slope'] = g['ADX'].rolling(window=period).apply(get_slope, raw=False)
        g['RSI_slope_acceleration'] = g['RSI_slope'].diff()
        g['momentum_10_slope_acceleration'] = g['momentum_10_slope'].diff()
        return g

    def _calculate_markov_features(self, g: pd.DataFrame) -> pd.DataFrame:
        candle_color = np.sign(g['Close'] - g['Open']).fillna(0)
        blocks = (candle_color != candle_color.shift()).cumsum()
        streaks = candle_color.groupby(blocks).cumsum()
        g['markov_streak'] = streaks
        return g

    def _calculate_htf_features(self,df:pd.DataFrame,p:str,s:int,a:int)->pd.DataFrame:
        tf_id = p.upper()
        results=[]
        def get_rolling_slope(series, window):
            if series.notna().sum() < 2: return np.nan
            series_clean = series.dropna()
            if len(series_clean) < 2: return np.nan
            return np.polyfit(series_clean.index.astype(np.int64) // 10**9, series_clean.values, 1)[0]
        for symbol,group in df.groupby('Symbol'):
            g=group.copy()
            sma=g['Close'].rolling(s,min_periods=s).mean()
            atr=(g['High']-g['Low']).rolling(a,min_periods=a).mean()
            trend=np.sign(g['Close']-sma)
            lin_reg_slope = g['Close'].rolling(window=s).apply(get_rolling_slope, raw=False, args=(s,))
            temp_df=pd.DataFrame(index=g.index)
            temp_df[f'{tf_id}_ctx_SMA']=sma
            temp_df[f'{tf_id}_ctx_ATR']=atr
            temp_df[f'{tf_id}_ctx_Trend']=trend
            temp_df[f'{tf_id}_ctx_LinRegSlope'] = lin_reg_slope
            shifted_df=temp_df.shift(1);shifted_df['Symbol']=symbol;results.append(shifted_df)
        if not results: return pd.DataFrame()
        return pd.concat(results).reset_index()

    def _calculate_base_tf_native(self, g:pd.DataFrame)->pd.DataFrame:
        g_out = g.copy()
        lookback=14

        # Block 1: Core Indicators (Dependencies for other features)
        g_out['ATR']=(g_out['High']-g_out['Low']).rolling(lookback).mean()
        delta=g_out['Close'].diff()
        gain=delta.where(delta > 0,0).ewm(com=lookback-1,adjust=False).mean()
        loss=-delta.where(delta < 0,0).ewm(com=lookback-1,adjust=False).mean()
        rs = gain / loss.replace(0, 1e-9)
        g_out['RSI']=100-(100/(1+rs))
        g_out=self._calculate_adx(g_out,lookback)
        g_out=self._calculate_bollinger_bands(g_out,self.config.BOLLINGER_PERIOD)
        g_out=self._calculate_stochastic(g_out,self.config.STOCHASTIC_PERIOD)
        g_out = self._calculate_momentum(g_out)
        g_out['EMA_20'] = g_out['Close'].ewm(span=20, adjust=False).mean()
        g_out['EMA_50'] = g_out['Close'].ewm(span=50, adjust=False).mean()
        g_out['EMA_100'] = g_out['Close'].ewm(span=100, adjust=False).mean()
        g_out['EMA_200'] = g_out['Close'].ewm(span=200, adjust=False).mean()
        ema_12 = g_out['Close'].ewm(span=12, adjust=False).mean()
        ema_26 = g_out['Close'].ewm(span=26, adjust=False).mean()
        g_out['MACD_line'] = ema_12 - ema_26
        g_out['MACD_signal'] = g_out['MACD_line'].ewm(span=9, adjust=False).mean()
        g_out['MACD_hist'] = g_out['MACD_line'] - g_out['MACD_signal']
        
        # Block 2: Candle & Volume Features (Depend on Block 1)
        g_out = self._calculate_candle_microstructure(g_out)
        if 'RealVolume' in g_out.columns:
            g_out['volume'] = g_out['RealVolume']
        else:
            g_out['volume'] = 0

        # Block 3: Simple Features (Depend on Block 1 & 2)
        g_out['overnight_gap_pct'] = (g_out['Open'] - g_out['Close'].shift(1)) / g_out['Close'].shift(1).replace(0, np.nan)
        g_out['intraday_range_pct'] = (g_out['High'] - g_out['Low']) / g_out['Open'].replace(0, np.nan)
        g_out['close_vs_open_pct'] = (g_out['Close'] - g_out['Open']) / g_out['Open'].replace(0, np.nan)
        g_out['ema_20_vs_50'] = (g_out['EMA_20'] - g_out['EMA_50']) / g_out['EMA_50'].replace(0, np.nan)
        g_out['ema_50_vs_200'] = (g_out['EMA_50'] - g_out['EMA_200']) / g_out['EMA_200'].replace(0, np.nan)
        g_out['is_bullish_hammer'] = ((g_out['lower_wick'] > 2 * g_out['candle_body_size']) & (g_out['upper_wick'] < g_out['candle_body_size']) & (g_out['Close'] > g_out['Open'])).astype(int)
        g_out['is_bearish_shooting_star'] = ((g_out['upper_wick'] > 2 * g_out['candle_body_size']) & (g_out['lower_wick'] < g_out['candle_body_size']) & (g_out['Close'] < g_out['Open'])).astype(int)
        if g_out['volume'].sum() > 0:
            g_out['volume_ma_ratio'] = g_out['volume'] / g_out['volume'].rolling(20).mean().replace(0, np.nan)
            g_out['volume_trend'] = g_out['volume'].rolling(5).apply(lambda x: np.polyfit(np.arange(len(x)), x, 1)[0] if x.notna().all() else np.nan, raw=False)
        else:
            g_out['volume_ma_ratio'] = 0; g_out['volume_trend'] = 0
        g_out['momentum_5'] = g_out['Close'].pct_change(5)
        g_out['momentum_10_vs_20'] = g_out['momentum_10'] - g_out['momentum_20']
        g_out['atr_ratio'] = g_out['ATR'] / g_out['ATR'].rolling(20).mean().replace(0, np.nan)
        g_out['volatility_change'] = g_out['ATR'].pct_change()

        # Block 4: Derived & Advanced Features
        g_out = self._calculate_indicator_dynamics(g_out)
        g_out = self._calculate_markov_features(g_out)
        g_out = self._calculate_seasonality(g_out)
        g_out['market_regime']=np.where(g_out['ADX']>self.config.TREND_FILTER_THRESHOLD,1,0)
        sma_fast = g_out['Close'].rolling(window=20).mean(); sma_slow = g_out['Close'].rolling(window=50).mean()
        g_out['primary_model_signal'] = pd.Series(np.where(sma_fast > sma_slow, 1.0, -1.0), index=g_out.index).diff().fillna(0)
        g_out['market_volatility_index'] = g_out['ATR'].rolling(100).rank(pct=True)
        g_out['close_fracdiff'] = self._fractional_differentiation(g_out['Close'], d=0.5)
        g_out['abs_log_returns'] = g_out['log_returns'].abs().fillna(0)
        g_out['volatility_hawkes'] = self.hawkes_process(g_out['abs_log_returns'], kappa=self.config.HAWKES_KAPPA)
        g_out['returns_autocorr_10'] = g_out['log_returns'].rolling(10).corr(g_out['log_returns'].shift(1))
        g_out['donchian_upper'] = g_out['High'].rolling(20).max()
        g_out['donchian_lower'] = g_out['Low'].rolling(20).min()
        g_out['donchian_channel'] = g_out['donchian_upper'] - g_out['donchian_lower']
        g_out['linear_regression'] = g_out['Close'].rolling(window=14).apply(lambda x: np.polyfit(np.arange(len(x)), x, 1)[0], raw=False)
        g_out['SMA_30_weekly'] = g_out['Close'].rolling(window=30*5).mean()
        ha_close = (g_out['Open'] + g_out['High'] + g_out['Low'] + g_out['Close']) / 4
        ha_open = ((g_out['Open'].shift(1) + g_out['Close'].shift(1)) / 2).bfill()
        g_out['ha_body_size'] = abs(ha_close - ha_open)
        g_out['ha_color'] = np.sign(ha_close - ha_open)
        g_out['ha_streak'] = g_out.groupby((g_out['ha_color'] != g_out['ha_color'].shift()).cumsum())['ha_color'].cumsum().abs()
        g_out['fractal_up'] = ((g_out['High'] > g_out['High'].shift(1)) & (g_out['High'] > g_out['High'].shift(2)) & (g_out['High'] > g_out['High'].shift(-1)) & (g_out['High'] > g_out['High'].shift(-2))).astype(int)
        g_out['fractal_down'] = ((g_out['Low'] < g_out['Low'].shift(1)) & (g_out['Low'] < g_out['Low'].shift(2)) & (g_out['Low'] < g_out['Low'].shift(-1)) & (g_out['Low'] < g_out['Low'].shift(-2))).astype(int)
        
        g_out = self._enhance_volume_features(g_out)
        if 'pct_change' in g_out.columns:
            if g_out.index.nlevels > 1:
                g_out['relative_strength'] = (g_out['pct_change'] - g_out.groupby(level=0)['pct_change'].transform('mean'))
            else: g_out['relative_strength'] = 0
        else: g_out['relative_strength'] = 0
            
        g_out = self._calculate_rsi_divergence(g_out, lookback=lookback)
        g_out = self._calculate_hoffman_features(g_out)
        g_out = self._calculate_ict_features(g_out, swing_lookback=10)
        g_out = self._calculate_market_structure(g_out, swing_lookback=10)
        g_out = self._calculate_volatility_regime(g_out, hurst_window=100)
        g_out = self._calculate_support_resistance(g_out, period=20)
        g_out = self._calculate_zscores_and_interactions(g_out, z_window=50)

        if g_out['volume'].sum() > 0:
            tpv = ((g_out['High'] + g_out['Low'] + g_out['Close']) / 3) * g_out['volume']
            cum_volume = g_out.groupby(g_out.index.date)['volume'].transform('cumsum')
            cum_tpv = g_out.groupby(g_out.index.date).apply(lambda x: tpv.loc[x.index].cumsum()).reset_index(level=0, drop=True)
            g_out['VWAP'] = cum_tpv / cum_volume.replace(0, np.nan)
            g_out['VWAP'] = g_out['VWAP'].ffill()
            g_out['price_to_vwap'] = (g_out['Close'] - g_out['VWAP']) / g_out['ATR'].replace(0, np.nan)
            g_out['price_vs_vwap_sign'] = np.sign(g_out['Close'] - g_out['VWAP'])
            g_out['vwap_slope'] = g_out['VWAP'].diff()
        else:
            g_out['VWAP']=np.nan; g_out['price_to_vwap']=np.nan; g_out['price_vs_vwap_sign']=np.nan; g_out['vwap_slope']=np.nan
        return g_out

    def _calculate_relative_performance(self, df: pd.DataFrame) -> pd.DataFrame:
        if 'pct_change' not in df.columns:
            return df
        if 'Symbol' in df.columns and df['Symbol'].nunique() > 1:
            df['avg_market_pct_change'] = df.groupby(level=0)['pct_change'].transform('mean')
            df['relative_performance'] = df['pct_change'] - df['avg_market_pct_change']
        else:
            df['relative_performance'] = 0
        return df

    def _process_single_symbol_stack(self, data_by_tf_single_symbol: Dict[str, pd.DataFrame]) -> pd.DataFrame:
        base_tf, medium_tf, high_tf = self.roles['base'], self.roles['medium'], self.roles['high']
        df_base = data_by_tf_single_symbol[base_tf]
        df_base_featured = self._calculate_base_tf_native(df_base)
        df_merged = df_base_featured.reset_index()

        if medium_tf and medium_tf in data_by_tf_single_symbol and not data_by_tf_single_symbol[medium_tf].empty:
            df_medium_ctx = self._calculate_htf_features(data_by_tf_single_symbol[medium_tf], medium_tf, 50, 14)
            if not df_medium_ctx.empty:
                df_merged = pd.merge_asof(df_merged.sort_values('Timestamp'), df_medium_ctx.sort_values('Timestamp'), on='Timestamp', by='Symbol', direction='backward')

        if high_tf and high_tf in data_by_tf_single_symbol and not data_by_tf_single_symbol[high_tf].empty:
            df_high_ctx = self._calculate_htf_features(data_by_tf_single_symbol[high_tf], high_tf, 20, 14)
            if not df_high_ctx.empty:
                df_merged = pd.merge_asof(df_merged.sort_values('Timestamp'), df_high_ctx.sort_values('Timestamp'), on='Timestamp', by='Symbol', direction='backward')

        df_final = df_merged.set_index('Timestamp').copy()
        del df_merged, df_base_featured

        if medium_tf and 'ADX' in df_final.columns and f'{medium_tf.upper()}_ctx_Trend' in df_final.columns:
            df_final[f'adx_x_{medium_tf.upper()}_trend'] = df_final['ADX'] * df_final[f'{medium_tf.upper()}_ctx_Trend']
        if high_tf and 'ATR' in df_final.columns and f'{high_tf.upper()}_ctx_Trend' in df_final.columns:
            df_final[f'atr_x_{high_tf.upper()}_trend'] = df_final['ATR'] * df_final[f'{high_tf.upper()}_ctx_Trend']
            if f'{high_tf.upper()}_ctx_ATR' in df_final.columns:
                df_final['atr_vs_daily_atr'] = df_final['ATR'] / df_final[f'{high_tf.upper()}_ctx_ATR'].replace(0, 1)

        if getattr(self.config, 'USE_PCA_REDUCTION', False):
            rsi_pc_df = self._apply_pca_to_features(df_final, 'rsi_', self.config.PCA_N_COMPONENTS)
            if not rsi_pc_df.empty:
                df_final = df_final.join(rsi_pc_df)
                df_final.drop(columns=[c for c in df_final.columns if c.startswith('rsi_')], inplace=True)
        df_final['anomaly_score'] = self._get_anomaly_scores(df_final, self.config.anomaly_contamination_factor)
        return df_final

    def create_feature_stack(self, data_by_tf: Dict[str, pd.DataFrame]) -> pd.DataFrame:
        logger.info("-> Stage 2: Engineering Features (Processing Symbol-by-Symbol)...")
        base_tf = self.roles['base']
        if base_tf not in data_by_tf:
            logger.critical(f"Base timeframe '{base_tf}' data is missing. Cannot proceed.")
            return pd.DataFrame()

        all_symbols_processed_dfs = []
        unique_symbols = data_by_tf[base_tf]['Symbol'].unique()

        for i, symbol in enumerate(unique_symbols):
            logger.info(f"  - ({i+1}/{len(unique_symbols)}) Processing features for symbol: {symbol}")
            symbol_specific_data = {tf: df[df['Symbol'] == symbol].copy() for tf, df in data_by_tf.items()}
            processed_symbol_df = self._process_single_symbol_stack(symbol_specific_data)
            del symbol_specific_data
            if not processed_symbol_df.empty:
                all_symbols_processed_dfs.append(processed_symbol_df)

        if not all_symbols_processed_dfs:
            logger.critical("Feature engineering resulted in no processable data across all symbols.")
            return pd.DataFrame()

        final_df = pd.concat(all_symbols_processed_dfs, sort=False).sort_index()
        del all_symbols_processed_dfs
        
        final_df = self._calculate_relative_performance(final_df)
        feature_cols = [c for c in final_df.columns if c not in ['Open','High','Low','Close','RealVolume','Symbol']]
        final_df[feature_cols] = final_df.groupby('Symbol', sort=False)[feature_cols].shift(1)
        final_df.replace([np.inf,-np.inf],np.nan,inplace=True)
        core_features = ['ATR', 'RSI', 'ADX', 'EMA_50', 'EMA_200']
        final_df.dropna(subset=core_features, inplace=True)
        
        logger.info(f"[SUCCESS] Feature engineering complete. Final dataset shape: {final_df.shape}")
        return final_df

    def label_outcomes(self,df:pd.DataFrame,lookahead:int)->pd.DataFrame:
        logger.info("  - Generating trade labels with VOLATILITY-ADJUSTED DYNAMIC BARRIERS...");
        labeled_dfs=[self._label_group(group,lookahead) for _,group in df.groupby('Symbol')];
        return pd.concat(labeled_dfs)

    def _label_group(self,group:pd.DataFrame,lookahead:int)->pd.DataFrame:
        group = group.copy()
        if 'ATR' not in group.columns or len(group) < lookahead + 1:
            group['target'] = 0; return group
        tp_multiplier = getattr(self.config, 'TP_ATR_MULTIPLIER', 2.0)
        sl_multiplier = getattr(self.config, 'SL_ATR_MULTIPLIER', 1.5)
        profit_target_points = group['ATR'] * tp_multiplier
        stop_loss_points = group['ATR'] * sl_multiplier
        outcomes = np.zeros(len(group))
        prices, highs, lows = group['Close'].values, group['High'].values, group['Low'].values
        for i in range(len(group) - lookahead):
            sl_dist, tp_dist = stop_loss_points.iloc[i], profit_target_points.iloc[i]
            if pd.isna(sl_dist) or sl_dist <= 1e-9: continue
            tp_long_level, sl_long_level = prices[i] + tp_dist, prices[i] - sl_dist
            tp_short_level, sl_short_level = prices[i] - tp_dist, prices[i] + sl_dist
            future_highs, future_lows = highs[i+1 : i+1+lookahead], lows[i+1 : i+1+lookahead]
            hit_tp_long_idx = np.where(future_highs >= tp_long_level)[0]
            hit_sl_long_idx = np.where(future_lows <= sl_long_level)[0]
            first_tp_long = hit_tp_long_idx[0] if len(hit_tp_long_idx) > 0 else np.inf
            first_sl_long = hit_sl_long_idx[0] if len(hit_sl_long_idx) > 0 else np.inf
            hit_tp_short_idx = np.where(future_lows <= tp_short_level)[0]
            hit_sl_short_idx = np.where(future_highs >= sl_short_level)[0]
            first_tp_short = hit_tp_short_idx[0] if len(hit_tp_short_idx) > 0 else np.inf
            first_sl_short = hit_sl_short_idx[0] if len(hit_sl_short_idx) > 0 else np.inf
            if first_tp_long < first_sl_long: outcomes[i] = 1
            if first_tp_short < first_sl_short: outcomes[i] = -1
        group['target'] = outcomes
        return group

    def _label_meta_group(self, group: pd.DataFrame, lookahead: int) -> pd.DataFrame:
        group = group.copy()
        if 'primary_model_signal' not in group.columns or len(group) < lookahead + 1:
            group['target'] = 0; return group
        is_trending = group['market_regime'] == 1
        sl_multiplier = np.where(is_trending, 2.0, 1.5)
        tp_multiplier = np.where(is_trending, 4.0, 2.5)
        sl_atr_dynamic, tp_atr_dynamic = group['ATR'] * sl_multiplier, group['ATR'] * tp_multiplier
        outcomes = np.zeros(len(group))
        prices, lows, highs = group['Close'].values, group['Low'].values, group['High'].values
        primary_signals = group['primary_model_signal'].values
        min_return = getattr(self.config, 'LABEL_MIN_RETURN_PCT', 0.004)
        for i in range(len(group) - lookahead):
            signal = primary_signals[i]
            if signal == 0: continue
            sl_dist, tp_dist = sl_atr_dynamic[i], tp_atr_dynamic[i]
            if pd.isna(sl_dist) or sl_dist <= 1e-9: continue
            future_highs, future_lows = highs[i + 1:i + 1 + lookahead], lows[i + 1:i + 1 + lookahead]
            if signal > 0:
                tp_long, sl_long = prices[i] + tp_dist, prices[i] - sl_dist
                if (tp_long / prices[i] - 1) <= min_return: continue
                time_to_tp, time_to_sl = np.where(future_highs >= tp_long)[0], np.where(future_lows <= sl_long)[0]
                if len(time_to_tp) > 0 and (len(time_to_sl) == 0 or time_to_tp[0] < time_to_sl[0]):
                    outcomes[i] = 1
            elif signal < 0:
                tp_short, sl_short = prices[i] - tp_dist, prices[i] + sl_dist
                if (prices[i] / tp_short - 1) <= min_return: continue
                time_to_tp, time_to_sl = np.where(future_lows <= tp_short)[0], np.where(future_highs >= sl_short)[0]
                if len(time_to_tp) > 0 and (len(time_to_sl) == 0 or time_to_tp[0] < time_to_sl[0]):
                    outcomes[i] = 1
        group['target'] = outcomes
        return group

    def label_meta_outcomes(self, df: pd.DataFrame, lookahead: int) -> pd.DataFrame:
        logger.info("  - Generating BINARY meta-labels (1=correct, 0=incorrect)...")
        labeled_dfs = [self._label_meta_group(group, lookahead) for _, group in df.groupby('Symbol')]
        if not labeled_dfs: return pd.DataFrame()
        return pd.concat(labeled_dfs)
