# trading-platform-ml-engine/src/data/indicators.py

"""
Technical indicators module
Implements the 14 essential indicators identified in the analysis
"""
import pandas as pd
import numpy as np
from typing import Optional, Dict, Any
import pandas_ta as ta
from loguru import logger
class TechnicalIndicators:
    """Calculate technical indicators for trading data"""

    def __init__(self):
        """Initialize the calculator with the minimal feature set.

        ``minimal_indicators`` names the 14 core indicators; they are
        assembled here from small family groups purely for readability.
        """
        momentum = ['macd_signal', 'macd_histogram', 'rsi']
        trend = ['sma_10', 'sma_20', 'sar']
        volatility_volume = ['atr', 'obv', 'ad', 'cmf', 'mfi']
        structure = ['volume_zscore', 'fractals_high', 'fractals_low']
        self.minimal_indicators = momentum + trend + volatility_volume + structure
def calculate_all_indicators(
self,
df: pd.DataFrame,
minimal: bool = True
) -> pd.DataFrame:
"""
Calculate all technical indicators
Args:
df: DataFrame with OHLCV data
minimal: If True, only calculate minimal set (14 indicators)
Returns:
DataFrame with indicators added
"""
df_ind = df.copy()
# Ensure we have required columns
required = ['open', 'high', 'low', 'close', 'volume']
if not all(col in df_ind.columns for col in required):
raise ValueError(f"DataFrame must contain columns: {required}")
# MACD
macd = ta.macd(df_ind['close'], fast=12, slow=26, signal=9)
if macd is not None:
df_ind['macd'] = macd['MACD_12_26_9']
df_ind['macd_signal'] = macd['MACDs_12_26_9']
df_ind['macd_histogram'] = macd['MACDh_12_26_9']
# RSI
df_ind['rsi'] = ta.rsi(df_ind['close'], length=14)
# Simple Moving Averages
df_ind['sma_10'] = ta.sma(df_ind['close'], length=10)
df_ind['sma_20'] = ta.sma(df_ind['close'], length=20)
# Parabolic SAR
sar = ta.psar(df_ind['high'], df_ind['low'], df_ind['close'])
if sar is not None:
df_ind['sar'] = sar.iloc[:, 0] # Get the SAR values
# ATR (Average True Range)
df_ind['atr'] = ta.atr(df_ind['high'], df_ind['low'], df_ind['close'], length=14)
# Volume indicators
df_ind['obv'] = ta.obv(df_ind['close'], df_ind['volume'])
df_ind['ad'] = ta.ad(df_ind['high'], df_ind['low'], df_ind['close'], df_ind['volume'])
df_ind['cmf'] = ta.cmf(df_ind['high'], df_ind['low'], df_ind['close'], df_ind['volume'])
df_ind['mfi'] = ta.mfi(df_ind['high'], df_ind['low'], df_ind['close'], df_ind['volume'])
# Volume Z-Score
df_ind['volume_zscore'] = self._calculate_volume_zscore(df_ind['volume'])
# Fractals
df_ind['fractals_high'], df_ind['fractals_low'] = self._calculate_fractals(
df_ind['high'], df_ind['low']
)
if not minimal:
# Add extended indicators
df_ind = self._add_extended_indicators(df_ind)
# Fill NaN values
df_ind = df_ind.fillna(method='ffill').fillna(0)
logger.info(f"Calculated {len(df_ind.columns) - len(df.columns)} indicators")
return df_ind
def _calculate_volume_zscore(
self,
volume: pd.Series,
window: int = 20
) -> pd.Series:
"""
Calculate volume Z-score for anomaly detection
Args:
volume: Volume series
window: Rolling window size
Returns:
Volume Z-score series
"""
vol_mean = volume.rolling(window=window).mean()
vol_std = volume.rolling(window=window).std()
# Avoid division by zero
vol_std = vol_std.replace(0, 1)
zscore = (volume - vol_mean) / vol_std
return zscore
def _calculate_fractals(
self,
high: pd.Series,
low: pd.Series,
n: int = 2
) -> tuple[pd.Series, pd.Series]:
"""
Calculate Williams Fractals
Args:
high: High price series
low: Low price series
n: Number of bars on each side
Returns:
Tuple of (bullish fractals, bearish fractals)
"""
fractals_high = pd.Series(0, index=high.index)
fractals_low = pd.Series(0, index=low.index)
for i in range(n, len(high) - n):
# Bearish fractal (high point)
if high.iloc[i] == high.iloc[i-n:i+n+1].max():
fractals_high.iloc[i] = 1
# Bullish fractal (low point)
if low.iloc[i] == low.iloc[i-n:i+n+1].min():
fractals_low.iloc[i] = 1
return fractals_high, fractals_low
    def _add_extended_indicators(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add extended set of indicators for experimentation

        Adds new columns to ``df`` in place and returns the same object.
        Columns selected by integer position (``iloc``) depend on the column
        order the installed pandas_ta version emits — verify on upgrade.
        """
        # Stochastic oscillator — positions 0/1 assumed to be %K/%D
        stoch = ta.stoch(df['high'], df['low'], df['close'])
        if stoch is not None:
            df['stoch_k'] = stoch.iloc[:, 0]
            df['stoch_d'] = stoch.iloc[:, 1]
        # CCI (Commodity Channel Index)
        df['cci'] = ta.cci(df['high'], df['low'], df['close'])
        # EMA pair matching the MACD fast/slow lengths (12/26)
        df['ema_12'] = ta.ema(df['close'], length=12)
        df['ema_26'] = ta.ema(df['close'], length=26)
        # ADX (trend strength); keyed by label, so order-independent
        adx = ta.adx(df['high'], df['low'], df['close'])
        if adx is not None:
            df['adx'] = adx['ADX_14']
        # Bollinger Bands (20-period, 2.0 std)
        bbands = ta.bbands(df['close'], length=20)
        if bbands is not None:
            df['bb_upper'] = bbands['BBU_20_2.0']
            df['bb_middle'] = bbands['BBM_20_2.0']
            df['bb_lower'] = bbands['BBL_20_2.0']
        # Keltner Channels
        # NOTE(review): recent pandas_ta versions emit kc() columns ordered
        # (lower, basis, upper) — verify these iloc positions; as written,
        # 'kc_upper' may actually receive the lower band.
        kc = ta.kc(df['high'], df['low'], df['close'])
        if kc is not None:
            df['kc_upper'] = kc.iloc[:, 0]
            df['kc_middle'] = kc.iloc[:, 1]
            df['kc_lower'] = kc.iloc[:, 2]
        return df
def calculate_partial_hour_features(
self,
df: pd.DataFrame,
timeframe: int = 5
) -> pd.DataFrame:
"""
Calculate partial hour features to prevent look-ahead bias
Based on trading_bot_meta_model implementation
Args:
df: DataFrame with OHLCV data
timeframe: Timeframe in minutes
Returns:
DataFrame with partial hour features added
"""
df_partial = df.copy()
# Ensure datetime index
if not isinstance(df_partial.index, pd.DatetimeIndex):
raise ValueError("DataFrame must have datetime index")
# Calculate hour truncation
df_partial['hour_trunc'] = df_partial.index.floor('H')
# Partial hour OHLCV
df_partial['open_hr_partial'] = df_partial.groupby('hour_trunc')['open'].transform('first')
df_partial['close_hr_partial'] = df_partial['close'] # Current close
df_partial['high_hr_partial'] = df_partial.groupby('hour_trunc')['high'].transform('cummax')
df_partial['low_hr_partial'] = df_partial.groupby('hour_trunc')['low'].transform('cummin')
df_partial['volume_hr_partial'] = df_partial.groupby('hour_trunc')['volume'].transform('cumsum')
# Calculate indicators on partial hour data
partial_cols = ['open_hr_partial', 'close_hr_partial',
'high_hr_partial', 'low_hr_partial', 'volume_hr_partial']
df_temp = df_partial[partial_cols].copy()
df_temp.columns = ['open', 'close', 'high', 'low', 'volume']
# Calculate indicators on partial data
df_ind_partial = self.calculate_all_indicators(df_temp, minimal=True)
# Rename columns to indicate partial
for col in df_ind_partial.columns:
if col not in ['open', 'close', 'high', 'low', 'volume']:
df_partial[f"{col}_hr_partial"] = df_ind_partial[col]
# Drop temporary column
df_partial.drop('hour_trunc', axis=1, inplace=True)
logger.info(f"Added {len([c for c in df_partial.columns if '_hr_partial' in c])} partial hour features")
return df_partial
def calculate_rolling_features(
self,
df: pd.DataFrame,
windows: list = [15, 60, 120]
) -> pd.DataFrame:
"""
Calculate rolling window features
Args:
df: DataFrame with OHLCV data
windows: List of window sizes in minutes (assuming 5-min bars)
Returns:
DataFrame with rolling features added
"""
df_roll = df.copy()
for window_min in windows:
# Convert minutes to number of bars (5-min timeframe)
window_bars = window_min // 5
# Rolling aggregations
df_roll[f'open_{window_min}m'] = df_roll['open'].shift(window_bars - 1)
df_roll[f'high_{window_min}m'] = df_roll['high'].rolling(window_bars).max()
df_roll[f'low_{window_min}m'] = df_roll['low'].rolling(window_bars).min()
df_roll[f'close_{window_min}m'] = df_roll['close'] # Current close
df_roll[f'volume_{window_min}m'] = df_roll['volume'].rolling(window_bars).sum()
# Price changes
df_roll[f'return_{window_min}m'] = df_roll['close'].pct_change(window_bars)
# Volatility
df_roll[f'volatility_{window_min}m'] = df_roll['close'].pct_change().rolling(window_bars).std()
logger.info(f"Added rolling features for windows: {windows}")
return df_roll
def transform_to_ratios(
self,
df: pd.DataFrame,
reference_col: str = 'close'
) -> pd.DataFrame:
"""
Transform price columns to ratios for better model stability
Args:
df: DataFrame with price data
reference_col: Column to use as reference for ratios
Returns:
DataFrame with ratio transformations
"""
df_ratio = df.copy()
price_cols = ['open', 'high', 'low', 'close']
for col in price_cols:
if col in df_ratio.columns and col != reference_col:
df_ratio[f'{col}_ratio'] = (df_ratio[col] / df_ratio[reference_col]) - 1
# Volume ratio to mean
if 'volume' in df_ratio.columns:
vol_mean = df_ratio['volume'].rolling(20).mean()
df_ratio['volume_ratio'] = df_ratio['volume'] / vol_mean.fillna(1)
logger.info("Transformed prices to ratios")
return df_ratio
if __name__ == "__main__":
    # Smoke test: build a synthetic random-walk OHLCV frame and exercise
    # every feature-engineering path once.
    idx = pd.date_range(start='2024-01-01', periods=1000, freq='5min')
    np.random.seed(42)
    sample = pd.DataFrame({
        'open': 100 + np.random.randn(1000).cumsum(),
        'high': 102 + np.random.randn(1000).cumsum(),
        'low': 98 + np.random.randn(1000).cumsum(),
        'close': 100 + np.random.randn(1000).cumsum(),
        'volume': np.random.randint(1000, 10000, 1000)
    }, index=idx)

    # Make each bar internally consistent: high/low must bracket open and close
    sample['high'] = sample[['open', 'high', 'close']].max(axis=1)
    sample['low'] = sample[['open', 'low', 'close']].min(axis=1)

    calc = TechnicalIndicators()

    # Minimal indicator set
    enriched = calc.calculate_all_indicators(sample, minimal=True)
    print(f"Calculated indicators: {[c for c in enriched.columns if c not in sample.columns]}")

    # Partial-hour (look-ahead-safe) features
    with_partial = calc.calculate_partial_hour_features(enriched)
    partial_cols = [c for c in with_partial.columns if '_hr_partial' in c]
    print(f"\nPartial hour features ({len(partial_cols)}): {partial_cols[:5]}...")

    # Rolling-window features
    with_rolling = calc.calculate_rolling_features(sample, windows=[15, 60])
    roll_cols = [c for c in with_rolling.columns if 'm' in c and c not in sample.columns]
    print(f"\nRolling features: {roll_cols}")

    # Ratio transformation
    with_ratios = calc.transform_to_ratios(sample)
    ratio_cols = [c for c in with_ratios.columns if 'ratio' in c]
    print(f"\nRatio features: {ratio_cols}")