diff --git a/models/ATTENTION_TRAINING_REPORT_20260125_060049.md b/models/ATTENTION_TRAINING_REPORT_20260125_060049.md new file mode 100644 index 0000000..7eeb8ff --- /dev/null +++ b/models/ATTENTION_TRAINING_REPORT_20260125_060049.md @@ -0,0 +1,118 @@ +# Attention Score Model Training Report + +**Generated:** 2026-01-25 06:00:49 + +## Overview + +The attention model learns to identify high-flow market moments using volume, volatility, and money flow indicators - WITHOUT hardcoding specific trading hours or sessions. + +## Configuration + +- **Symbols:** XAUUSD +- **Timeframes:** 5m +- **Training Data Cutoff:** 2026-01-20 +- **Training Years:** 1.0 +- **Holdout Years:** 0.1 + +### Model Parameters + +| Parameter | Value | +|-----------|-------| +| Factor Window | 200 | +| Horizon Bars | 3 | +| Low Flow Threshold | 1.0 | +| High Flow Threshold | 2.0 | + +### Features Used (9 total) + +| Feature | Description | +|---------|-------------| +| volume_ratio | volume / rolling_median(volume, 20) | +| volume_z | z-score of volume over 20 periods | +| ATR | Average True Range (14 periods) | +| ATR_ratio | ATR / rolling_median(ATR, 50) | +| CMF | Chaikin Money Flow (20 periods) | +| MFI | Money Flow Index (14 periods) | +| OBV_delta | diff(OBV) / rolling_std(OBV, 20) | +| BB_width | (BB_upper - BB_lower) / close | +| displacement | (close - open) / ATR | + +## Training Results + +| Model | Symbol | TF | Reg MAE | Reg R2 | Clf Acc | Clf F1 | N Train | High Flow % | +|-------|--------|-----|---------|--------|---------|--------|---------|-------------| +| XAUUSD_5m_attention | XAUUSD | 5m | 0.8237 | 0.2248 | 58.35% | 56.10% | 52983 | 40.2% | + + +## Class Distribution (Holdout Set) + +| Model | Low Flow | Medium Flow | High Flow | +|-------|----------|-------------|-----------| +| XAUUSD_5m_attention | 62 (0.9%) | 3890 (58.8%) | 2661 (40.2%) | + + +## Feature Importance + +### XAUUSD_5m_attention + +| Rank | Feature | Combined Importance | +|------|---------|--------------------| +| 1 | ATR_ratio | 0.4268 | +| 2 | ATR | 0.1115 | +| 3 | displacement | 0.0801 | +| 4 | BB_width | 0.0801 | +| 5 | volume_ratio | 0.0776 | +| 6 | CMF | 0.0637 | +| 7 | volume_z | 0.0592 | +| 8 | MFI | 0.0524 | +| 9 | OBV_delta | 0.0486 | + + + +## Interpretation + +### Attention Score (Regression) + +- **< 1.0**: Low flow period - below average market movement expected +- **1.0 - 2.0**: Medium flow period - average market conditions +- **> 2.0**: High flow period - above average movement expected (best trading opportunities) + +### Flow Class (Classification) + +- **0 (low_flow)**: move_multiplier < 1.0 +- **1 (medium_flow)**: 1.0 <= move_multiplier < 2.0 +- **2 (high_flow)**: move_multiplier >= 2.0 + +## Trading Recommendations + +1. **Filter by attention_score**: Only trade when attention_score > 1.0 +2. **Adjust position sizing**: Increase size when attention_score > 2.0 +3. **Combine with base models**: Use attention_score as feature #51 in prediction models +4. **Time-agnostic**: The model identifies flow without hardcoded sessions + +## Usage Example + +```python +from training.attention_trainer import AttentionModelTrainer + +# Load trained models +trainer = AttentionModelTrainer.load('models/attention/') + +# Get attention score for new OHLCV data +attention = trainer.get_attention_score(df_ohlcv, 'XAUUSD', '5m') + +# Filter trades +mask_trade = attention > 1.0 # Only trade in medium/high flow + +# Or use as feature in base models +df['attention_score'] = attention +``` + +## Files Generated + +- `models/attention/{symbol}_{timeframe}_attention/` - Model directories +- `models/attention/trainer_metadata.joblib` - Trainer configuration +- `models/attention/training_summary.csv` - Summary metrics + +--- +*Report generated by Attention Model Training Pipeline* diff --git a/models/ATTENTION_TRAINING_REPORT_20260125_060911.md b/models/ATTENTION_TRAINING_REPORT_20260125_060911.md new file mode 100644 index 0000000..c664c33 --- /dev/null +++ b/models/ATTENTION_TRAINING_REPORT_20260125_060911.md @@ -0,0 +1,294 @@ +# Attention Score Model Training Report + +**Generated:** 2026-01-25 06:09:11 + +## Overview + +The attention model learns to identify high-flow market moments using volume, volatility, and money flow indicators - WITHOUT hardcoding specific trading hours or sessions. + +## Configuration + +- **Symbols:** XAUUSD, EURUSD, BTCUSD, GBPUSD, USDJPY, AUDUSD +- **Timeframes:** 5m, 15m +- **Training Data Cutoff:** 2026-01-20 +- **Training Years:** 1.0 +- **Holdout Years:** 0.1 + +### Model Parameters + +| Parameter | Value | +|-----------|-------| +| Factor Window | 200 | +| Horizon Bars | 3 | +| Low Flow Threshold | 1.0 | +| High Flow Threshold | 2.0 | + +### Features Used (9 total) + +| Feature | Description | +|---------|-------------| +| volume_ratio | volume / rolling_median(volume, 20) | +| volume_z | z-score of volume over 20 periods | +| ATR | Average True Range (14 periods) | +| ATR_ratio | ATR / rolling_median(ATR, 50) | +| CMF | Chaikin Money Flow (20 periods) | +| MFI | Money Flow Index (14 periods) | +| OBV_delta | diff(OBV) / rolling_std(OBV, 20) | +| BB_width | (BB_upper - BB_lower) / close | +| displacement | (close - open) / ATR | + +## Training Results + +| Model | Symbol | TF | Reg MAE | Reg R2 | Clf Acc | Clf F1 | N Train | High Flow % | +|-------|--------|-----|---------|--------|---------|--------|---------|-------------| +| XAUUSD_5m_attention | XAUUSD | 5m | 0.8237 | 0.2248 | 58.35% | 56.10% | 52983 | 40.2% | +| XAUUSD_15m_attention | XAUUSD | 15m | 0.8585 | 0.1183 | 54.28% | 51.34% | 17744 | 41.0% | +| EURUSD_5m_attention | EURUSD | 5m | 0.4217 | 0.2472 | 63.32% | 57.12% | 55849 | 10.5% | +| EURUSD_15m_attention | EURUSD | 15m | 0.5014 | 0.1783 | 61.45% | 55.96% | 18577 | 15.0% | +| BTCUSD_5m_attention | BTCUSD | 5m | 0.5886 | 0.2960 | 62.27% | 55.79% | 79155 | 12.8% | +| BTCUSD_15m_attention | BTCUSD | 15m | 0.6936 | 0.1764 | 60.25% | 56.22% | 26330 | 17.7% | +| GBPUSD_5m_attention | GBPUSD | 5m | 0.5024 | 0.2676 | 59.39% | 56.40% | 55618 | 23.5% | +| GBPUSD_15m_attention | GBPUSD | 15m | 0.5831 | 0.2139 | 59.53% | 56.19% | 18550 | 24.6% | +| USDJPY_5m_attention | USDJPY | 5m | 0.6536 | 0.1863 | 58.56% | 56.72% | 55687 | 26.2% | +| USDJPY_15m_attention | USDJPY | 15m | 0.7212 | 0.0821 | 53.98% | 50.89% | 18567 | 26.6% | +| AUDUSD_5m_attention | AUDUSD | 5m | 0.3715 | -0.2385 | 77.20% | 70.93% | 55315 | 1.9% | +| AUDUSD_15m_attention | AUDUSD | 15m | 0.4988 | -0.1155 | 69.43% | 62.51% | 18387 | 4.6% | + + +## Class Distribution (Holdout Set) + +| Model | Low Flow | Medium Flow | High Flow | +|-------|----------|-------------|-----------| +| XAUUSD_5m_attention | 62 (0.9%) | 3890 (58.8%) | 2661 (40.2%) | +| XAUUSD_15m_attention | 0 (0.0%) | 1312 (59.0%) | 912 (41.0%) | +| EURUSD_5m_attention | 229 (3.3%) | 5964 (86.2%) | 727 (10.5%) | +| EURUSD_15m_attention | 60 (2.6%) | 1908 (82.4%) | 347 (15.0%) | +| BTCUSD_5m_attention | 113 (1.1%) | 9053 (86.1%) | 1347 (12.8%) | +| BTCUSD_15m_attention | 287 (8.2%) | 2597 (74.1%) | 621 (17.7%) | +| GBPUSD_5m_attention | 296 (4.3%) | 4985 (72.2%) | 1621 (23.5%) | +| GBPUSD_15m_attention | 97 (4.2%) | 1648 (71.2%) | 568 (24.6%) | +| USDJPY_5m_attention | 443 (6.4%) | 4661 (67.4%) | 1810 (26.2%) | +| USDJPY_15m_attention | 63 (2.7%) | 1636 (70.7%) | 615 (26.6%) | +| AUDUSD_5m_attention | 231 (3.3%) | 6580 (94.8%) | 130 (1.9%) | +| AUDUSD_15m_attention | 30 (1.3%) | 2181 (94.1%) | 106 (4.6%) | + + +## Feature Importance + +### XAUUSD_5m_attention + +| Rank | Feature | Combined Importance | +|------|---------|--------------------| +| 1 | ATR_ratio | 0.4268 | +| 2 | ATR | 0.1115 | +| 3 | displacement | 0.0801 | +| 4 | BB_width | 0.0801 | +| 5 | volume_ratio | 0.0776 | +| 6 | CMF | 0.0637 | +| 7 | volume_z | 0.0592 | +| 8 | MFI | 0.0524 | +| 9 | OBV_delta | 0.0486 | + +### XAUUSD_15m_attention + +| Rank | Feature | Combined Importance | +|------|---------|--------------------| +| 1 | ATR_ratio | 0.2016 | +| 2 | volume_ratio | 0.1666 | +| 3 | displacement | 0.1320 | +| 4 | volume_z | 0.0976 | +| 5 | BB_width | 0.0921 | +| 6 | ATR | 0.0830 | +| 7 | MFI | 0.0786 | +| 8 | OBV_delta | 0.0763 | +| 9 | CMF | 0.0722 | + +### EURUSD_5m_attention + +| Rank | Feature | Combined Importance | +|------|---------|--------------------| +| 1 | ATR | 0.3272 | +| 2 | ATR_ratio | 0.2003 | +| 3 | BB_width | 0.1141 | +| 4 | volume_z | 0.0970 | +| 5 | volume_ratio | 0.0849 | +| 6 | CMF | 0.0468 | +| 7 | displacement | 0.0468 | +| 8 | MFI | 0.0438 | +| 9 | OBV_delta | 0.0392 | + +### EURUSD_15m_attention + +| Rank | Feature | Combined Importance | +|------|---------|--------------------| +| 1 | ATR_ratio | 0.2958 | +| 2 | volume_z | 0.1386 | +| 3 | volume_ratio | 0.1346 | +| 4 | ATR | 0.1167 | +| 5 | BB_width | 0.0719 | +| 6 | MFI | 0.0636 | +| 7 | CMF | 0.0615 | +| 8 | displacement | 0.0598 | +| 9 | OBV_delta | 0.0574 | + +### BTCUSD_5m_attention + +| Rank | Feature | Combined Importance | +|------|---------|--------------------| +| 1 | ATR_ratio | 0.3239 | +| 2 | BB_width | 0.1286 | +| 3 | volume_ratio | 0.1037 | +| 4 | volume_z | 0.0852 | +| 5 | ATR | 0.0816 | +| 6 | CMF | 0.0793 | +| 7 | displacement | 0.0701 | +| 8 | MFI | 0.0661 | +| 9 | OBV_delta | 0.0616 | + +### BTCUSD_15m_attention + +| Rank | Feature | Combined Importance | +|------|---------|--------------------| +| 1 | ATR_ratio | 0.3038 | +| 2 | volume_ratio | 0.1570 | +| 3 | BB_width | 0.0998 | +| 4 | ATR | 0.0983 | +| 5 | volume_z | 0.0901 | +| 6 | displacement | 0.0679 | +| 7 | CMF | 0.0664 | +| 8 | OBV_delta | 0.0597 | +| 9 | MFI | 0.0569 | + +### GBPUSD_5m_attention + +| Rank | Feature | Combined Importance | +|------|---------|--------------------| +| 1 | ATR | 0.3587 | +| 2 | ATR_ratio | 0.1753 | +| 3 | volume_ratio | 0.1055 | +| 4 | BB_width | 0.0981 | +| 5 | volume_z | 0.0852 | +| 6 | displacement | 0.0514 | +| 7 | CMF | 0.0474 | +| 8 | OBV_delta | 0.0419 | +| 9 | MFI | 0.0365 | + +### GBPUSD_15m_attention + +| Rank | Feature | Combined Importance | +|------|---------|--------------------| +| 1 | ATR_ratio | 0.3105 | +| 2 | volume_z | 0.1476 | +| 3 | volume_ratio | 0.1287 | +| 4 | ATR | 0.1145 | +| 5 | BB_width | 0.0708 | +| 6 | MFI | 0.0605 | +| 7 | CMF | 0.0590 | +| 8 | OBV_delta | 0.0587 | +| 9 | displacement | 0.0499 | + +### USDJPY_5m_attention + +| Rank | Feature | Combined Importance | +|------|---------|--------------------| +| 1 | ATR_ratio | 0.3854 | +| 2 | ATR | 0.1623 | +| 3 | volume_z | 0.1100 | +| 4 | volume_ratio | 0.0971 | +| 5 | BB_width | 0.0901 | +| 6 | displacement | 0.0479 | +| 7 | OBV_delta | 0.0365 | +| 8 | MFI | 0.0359 | +| 9 | CMF | 0.0349 | + +### USDJPY_15m_attention + +| Rank | Feature | Combined Importance | +|------|---------|--------------------| +| 1 | volume_ratio | 0.2208 | +| 2 | volume_z | 0.2110 | +| 3 | ATR_ratio | 0.1173 | +| 4 | BB_width | 0.0934 | +| 5 | displacement | 0.0857 | +| 6 | ATR | 0.0829 | +| 7 | CMF | 0.0666 | +| 8 | OBV_delta | 0.0638 | +| 9 | MFI | 0.0585 | + +### AUDUSD_5m_attention + +| Rank | Feature | Combined Importance | +|------|---------|--------------------| +| 1 | ATR | 0.2316 | +| 2 | volume_ratio | 0.1677 | +| 3 | ATR_ratio | 0.1320 | +| 4 | volume_z | 0.1139 | +| 5 | MFI | 0.0923 | +| 6 | BB_width | 0.0796 | +| 7 | displacement | 0.0718 | +| 8 | CMF | 0.0717 | +| 9 | OBV_delta | 0.0394 | + +### AUDUSD_15m_attention + +| Rank | Feature | Combined Importance | +|------|---------|--------------------| +| 1 | volume_z | 0.1874 | +| 2 | volume_ratio | 0.1795 | +| 3 | BB_width | 0.1206 | +| 4 | ATR_ratio | 0.1140 | +| 5 | ATR | 0.0936 | +| 6 | CMF | 0.0923 | +| 7 | MFI | 0.0819 | +| 8 | displacement | 0.0779 | +| 9 | OBV_delta | 0.0529 | + + + +## Interpretation + +### Attention Score (Regression) + +- **< 1.0**: Low flow period - below average market movement expected +- **1.0 - 2.0**: Medium flow period - average market conditions +- **> 2.0**: High flow period - above average movement expected (best trading opportunities) + +### Flow Class (Classification) + +- **0 (low_flow)**: move_multiplier < 1.0 +- **1 (medium_flow)**: 1.0 <= move_multiplier < 2.0 +- **2 (high_flow)**: move_multiplier >= 2.0 + +## Trading Recommendations + +1. **Filter by attention_score**: Only trade when attention_score > 1.0 +2. **Adjust position sizing**: Increase size when attention_score > 2.0 +3. **Combine with base models**: Use attention_score as feature #51 in prediction models +4. **Time-agnostic**: The model identifies flow without hardcoded sessions + +## Usage Example + +```python +from training.attention_trainer import AttentionModelTrainer + +# Load trained models +trainer = AttentionModelTrainer.load('models/attention/') + +# Get attention score for new OHLCV data +attention = trainer.get_attention_score(df_ohlcv, 'XAUUSD', '5m') + +# Filter trades +mask_trade = attention > 1.0 # Only trade in medium/high flow + +# Or use as feature in base models +df['attention_score'] = attention +``` + +## Files Generated + +- `models/attention/{symbol}_{timeframe}_attention/` - Model directories +- `models/attention/trainer_metadata.joblib` - Trainer configuration +- `models/attention/training_summary.csv` - Summary metrics + +--- +*Report generated by Attention Model Training Pipeline* diff --git a/models/TRAINING_REPORT_20260125_061013.md b/models/TRAINING_REPORT_20260125_061013.md new file mode 100644 index 0000000..1db3244 --- /dev/null +++ b/models/TRAINING_REPORT_20260125_061013.md @@ -0,0 +1,46 @@ +# Symbol-Timeframe Model Training Report + +**Generated:** 2026-01-25 06:10:13 + +## Configuration + +- **Training Data Cutoff:** 2024-12-31 (excluding 2025 for backtesting) +- **Dynamic Factor Weighting:** Enabled +- **Sample Weight Method:** Softplus with beta=4.0, w_max=3.0 + +## Training Results Summary + +| Model | Symbol | Timeframe | Target | MAE | RMSE | R2 | Dir Accuracy | Train | Val | +|-------|--------|-----------|--------|-----|------|----|--------------| ----- | --- | + +## Model Files + +Models saved to: `/mnt/c/Empresas/ISEM/workspace-v2/projects/trading-platform/apps/ml-engine/models/symbol_timeframe_models` + +### Model Naming Convention +- `{symbol}_{timeframe}_high_h{horizon}.joblib` - High range predictor +- `{symbol}_{timeframe}_low_h{horizon}.joblib` - Low range predictor + +## Usage Example + +```python +from training.symbol_timeframe_trainer import SymbolTimeframeTrainer + +# Load trained models +trainer = SymbolTimeframeTrainer() +trainer.load('models/symbol_timeframe_models/') + +# Predict for XAUUSD 15m +predictions = trainer.predict(features, 'XAUUSD', '15m') +print(f"Predicted High: {predictions['high']}") +print(f"Predicted Low: {predictions['low']}") +``` + +## Notes + +1. Models exclude 2025 data for out-of-sample backtesting +2. Dynamic factor weighting emphasizes high-movement samples +3. Separate models for HIGH and LOW predictions per symbol/timeframe + +--- +*Report generated by Symbol-Timeframe Training Pipeline* diff --git a/models/TRAINING_REPORT_20260125_061049.md b/models/TRAINING_REPORT_20260125_061049.md new file mode 100644 index 0000000..810edca --- /dev/null +++ b/models/TRAINING_REPORT_20260125_061049.md @@ -0,0 +1,46 @@ +# Symbol-Timeframe Model Training Report + +**Generated:** 2026-01-25 06:10:49 + +## Configuration + +- **Training Data Cutoff:** 2024-12-31 (excluding 2025 for backtesting) +- **Dynamic Factor Weighting:** Enabled +- **Sample Weight Method:** Softplus with beta=4.0, w_max=3.0 + +## Training Results Summary + +| Model | Symbol | Timeframe | Target | MAE | RMSE | R2 | Dir Accuracy | Train | Val | +|-------|--------|-----------|--------|-----|------|----|--------------| ----- | --- | + +## Model Files + +Models saved to: `/mnt/c/Empresas/ISEM/workspace-v2/projects/trading-platform/apps/ml-engine/models/symbol_timeframe_models` + +### Model Naming Convention +- `{symbol}_{timeframe}_high_h{horizon}.joblib` - High range predictor +- `{symbol}_{timeframe}_low_h{horizon}.joblib` - Low range predictor + +## Usage Example + +```python +from training.symbol_timeframe_trainer import SymbolTimeframeTrainer + +# Load trained models +trainer = SymbolTimeframeTrainer() +trainer.load('models/symbol_timeframe_models/') + +# Predict for XAUUSD 15m +predictions = trainer.predict(features, 'XAUUSD', '15m') +print(f"Predicted High: {predictions['high']}") +print(f"Predicted Low: {predictions['low']}") +``` + +## Notes + +1. Models exclude 2025 data for out-of-sample backtesting +2. Dynamic factor weighting emphasizes high-movement samples +3. Separate models for HIGH and LOW predictions per symbol/timeframe + +--- +*Report generated by Symbol-Timeframe Training Pipeline* diff --git a/scripts/ingest_ohlcv_polygon.py b/scripts/ingest_ohlcv_polygon.py index a818751..144bf71 100644 --- a/scripts/ingest_ohlcv_polygon.py +++ b/scripts/ingest_ohlcv_polygon.py @@ -61,12 +61,15 @@ SYMBOL_CONFIG = { } # PostgreSQL connection (matches trading_platform config) +# Note: sslmode=disable for local WSL connections to avoid SSL timeout issues PG_CONFIG = { "host": os.getenv("DB_HOST", "localhost"), "port": int(os.getenv("DB_PORT", "5432")), "dbname": os.getenv("DB_NAME", "trading_platform"), "user": os.getenv("DB_USER", "trading_user"), "password": os.getenv("DB_PASSWORD", "trading_dev_2026"), + "sslmode": os.getenv("DB_SSLMODE", "disable"), + "connect_timeout": 30, } @@ -198,13 +201,12 @@ def get_last_timestamp(conn, ticker_id: int) -> datetime: return row[0] if row and row[0] else None -def insert_bars_to_postgres(conn, ticker_id: int, bars: list) -> int: +def insert_bars_to_postgres(ticker_id: int, bars: list) -> int: """ Insert OHLCV bars into PostgreSQL market_data.ohlcv_5m. - Uses ON CONFLICT for upsert behavior. + Uses ON CONFLICT for upsert behavior. Manages its own connection. Args: - conn: psycopg2 connection ticker_id: Ticker ID from market_data.tickers bars: List of Polygon API bar dicts @@ -244,16 +246,43 @@ def insert_bars_to_postgres(conn, ticker_id: int, bars: list) -> int: ts_ms, # ts_epoch in ms )) - # Insert in batches - batch_size = 5000 + # Insert in smaller batches with fresh connection per batch group + batch_size = 500 # Small batches for reliability total_inserted = 0 + conn = None - with conn.cursor() as cur: + try: + conn = psycopg2.connect(**PG_CONFIG) for i in range(0, len(rows), batch_size): batch = rows[i : i + batch_size] - psycopg2.extras.execute_batch(cur, insert_sql, batch, page_size=1000) - conn.commit() - total_inserted += len(batch) + retries = 3 + while retries > 0: + try: + with conn.cursor() as cur: + psycopg2.extras.execute_batch(cur, insert_sql, batch, page_size=100) + conn.commit() + total_inserted += len(batch) + break + except psycopg2.OperationalError as e: + retries -= 1 + logger.warning(f" DB error ({3 - retries}/3): {str(e)[:50]}") + try: + conn.close() + except Exception: + pass + conn = psycopg2.connect(**PG_CONFIG) + if retries == 0: + raise + # Progress every 10 batches + batch_num = i // batch_size + 1 + if batch_num % 20 == 0: + logger.info(f" Progress: {total_inserted:,} / {len(rows):,} rows") + finally: + if conn: + try: + conn.close() + except Exception: + pass return total_inserted @@ -264,7 +293,6 @@ def insert_bars_to_postgres(conn, ticker_id: int, bars: list) -> int: async def ingest_symbol( api_key: str, - conn, symbol: str, start_date: datetime, end_date: datetime, @@ -272,6 +300,7 @@ async def ingest_symbol( ) -> dict: """ Full ingestion pipeline for a single symbol. + Uses fresh DB connection for each symbol to avoid stale connections. Returns: Dict with results summary @@ -283,55 +312,69 @@ async def ingest_symbol( polygon_symbol = config["polygon"] - # Get ticker ID from DB + # Fresh connection for this symbol try: - ticker_id = get_ticker_id(conn, symbol) - except ValueError as e: - logger.error(str(e)) + conn = psycopg2.connect(**PG_CONFIG) + except Exception as e: + logger.error(f"PostgreSQL connection failed: {e}") return {"symbol": symbol, "status": "error", "error": str(e)} - # In incremental mode, start from last timestamp + 5 min - actual_start = start_date - if incremental: - last_ts = get_last_timestamp(conn, ticker_id) - if last_ts: - actual_start = last_ts + timedelta(minutes=5) - logger.info(f" Incremental: continuing from {actual_start}") + try: + # Get ticker ID from DB + try: + ticker_id = get_ticker_id(conn, symbol) + except ValueError as e: + logger.error(str(e)) + return {"symbol": symbol, "status": "error", "error": str(e)} - if actual_start >= end_date: - logger.info(f" {symbol}: already up to date") - return {"symbol": symbol, "status": "up_to_date", "rows": 0} + # In incremental mode, start from last timestamp + 5 min + actual_start = start_date + if incremental: + last_ts = get_last_timestamp(conn, ticker_id) + if last_ts: + actual_start = last_ts + timedelta(minutes=5) + logger.info(f" Incremental: continuing from {actual_start}") - # Fetch from Polygon - logger.info(f"Fetching {symbol} ({polygon_symbol}) from {actual_start.date()} to {end_date.date()}...") - bars = await fetch_polygon_bars( - api_key=api_key, - polygon_symbol=polygon_symbol, - start_date=actual_start, - end_date=end_date, - ) + if actual_start >= end_date: + logger.info(f" {symbol}: already up to date") + return {"symbol": symbol, "status": "up_to_date", "rows": 0} - if not bars: - logger.warning(f" {symbol}: no data received from API") - return {"symbol": symbol, "status": "no_data", "rows": 0} + # Fetch from Polygon + logger.info(f"Fetching {symbol} ({polygon_symbol}) from {actual_start.date()} to {end_date.date()}...") + bars = await fetch_polygon_bars( + api_key=api_key, + polygon_symbol=polygon_symbol, + start_date=actual_start, + end_date=end_date, + ) - # Show range - first_ts = datetime.fromtimestamp(bars[0]["t"] / 1000) - last_ts = datetime.fromtimestamp(bars[-1]["t"] / 1000) - logger.info(f" {symbol}: {len(bars)} bars from {first_ts} to {last_ts}") + if not bars: + logger.warning(f" {symbol}: no data received from API") + return {"symbol": symbol, "status": "no_data", "rows": 0} - # Insert into PostgreSQL - logger.info(f" Inserting {len(bars)} bars into market_data.ohlcv_5m...") - inserted = insert_bars_to_postgres(conn, ticker_id, bars) - logger.info(f" {symbol}: {inserted} rows inserted/updated") + # Show range + first_ts = datetime.fromtimestamp(bars[0]["t"] / 1000) + last_ts = datetime.fromtimestamp(bars[-1]["t"] / 1000) + logger.info(f" {symbol}: {len(bars)} bars from {first_ts} to {last_ts}") - return { - "symbol": symbol, - "status": "success", - "rows": inserted, - "first_bar": str(first_ts), - "last_bar": str(last_ts), - } + # Insert into PostgreSQL (uses its own connection) + conn.close() # Close read connection before bulk insert + logger.info(f" Inserting {len(bars)} bars into market_data.ohlcv_5m...") + inserted = insert_bars_to_postgres(ticker_id, bars) + logger.info(f" {symbol}: {inserted} rows inserted/updated") + + return { + "symbol": symbol, + "status": "success", + "rows": inserted, + "first_bar": str(first_ts), + "last_bar": str(last_ts), + } + finally: + try: + conn.close() + except Exception: + pass async def main(): @@ -385,15 +428,16 @@ async def main(): logger.info(f"Incremental: {args.incremental}") logger.info(f"Database: {PG_CONFIG['host']}:{PG_CONFIG['port']}/{PG_CONFIG['dbname']}") - # Connect to PostgreSQL + # Test PostgreSQL connectivity try: - conn = psycopg2.connect(**PG_CONFIG) - logger.info("PostgreSQL connected") + test_conn = psycopg2.connect(**PG_CONFIG) + test_conn.close() + logger.info("PostgreSQL connectivity verified") except Exception as e: logger.error(f"PostgreSQL connection failed: {e}") sys.exit(1) - # Process each symbol + # Process each symbol (each gets its own connection) results = [] for symbol in args.symbols: logger.info(f"\n{'='*40}") @@ -402,7 +446,6 @@ async def main(): result = await ingest_symbol( api_key=api_key, - conn=conn, symbol=symbol, start_date=start_date, end_date=end_date, @@ -410,8 +453,6 @@ async def main(): ) results.append(result) - conn.close() - # Print summary logger.info("\n" + "=" * 60) logger.info("INGESTION SUMMARY")