-- File: trading-platform-database-v2/ddl/schemas/ml/functions/05-calculate_prediction_accuracy.sql

-- =====================================================
-- ML SCHEMA - CALCULATE PREDICTION ACCURACY FUNCTION
-- =====================================================
-- Description: Function to calculate LLM prediction accuracy metrics
-- Schema: ml
-- Author: Database Agent
-- Date: 2026-01-04
-- Module: OQI-010-llm-trading-integration
-- =====================================================

-- -----------------------------------------------------
-- Function: ml.calculate_llm_prediction_accuracy
-- -----------------------------------------------------
-- Aggregates outcome metrics over the resolved LLM predictions of one
-- symbol inside a rolling look-back window ending at NOW().
-- Parameters:
--   p_symbol: Trading symbol, matched against llm_predictions.symbol
--   p_days:   Look-back window in days (default: 30)
-- Returns (a single row, even when no prediction matches):
--   total_predictions:      Number of resolved predictions in the window
--   direction_accuracy:     Ratio (0.0-1.0) of rows with direction_correct TRUE
--   target_hit_rate:        Ratio (0.0-1.0) of rows with target_reached TRUE
--   stop_hit_rate:          Ratio (0.0-1.0) of rows with stop_hit TRUE
--   avg_pnl_pips:           Average pnl_pips (NULL values are ignored by AVG)
--   profit_factor:          Gross profit / gross loss; NULL when there is no loss
--   win_rate:               Ratio (0.0-1.0) of rows with pnl_pips > 0
--   avg_resolution_candles: Average resolution_candles
-- Notes:
--   A NULL flag or NULL pnl_pips counts as "not hit" / "not a win" in the
--   ratio columns; the row still counts toward the denominator.
-- -----------------------------------------------------

CREATE OR REPLACE FUNCTION ml.calculate_llm_prediction_accuracy(
  p_symbol VARCHAR,
  p_days INT DEFAULT 30
)
RETURNS TABLE(
  total_predictions INT,
  direction_accuracy DECIMAL(5,4),
  target_hit_rate DECIMAL(5,4),
  stop_hit_rate DECIMAL(5,4),
  avg_pnl_pips DECIMAL(10,2),
  profit_factor DECIMAL(10,4),
  win_rate DECIMAL(5,4),
  avg_resolution_candles DECIMAL(10,2)
) AS $$
BEGIN
  RETURN QUERY
  SELECT
    COUNT(*)::INT AS total_predictions,

    -- Ratios: a boolean cast to 0/1 and averaged; IS TRUE maps NULL to 0.
    COALESCE(AVG((outc.direction_correct IS TRUE)::INT)::DECIMAL(5,4), 0.0) AS direction_accuracy,
    COALESCE(AVG((outc.target_reached IS TRUE)::INT)::DECIMAL(5,4), 0.0) AS target_hit_rate,
    COALESCE(AVG((outc.stop_hit IS TRUE)::INT)::DECIMAL(5,4), 0.0) AS stop_hit_rate,

    -- Mean PnL in pips over the matched rows.
    COALESCE(AVG(outc.pnl_pips)::DECIMAL(10,2), 0.0) AS avg_pnl_pips,

    -- Gross profit over gross loss. The loss sum is NULL when no losing row
    -- exists, which makes the whole ratio NULL instead of dividing by zero.
    (
      COALESCE(SUM(outc.pnl_pips) FILTER (WHERE outc.pnl_pips > 0), 0) /
      NULLIF(SUM(ABS(outc.pnl_pips)) FILTER (WHERE outc.pnl_pips < 0), 0)
    )::DECIMAL(10,4) AS profit_factor,

    -- Share of rows that closed with a strictly positive PnL.
    COALESCE(AVG(((outc.pnl_pips > 0) IS TRUE)::INT)::DECIMAL(5,4), 0.0) AS win_rate,

    -- Mean number of candles until the outcome was known.
    COALESCE(AVG(outc.resolution_candles)::DECIMAL(10,2), 0.0) AS avg_resolution_candles

  FROM ml.llm_prediction_outcomes AS outc
  INNER JOIN ml.llm_predictions AS pred ON pred.id = outc.prediction_id
  WHERE outc.resolved_at IS NOT NULL
    AND pred.symbol = p_symbol
    AND pred.created_at >= NOW() - make_interval(days => p_days);
END;
$$ LANGUAGE plpgsql STABLE;

COMMENT ON FUNCTION ml.calculate_llm_prediction_accuracy(VARCHAR, INT) IS
'Calculates comprehensive accuracy metrics for LLM predictions.

Parameters:
  - p_symbol: Trading symbol to analyze (e.g., XAUUSD, BTCUSDT)
  - p_days: Number of days to look back (default: 30)

Returns:
  - total_predictions: Count of resolved predictions in period
  - direction_accuracy: Ratio of correct direction predictions (0.0 to 1.0)
  - target_hit_rate: Ratio of predictions that hit take profit (0.0 to 1.0)
  - stop_hit_rate: Ratio of predictions that hit stop loss (0.0 to 1.0)
  - avg_pnl_pips: Average profit/loss in pips
  - profit_factor: Gross profit / Gross loss (>1.0 is profitable)
  - win_rate: Ratio of profitable trades (0.0 to 1.0)
  - avg_resolution_candles: Average candles until outcome determined

Example usage:
  SELECT * FROM ml.calculate_llm_prediction_accuracy(''XAUUSD'', 30);
  SELECT * FROM ml.calculate_llm_prediction_accuracy(''BTCUSDT'', 7);
';


-- -----------------------------------------------------
-- Function: ml.calculate_llm_prediction_accuracy_by_phase
-- -----------------------------------------------------
-- Per-AMD-phase breakdown of resolved LLM predictions for one symbol.
-- Parameters:
--   p_symbol: Trading symbol, matched against llm_predictions.symbol
--   p_days:   Look-back window in days, ending at NOW() (default: 30)
-- Returns one row per distinct non-NULL amd_phase, busiest phase first:
--   amd_phase, total_predictions, direction_accuracy (0.0-1.0),
--   avg_pnl_pips, win_rate (0.0-1.0)
-- Predictions whose amd_phase is NULL are left out entirely.
-- -----------------------------------------------------

CREATE OR REPLACE FUNCTION ml.calculate_llm_prediction_accuracy_by_phase(
  p_symbol VARCHAR,
  p_days INT DEFAULT 30
)
RETURNS TABLE(
  amd_phase VARCHAR(50),
  total_predictions INT,
  direction_accuracy DECIMAL(5,4),
  avg_pnl_pips DECIMAL(10,2),
  win_rate DECIMAL(5,4)
) AS $$
BEGIN
  RETURN QUERY
  SELECT
    pred.amd_phase,
    COUNT(*)::INT AS total_predictions,
    -- Boolean cast to 0/1 and averaged; IS TRUE maps NULL to 0.
    COALESCE(AVG((outc.direction_correct IS TRUE)::INT)::DECIMAL(5,4), 0.0) AS direction_accuracy,
    COALESCE(AVG(outc.pnl_pips)::DECIMAL(10,2), 0.0) AS avg_pnl_pips,
    COALESCE(AVG(((outc.pnl_pips > 0) IS TRUE)::INT)::DECIMAL(5,4), 0.0) AS win_rate
  FROM ml.llm_prediction_outcomes AS outc
  INNER JOIN ml.llm_predictions AS pred ON pred.id = outc.prediction_id
  WHERE outc.resolved_at IS NOT NULL
    AND pred.amd_phase IS NOT NULL
    AND pred.symbol = p_symbol
    AND pred.created_at >= NOW() - make_interval(days => p_days)
  GROUP BY pred.amd_phase
  -- Largest groups first.
  ORDER BY COUNT(*) DESC;
END;
$$ LANGUAGE plpgsql STABLE;

COMMENT ON FUNCTION ml.calculate_llm_prediction_accuracy_by_phase(VARCHAR, INT) IS
'Calculates prediction accuracy metrics grouped by AMD phase.

Useful for understanding which AMD phases produce the most accurate predictions.

Example usage:
  SELECT * FROM ml.calculate_llm_prediction_accuracy_by_phase(''XAUUSD'', 30);
';


-- -----------------------------------------------------
-- Function: ml.calculate_llm_prediction_accuracy_by_killzone
-- -----------------------------------------------------
-- Per-killzone breakdown of resolved LLM predictions for one symbol.
-- Parameters:
--   p_symbol: Trading symbol, matched against llm_predictions.symbol
--   p_days:   Look-back window in days, ending at NOW() (default: 30)
-- Returns one row per distinct non-NULL killzone, busiest killzone first:
--   killzone, total_predictions, direction_accuracy (0.0-1.0),
--   avg_pnl_pips, win_rate (0.0-1.0)
-- Predictions whose killzone is NULL are left out entirely.
-- -----------------------------------------------------

CREATE OR REPLACE FUNCTION ml.calculate_llm_prediction_accuracy_by_killzone(
  p_symbol VARCHAR,
  p_days INT DEFAULT 30
)
RETURNS TABLE(
  killzone VARCHAR(50),
  total_predictions INT,
  direction_accuracy DECIMAL(5,4),
  avg_pnl_pips DECIMAL(10,2),
  win_rate DECIMAL(5,4)
) AS $$
BEGIN
  RETURN QUERY
  SELECT
    pred.killzone,
    COUNT(*)::INT AS total_predictions,
    -- Boolean cast to 0/1 and averaged; IS TRUE maps NULL to 0.
    COALESCE(AVG((outc.direction_correct IS TRUE)::INT)::DECIMAL(5,4), 0.0) AS direction_accuracy,
    COALESCE(AVG(outc.pnl_pips)::DECIMAL(10,2), 0.0) AS avg_pnl_pips,
    COALESCE(AVG(((outc.pnl_pips > 0) IS TRUE)::INT)::DECIMAL(5,4), 0.0) AS win_rate
  FROM ml.llm_prediction_outcomes AS outc
  INNER JOIN ml.llm_predictions AS pred ON pred.id = outc.prediction_id
  WHERE outc.resolved_at IS NOT NULL
    AND pred.killzone IS NOT NULL
    AND pred.symbol = p_symbol
    AND pred.created_at >= NOW() - make_interval(days => p_days)
  GROUP BY pred.killzone
  -- Largest groups first.
  ORDER BY COUNT(*) DESC;
END;
$$ LANGUAGE plpgsql STABLE;

COMMENT ON FUNCTION ml.calculate_llm_prediction_accuracy_by_killzone(VARCHAR, INT) IS
'Calculates prediction accuracy metrics grouped by ICT Killzone.

Useful for understanding which trading sessions produce the best predictions.

Example usage:
  SELECT * FROM ml.calculate_llm_prediction_accuracy_by_killzone(''XAUUSD'', 30);
';


-- -----------------------------------------------------
-- Function: ml.calculate_llm_prediction_accuracy_by_confluence
-- -----------------------------------------------------
-- Calculates accuracy metrics grouped by confluence score ranges
-- Parameters:
--   p_symbol: Trading symbol, matched against llm_predictions.symbol
--   p_days:   Look-back window in days, ending at NOW() (default: 30)
-- Returns one row per non-empty bucket, highest bucket first:
--   confluence_range:   Bucket label (see below)
--   total_predictions:  Resolved predictions in the bucket
--   direction_accuracy: Ratio (0.0-1.0) of rows with direction_correct TRUE
--   avg_pnl_pips:       Average pnl_pips
--   win_rate:           Ratio (0.0-1.0) of rows with pnl_pips > 0
-- Buckets (lower bound inclusive):
--   >= 0.8 High, >= 0.6 Medium-High, >= 0.4 Medium, >= 0.2 Medium-Low,
--   everything below 0.2 Low. Rows with a NULL confluence_score are skipped.
-- -----------------------------------------------------

CREATE OR REPLACE FUNCTION ml.calculate_llm_prediction_accuracy_by_confluence(
  p_symbol VARCHAR,
  p_days INT DEFAULT 30
)
RETURNS TABLE(
  confluence_range VARCHAR(20),
  total_predictions INT,
  direction_accuracy DECIMAL(5,4),
  avg_pnl_pips DECIMAL(10,2),
  win_rate DECIMAL(5,4)
) AS $$
BEGIN
  RETURN QUERY
  SELECT
    bucketed.range_label,
    COUNT(*)::INT,
    COALESCE(
      AVG(CASE WHEN bucketed.direction_correct = TRUE THEN 1.0 ELSE 0.0 END)::DECIMAL(5,4),
      0.0
    ),
    COALESCE(AVG(bucketed.pnl_pips)::DECIMAL(10,2), 0.0),
    COALESCE(
      AVG(CASE WHEN bucketed.pnl_pips > 0 THEN 1.0 ELSE 0.0 END)::DECIMAL(5,4),
      0.0
    )
  FROM (
    -- Label every resolved prediction once, so the bucket boundaries live in
    -- a single place instead of being repeated in SELECT and GROUP BY.
    SELECT
      -- A CASE over string literals is typed TEXT, and RETURN QUERY requires
      -- the column type to match the declared VARCHAR exactly, so cast here.
      -- The cast is deliberately unbounded: '0.6-0.8 (Medium-High)' is 21
      -- characters and VARCHAR(20) would truncate it. The (20) in RETURNS
      -- TABLE is not enforced, because CREATE FUNCTION discards type modifiers.
      (
        CASE
          WHEN p.confluence_score >= 0.8 THEN '0.8-1.0 (High)'
          WHEN p.confluence_score >= 0.6 THEN '0.6-0.8 (Medium-High)'
          WHEN p.confluence_score >= 0.4 THEN '0.4-0.6 (Medium)'
          WHEN p.confluence_score >= 0.2 THEN '0.2-0.4 (Medium-Low)'
          ELSE '0.0-0.2 (Low)'
        END
      )::VARCHAR AS range_label,
      o.direction_correct,
      o.pnl_pips
    FROM ml.llm_predictions p
    INNER JOIN ml.llm_prediction_outcomes o ON p.id = o.prediction_id
    WHERE p.symbol = p_symbol
      AND p.created_at >= NOW() - make_interval(days => p_days)
      AND o.resolved_at IS NOT NULL
      AND p.confluence_score IS NOT NULL
  ) AS bucketed
  GROUP BY bucketed.range_label
  -- Labels start with their numeric lower bound, so a descending string
  -- sort lists the highest confluence bucket first.
  ORDER BY bucketed.range_label DESC;
END;
$$ LANGUAGE plpgsql STABLE;

COMMENT ON FUNCTION ml.calculate_llm_prediction_accuracy_by_confluence(VARCHAR, INT) IS
'Calculates prediction accuracy metrics grouped by confluence score ranges.

Validates whether higher confluence scores correlate with better accuracy.

Example usage:
  SELECT * FROM ml.calculate_llm_prediction_accuracy_by_confluence(''XAUUSD'', 30);
';


-- -----------------------------------------------------
-- Function: ml.get_active_risk_events
-- -----------------------------------------------------
-- Lists unresolved rows of ml.risk_events.
-- Parameters:
--   p_user_id: When NULL (the default) every unresolved event is returned.
--              Otherwise only events of that user plus events that have no
--              user_id at all.
-- Ordering: 'emergency', then 'critical', then 'warning', then any other
-- severity; newest first within the same severity.
-- -----------------------------------------------------

CREATE OR REPLACE FUNCTION ml.get_active_risk_events(
  p_user_id UUID DEFAULT NULL
)
RETURNS TABLE(
  id UUID,
  event_type VARCHAR(50),
  severity VARCHAR(20),
  details JSONB,
  action_taken VARCHAR(100),
  created_at TIMESTAMPTZ
) AS $$
BEGIN
  RETURN QUERY
  SELECT
    ev.id,
    ev.event_type,
    ev.severity,
    ev.details,
    ev.action_taken,
    ev.created_at
  FROM ml.risk_events AS ev
  WHERE NOT ev.resolved
    -- Events without a user are always visible; a NULL filter shows all.
    AND (ev.user_id IS NULL OR p_user_id IS NULL OR ev.user_id = p_user_id)
  ORDER BY
    -- Explicit severity ranking; unknown or NULL severities sort last.
    CASE
      WHEN ev.severity = 'emergency' THEN 1
      WHEN ev.severity = 'critical' THEN 2
      WHEN ev.severity = 'warning' THEN 3
      ELSE 4
    END,
    ev.created_at DESC;
END;
$$ LANGUAGE plpgsql STABLE;

COMMENT ON FUNCTION ml.get_active_risk_events(UUID) IS
'Returns all unresolved risk events, optionally filtered by user.

Parameters:
  - p_user_id: User ID to filter by (NULL for all events including system-wide)

Returns events ordered by severity (emergency first) then by time.

Example usage:
  SELECT * FROM ml.get_active_risk_events();
  SELECT * FROM ml.get_active_risk_events(''550e8400-e29b-41d4-a716-446655440000'');
';


-- -----------------------------------------------------
-- Function: ml.check_circuit_breaker_status
-- -----------------------------------------------------
-- Reports the newest unresolved 'CIRCUIT_BREAKER' row of ml.risk_events.
-- Parameters:
--   p_user_id: When NULL (the default) events of any user are considered.
--              Otherwise only events of that user plus events that have no
--              user_id at all.
-- Returns exactly one row:
--   - a matching event exists: is_active = TRUE plus the event's id,
--     details->>'trigger_reason', created_at and details
--   - otherwise: is_active = FALSE and NULL in every other column
-- -----------------------------------------------------

CREATE OR REPLACE FUNCTION ml.check_circuit_breaker_status(
  p_user_id UUID DEFAULT NULL
)
RETURNS TABLE(
  is_active BOOLEAN,
  event_id UUID,
  trigger_reason TEXT,
  created_at TIMESTAMPTZ,
  details JSONB
) AS $$
BEGIN
  -- Most recent open circuit-breaker event, if there is one.
  RETURN QUERY
  SELECT
    TRUE,
    ev.id,
    ev.details ->> 'trigger_reason',
    ev.created_at,
    ev.details
  FROM ml.risk_events AS ev
  WHERE NOT ev.resolved
    AND ev.event_type = 'CIRCUIT_BREAKER'
    AND (ev.user_id IS NULL OR p_user_id IS NULL OR ev.user_id = p_user_id)
  ORDER BY ev.created_at DESC
  LIMIT 1;

  -- RETURN QUERY sets FOUND when it produced at least one row.
  IF FOUND THEN
    RETURN;
  END IF;

  -- Nothing open: emit an explicit "inactive" row so callers always get one.
  RETURN QUERY
  SELECT
    FALSE,
    CAST(NULL AS UUID),
    CAST(NULL AS TEXT),
    CAST(NULL AS TIMESTAMPTZ),
    CAST(NULL AS JSONB);
END;
$$ LANGUAGE plpgsql STABLE;

COMMENT ON FUNCTION ml.check_circuit_breaker_status(UUID) IS
'Checks if circuit breaker is currently active for a user.

Returns:
  - is_active: TRUE if circuit breaker is engaged
  - event_id: ID of the active circuit breaker event
  - trigger_reason: Reason the circuit breaker was triggered
  - created_at: When the circuit breaker was activated
  - details: Full details of the event

Example usage:
  SELECT * FROM ml.check_circuit_breaker_status();
  SELECT is_active FROM ml.check_circuit_breaker_status(''550e8400-e29b-41d4-a716-446655440000'');
';