"""Pytest configuration and fixtures for Inference Engine tests."""

import asyncio
from typing import Any, AsyncGenerator, Dict, List
from unittest.mock import AsyncMock, MagicMock

import pytest
from fastapi.testclient import TestClient
from httpx import ASGITransport, AsyncClient

from src.config import Settings, get_settings
from src.engine.backend_manager import BackendManager
from src.engine.ollama_backend import OllamaBackend
from src.main import app
|
|
|
|
|
|
# Override settings for testing
|
|
@pytest.fixture
|
|
def test_settings() -> Settings:
|
|
"""Get test settings."""
|
|
return Settings(
|
|
inference_port=3161,
|
|
inference_host="localhost",
|
|
inference_backend="ollama",
|
|
ollama_host="http://localhost:11434",
|
|
ollama_model="tinyllama",
|
|
max_tokens_limit=4096,
|
|
default_max_tokens=512,
|
|
request_timeout_ms=5000,
|
|
retry_max_attempts=2,
|
|
retry_base_delay_ms=100,
|
|
models_cache_ttl_seconds=60,
|
|
)
|
|
|
|
|
|
@pytest.fixture
|
|
def mock_ollama_response() -> Dict[str, Any]:
|
|
"""Sample Ollama API response."""
|
|
return {
|
|
"message": {
|
|
"role": "assistant",
|
|
"content": "Hello! How can I help you today?",
|
|
},
|
|
"prompt_eval_count": 10,
|
|
"eval_count": 8,
|
|
}
|
|
|
|
|
|
@pytest.fixture
|
|
def mock_ollama_models() -> Dict[str, Any]:
|
|
"""Sample Ollama models list response."""
|
|
return {
|
|
"models": [
|
|
{"name": "tinyllama:latest"},
|
|
{"name": "llama2:latest"},
|
|
]
|
|
}
|
|
|
|
|
|
@pytest.fixture
|
|
def sample_messages() -> List[Dict[str, str]]:
|
|
"""Sample chat messages."""
|
|
return [
|
|
{"role": "system", "content": "You are a helpful assistant."},
|
|
{"role": "user", "content": "Hello!"},
|
|
]
|
|
|
|
|
|
@pytest.fixture
|
|
def mock_ollama_backend(mock_ollama_response: Dict[str, Any], mock_ollama_models: Dict[str, Any]) -> OllamaBackend:
|
|
"""Create a mocked OllamaBackend."""
|
|
backend = OllamaBackend(base_url="http://localhost:11434")
|
|
|
|
# Mock the HTTP client
|
|
mock_client = AsyncMock()
|
|
|
|
# Mock health check
|
|
mock_health_response = MagicMock()
|
|
mock_health_response.status_code = 200
|
|
|
|
# Mock models list
|
|
mock_models_response = MagicMock()
|
|
mock_models_response.status_code = 200
|
|
mock_models_response.json.return_value = mock_ollama_models
|
|
mock_models_response.raise_for_status = MagicMock()
|
|
|
|
# Mock chat completion
|
|
mock_chat_response = MagicMock()
|
|
mock_chat_response.status_code = 200
|
|
mock_chat_response.json.return_value = mock_ollama_response
|
|
mock_chat_response.raise_for_status = MagicMock()
|
|
|
|
mock_client.get = AsyncMock(side_effect=lambda path:
|
|
mock_health_response if "/api/tags" in path else mock_models_response
|
|
)
|
|
mock_client.post = AsyncMock(return_value=mock_chat_response)
|
|
mock_client.is_closed = False
|
|
|
|
backend._client = mock_client
|
|
return backend
|
|
|
|
|
|
@pytest.fixture
|
|
def mock_backend_manager(mock_ollama_backend: OllamaBackend) -> BackendManager:
|
|
"""Create a mocked BackendManager."""
|
|
manager = BackendManager.__new__(BackendManager)
|
|
manager.backend_type = "ollama"
|
|
manager._backend = mock_ollama_backend
|
|
return manager
|
|
|
|
|
|
@pytest.fixture
|
|
def test_client(mock_backend_manager: BackendManager) -> TestClient:
|
|
"""Create a test client with mocked backend."""
|
|
app.state.backend_manager = mock_backend_manager
|
|
return TestClient(app)
|
|
|
|
|
|
@pytest.fixture
|
|
async def async_client(mock_backend_manager: BackendManager) -> AsyncClient:
|
|
"""Create an async test client."""
|
|
app.state.backend_manager = mock_backend_manager
|
|
async with AsyncClient(
|
|
transport=ASGITransport(app=app),
|
|
base_url="http://test"
|
|
) as client:
|
|
yield client
|
|
|
|
|
|
# Pytest configuration
|
|
@pytest.fixture(scope="session")
|
|
def event_loop():
|
|
"""Create an event loop for async tests."""
|
|
loop = asyncio.get_event_loop_policy().new_event_loop()
|
|
yield loop
|
|
loop.close()
|