inmobiliaria-analytics/docs/01-fase-alcance-inicial/IAI-007-webscraper/especificaciones/ET-SCR-003-proxies.md
rckrdmrd f570727617 feat: Documentation and orchestration updates
🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-07 05:35:40 -06:00

1147 lines
31 KiB
Markdown

---
id: "ET-SCR-003"
title: "Especificacion Tecnica - Gestion de Pool de Proxies"
type: "Technical Specification"
epic: "IAI-007"
status: "Draft"
version: "1.0"
project: "inmobiliaria-analytics"
created_date: "2026-01-04"
updated_date: "2026-01-04"
---
# ET-SCR-003: Gestion de Pool de Proxies
---
## 1. Resumen
Sistema de gestion de proxies residenciales, de datacenter y moviles que rota las IP automaticamente para evitar bloqueos y mantener una tasa de exito alta en el scraping.
---
## 2. Arquitectura del Sistema de Proxies
```
┌─────────────────────────────────────────────────────────────┐
│                        PROXY MANAGER                        │
├─────────────────────────────────────────────────────────────┤
│                                                             │
│  ┌──────────────┐   ┌──────────────┐   ┌──────────────┐     │
│  │ Residential  │   │ Datacenter   │   │    Mobile    │     │
│  │     Pool     │   │     Pool     │   │     Pool     │     │
│  │  (Premium)   │   │  (Backup)    │   │  (Reserved)  │     │
│  └──────┬───────┘   └──────┬───────┘   └──────┬───────┘     │
│         │                  │                  │             │
│         └──────────────────┼──────────────────┘             │
│                            │                                │
│                    ┌───────▼───────┐                        │
│                    │   Selector    │                        │
│                    │    Engine     │                        │
│                    └───────┬───────┘                        │
│                            │                                │
│          ┌─────────────────┼─────────────────┐              │
│          │                 │                 │              │
│          ▼                 ▼                 ▼              │
│       ┌──────┐          ┌──────┐        ┌──────────┐        │
│       │Health│          │ Geo  │        │ Cooldown │        │
│       │Check │          │Filter│        │ Manager  │        │
│       └──────┘          └──────┘        └──────────┘        │
│                                                             │
└─────────────────────────────────────────────────────────────┘
                        ┌─────────────┐
                        │   Browser   │
                        │   Manager   │
                        └─────────────┘
```
---
## 3. Proveedores de Proxies
### 3.1 Configuracion de Proveedores
```yaml
# config/proxies.yml
# Nesting restored: each provider is a child of `providers`, and `settings`
# is a top-level sibling. Durations are in seconds.
providers:
  brightdata:
    type: residential
    priority: 1
    endpoint: "brd.superproxy.io"
    port: 22225
    username: "${BRIGHTDATA_USER}"
    password: "${BRIGHTDATA_PASS}"
    geo:
      country: "mx"
      city: "guadalajara"
    sticky_session: true
    session_duration: 600 # 10 minutes
    monthly_bandwidth: "100GB"
    cost_per_gb: 15 # USD

  smartproxy:
    type: residential
    priority: 2
    endpoint: "mx.smartproxy.com"
    port: 10000
    username: "${SMARTPROXY_USER}"
    password: "${SMARTPROXY_PASS}"
    geo:
      country: "mx"
    rotation: "per_request"
    monthly_bandwidth: "50GB"
    cost_per_gb: 12

  datacenter_pool:
    type: datacenter
    priority: 3
    proxies:
      - host: "proxy1.example.com"
        port: 3128
      - host: "proxy2.example.com"
        port: 3128
    auth:
      username: "${DC_PROXY_USER}"
      password: "${DC_PROXY_PASS}"
    cost_per_request: 0.001

settings:
  default_provider: "brightdata"
  fallback_chain: ["brightdata", "smartproxy", "datacenter_pool"]
  max_failures_before_switch: 3
  cooldown_after_block: 300 # 5 minutes
  health_check_interval: 60 # 1 minute
```
### 3.2 Tipos de Proxy y Uso
| Tipo | Uso Principal | Costo | Tasa Exito |
|------|--------------|-------|------------|
| Residential | Sitios con anti-bot agresivo | Alto | 95%+ |
| Datacenter | Sitios simples, backup | Bajo | 70-80% |
| Mobile | Casos especiales, Cloudflare | Muy Alto | 98%+ |
---
## 4. Implementacion
### 4.1 Interfaz de Proxy
```typescript
// src/proxy/types.ts
/** Connection details needed to route traffic through one proxy endpoint. */
export interface ProxyConfig {
  host: string;
  port: number;
  /** Optional credentials; omitted for unauthenticated proxies. */
  username?: string;
  password?: string;
  protocol: 'http' | 'https' | 'socks5';
}

/** A proxy endpoint together with the bookkeeping the pool keeps about it. */
export interface ProxyWithMetadata extends ProxyConfig {
  id: string;
  /** Name of the provider this proxy came from. */
  provider: string;
  type: 'residential' | 'datacenter' | 'mobile';
  geo: {
    country: string;
    city?: string;
    region?: string;
  };
  // Metrics
  stats: ProxyStats;
  // State
  status: 'active' | 'cooling' | 'blocked' | 'inactive';
  /** Time of the most recent selection, or null if never used. */
  lastUsed: Date | null;
  /** End of the current cooldown, or null when no cooldown is set. */
  cooldownUntil: Date | null;
}

/** Per-proxy request counters and timing aggregates. */
export interface ProxyStats {
  totalRequests: number;
  successfulRequests: number;
  failedRequests: number;
  blockedRequests: number;
  /** Average latency in milliseconds. */
  avgLatencyMs: number;
  /** Bandwidth consumed, in megabytes. */
  bandwidthUsedMb: number;
  lastSuccess: Date | null;
  lastFailure: Date | null;
}

/** Result of a pool selection: the chosen proxy and, for sticky use, its session id. */
export interface ProxySelection {
  proxy: ProxyWithMetadata;
  sessionId?: string;
}

/** Targeting options a provider may honour when building a proxy configuration. */
export interface ProxyProviderOptions {
  country?: string;
  city?: string;
  sessionId?: string;
  sticky?: boolean;
}

/**
 * Contract implemented by proxy providers (e.g. the Bright Data provider).
 * Declared here because provider modules import `ProxyProvider` from this file.
 */
export interface ProxyProvider {
  /** Builds the connection settings for one proxy, optionally geo-targeted or session-pinned. */
  getProxy(options?: ProxyProviderOptions): ProxyConfig;
}
```
### 4.2 Proxy Pool Manager
```typescript
// src/proxy/pool-manager.ts
import { Redis } from 'ioredis';
import { ProxyWithMetadata, ProxyConfig, ProxySelection } from './types';
import { ProxyHealthChecker } from './health-checker';
import { Logger } from '../utils/logger';
/**
 * Redis-backed proxy pool: selects a proxy for a target domain, tracks
 * per-proxy outcomes, and applies per-domain blocks and general cooldowns.
 *
 * Redis keys used by this class:
 *   proxy:<id>                   hash   proxy definition, status, lastUsed, cooldownUntil
 *   proxy:stats:<id>             hash   request counters and avgLatencyMs
 *   proxy:latency:<id>           list   most recent latency samples
 *   proxy:blocked:<id>:<domain>  string per-domain block marker (expires on its own)
 *   proxy:session:<sessionId>    string proxy id pinned to a sticky session
 */
export class ProxyPoolManager {
  /** Matches top-level proxy hashes only; skips `proxy:stats:*`, `proxy:session:*`, etc. */
  private static readonly PROXY_KEY_PATTERN = /^proxy:[a-z0-9]+$/;
  // NOTE(review): config/proxies.yml declares cooldown_after_block: 300 (seconds),
  // while this class has always used 30 minutes — confirm which one is intended.
  private static readonly DOMAIN_BLOCK_MINUTES = 30;
  private static readonly GLOBAL_COOLDOWN_MS = 60 * 60 * 1000;
  /** Number of simultaneously blocked domains that triggers a general cooldown. */
  private static readonly GLOBAL_COOLDOWN_DOMAINS = 3;
  private static readonly STICKY_SESSION_TTL_SECONDS = 600;
  private static readonly LATENCY_WINDOW = 100;

  private redis: Redis;
  private healthChecker: ProxyHealthChecker;
  private logger: Logger;
  private providers: Map<string, ProxyProvider>;

  constructor() {
    this.redis = new Redis(process.env.REDIS_URL);
    this.healthChecker = new ProxyHealthChecker();
    this.logger = new Logger('ProxyPool');
    this.providers = new Map();
    this.initializeProviders();
  }

  /** Registers the providers under the same names the configuration file uses. */
  private initializeProviders(): void {
    // Bright Data
    this.providers.set('brightdata', new BrightDataProvider({
      endpoint: process.env.BRIGHTDATA_ENDPOINT!,
      username: process.env.BRIGHTDATA_USER!,
      password: process.env.BRIGHTDATA_PASS!,
    }));
    // SmartProxy
    this.providers.set('smartproxy', new SmartProxyProvider({
      endpoint: process.env.SMARTPROXY_ENDPOINT!,
      username: process.env.SMARTPROXY_USER!,
      password: process.env.SMARTPROXY_PASS!,
    }));
    // Datacenter pool — keyed as `datacenter_pool` to match the provider name
    // and fallback_chain entry in config/proxies.yml.
    this.providers.set('datacenter_pool', new DatacenterProxyProvider({
      proxies: JSON.parse(process.env.DC_PROXIES || '[]'),
    }));
  }

  /**
   * Picks a proxy for `targetDomain`.
   *
   * @param options.targetDomain  Domain the proxy will be used against.
   * @param options.preferredType Restricts candidates to one proxy type.
   * @param options.requireFresh  Heavily penalises proxies used in the last 30 minutes.
   * @param options.stickySession Reuse the proxy bound to `sessionId`, or create a new session.
   * @param options.sessionId     Existing sticky session to resume.
   * @returns The selected proxy and, for sticky selections, its session id.
   * @throws Error when no candidate proxy is available for the domain.
   */
  async getProxy(options: {
    targetDomain: string;
    preferredType?: 'residential' | 'datacenter' | 'mobile';
    requireFresh?: boolean;
    stickySession?: boolean;
    sessionId?: string;
  }): Promise<ProxySelection> {
    const { targetDomain, preferredType, requireFresh, stickySession, sessionId } = options;

    // 1. Reuse the active sticky session, if there is one.
    if (stickySession && sessionId) {
      const existingProxy = await this.getStickySession(sessionId);
      if (existingProxy) {
        return { proxy: existingProxy, sessionId };
      }
    }

    // 2. Build the candidate pool.
    const candidates = await this.getCandidates({
      domain: targetDomain,
      type: preferredType,
      excludeCooling: true,
      excludeBlocked: true,
    });
    if (candidates.length === 0) {
      throw new Error(`No proxies available for ${targetDomain}`);
    }

    // 3. Choose the best proxy.
    const selected = this.selectBestProxy(candidates, {
      requireFresh,
      domain: targetDomain,
    });

    // 4. Create a session when sticky behaviour was requested.
    let newSessionId = sessionId;
    if (stickySession) {
      newSessionId = await this.createStickySession(selected);
    }

    // 5. Record the usage.
    await this.markUsed(selected.id);
    this.logger.debug(`Selected proxy ${selected.id} for ${targetDomain}`);
    return { proxy: selected, sessionId: newSessionId };
  }

  /**
   * Lists the proxies eligible for a domain.
   *
   * A proxy qualifies when it is `active`, or when it is `cooling` and either
   * its cooldown has already elapsed or `excludeCooling` is false. With
   * `excludeBlocked`, proxies holding a live `proxy:blocked:<id>:<domain>`
   * marker are removed as well.
   */
  private async getCandidates(options: {
    domain: string;
    type?: string;
    excludeCooling: boolean;
    excludeBlocked: boolean;
  }): Promise<ProxyWithMetadata[]> {
    const allProxies = await this.getAllProxies();
    const now = Date.now();

    const eligible = allProxies.filter(proxy => {
      if (options.type && proxy.type !== options.type) {
        return false;
      }
      if (proxy.status === 'active') {
        return true;
      }
      if (proxy.status === 'cooling') {
        if (!options.excludeCooling) {
          return true;
        }
        // The status field is not reset when a cooldown ends, so an elapsed
        // cooldown is treated as usable again.
        return proxy.cooldownUntil !== null && proxy.cooldownUntil.getTime() <= now;
      }
      return false;
    });

    if (!options.excludeBlocked || eligible.length === 0) {
      return eligible;
    }

    // Block markers expire on their own, so existence means "still blocked".
    const blockedFlags = await Promise.all(
      eligible.map(proxy => this.redis.exists(`proxy:blocked:${proxy.id}:${options.domain}`)),
    );
    return eligible.filter((_, index) => blockedFlags[index] === 0);
  }

  /**
   * Scores every candidate and returns a random pick among the three best,
   * so that selection does not form a predictable pattern.
   */
  private selectBestProxy(
    candidates: ProxyWithMetadata[],
    options: { requireFresh?: boolean; domain: string }
  ): ProxyWithMetadata {
    const scored = candidates.map(proxy => {
      let score = 100;
      const minutesSinceUse = proxy.lastUsed
        ? (Date.now() - proxy.lastUsed.getTime()) / 60000
        : null;

      // Penalise recent use (linear over the last 5 minutes).
      if (minutesSinceUse !== null && minutesSinceUse < 5) {
        score -= (5 - minutesSinceUse) * 10;
      }

      // Bonus for a high success rate; proxies with no history get a neutral 0.5.
      const successRate = proxy.stats.totalRequests > 0
        ? proxy.stats.successfulRequests / proxy.stats.totalRequests
        : 0.5;
      score += successRate * 20;

      // Penalise high latency.
      if (proxy.stats.avgLatencyMs > 2000) {
        score -= 10;
      }

      // Bonus for residential proxies.
      if (proxy.type === 'residential') {
        score += 15;
      }

      // Extra penalty when a fresh proxy is required and this one was used recently.
      if (options.requireFresh && minutesSinceUse !== null && minutesSinceUse < 30) {
        score -= 50;
      }
      return { proxy, score };
    });

    scored.sort((a, b) => b.score - a.score);
    const topN = scored.slice(0, Math.min(3, scored.length));
    const randomIndex = Math.floor(Math.random() * topN.length);
    return topN[randomIndex].proxy;
  }

  /** Records a successful request and folds its latency into the rolling average. */
  async reportSuccess(proxyId: string, domain: string, latencyMs: number): Promise<void> {
    const key = `proxy:stats:${proxyId}`;
    await this.redis.multi()
      .hincrby(key, 'totalRequests', 1)
      .hincrby(key, 'successfulRequests', 1)
      .hset(key, 'lastSuccess', Date.now().toString())
      .exec();
    await this.updateAvgLatency(proxyId, latencyMs);
    this.logger.debug(`Proxy ${proxyId} success on ${domain} (${latencyMs}ms)`);
  }

  /**
   * Records a failed request. Every failure increments `failedRequests`
   * exactly once; a block additionally increments `blockedRequests` and
   * triggers the per-domain block handling.
   */
  async reportFailure(
    proxyId: string,
    domain: string,
    error: Error,
    isBlock: boolean = false
  ): Promise<void> {
    const key = `proxy:stats:${proxyId}`;
    const transaction = this.redis.multi()
      .hincrby(key, 'totalRequests', 1)
      .hincrby(key, 'failedRequests', 1);
    if (isBlock) {
      transaction.hincrby(key, 'blockedRequests', 1);
    }
    transaction.hset(key, 'lastFailure', Date.now().toString());
    await transaction.exec();

    if (isBlock) {
      await this.handleBlock(proxyId, domain);
    }
    this.logger.warn(`Proxy ${proxyId} failed on ${domain}: ${error.message}`);
  }

  /** Summarises the pool for the status API. */
  async getPoolStatus(): Promise<{
    total: number;
    active: number;
    cooling: number;
    blocked: number;
    avgSuccessRate: number;
    avgLatencyMs: number;
    byProvider: Record<string, number>;
    byType: Record<string, number>;
    recentBlocks: Array<{ proxyId: string; domain: string }>;
    bandwidthUsage: Record<string, number>;
  }> {
    const proxies = await this.getAllProxies();
    const byProvider: Record<string, number> = {};
    const byType: Record<string, number> = {};
    const byStatus: Record<string, number> = {};
    const bandwidthUsage: Record<string, number> = {};
    let totalRequests = 0;
    let successfulRequests = 0;
    let latencySum = 0;
    let latencySamples = 0;

    for (const proxy of proxies) {
      byProvider[proxy.provider] = (byProvider[proxy.provider] ?? 0) + 1;
      byType[proxy.type] = (byType[proxy.type] ?? 0) + 1;
      byStatus[proxy.status] = (byStatus[proxy.status] ?? 0) + 1;
      bandwidthUsage[proxy.provider] =
        (bandwidthUsage[proxy.provider] ?? 0) + proxy.stats.bandwidthUsedMb;
      totalRequests += proxy.stats.totalRequests;
      successfulRequests += proxy.stats.successfulRequests;
      // Only proxies that have served traffic contribute to the latency average.
      if (proxy.stats.totalRequests > 0) {
        latencySum += proxy.stats.avgLatencyMs;
        latencySamples += 1;
      }
    }

    // Live block markers: they expire after the per-domain block window.
    const blockPrefix = 'proxy:blocked:';
    const blockKeys = await this.scanKeys(`${blockPrefix}*`);
    const recentBlocks = blockKeys.map(key => {
      const [proxyId, ...domainParts] = key.slice(blockPrefix.length).split(':');
      return { proxyId, domain: domainParts.join(':') };
    });

    return {
      total: proxies.length,
      active: byStatus['active'] ?? 0,
      cooling: byStatus['cooling'] ?? 0,
      blocked: byStatus['blocked'] ?? 0,
      avgSuccessRate: totalRequests > 0 ? successfulRequests / totalRequests : 0,
      avgLatencyMs: latencySamples > 0 ? Math.round(latencySum / latencySamples) : 0,
      byProvider,
      byType,
      recentBlocks,
      bandwidthUsage,
    };
  }

  /** Returns one proxy with its current stats, or null when the id is unknown. */
  async getProxyDetails(proxyId: string): Promise<ProxyWithMetadata | null> {
    return this.getProxyById(proxyId);
  }

  /** Deletes the counters and latency samples recorded for a proxy. */
  async resetProxyStats(proxyId: string): Promise<void> {
    await this.redis.del(`proxy:stats:${proxyId}`, `proxy:latency:${proxyId}`);
  }

  /**
   * Puts a proxy into general cooldown for the given number of minutes.
   * @throws RangeError when `minutes` is not a positive finite number.
   */
  async setCooldown(proxyId: string, minutes: number): Promise<void> {
    if (!Number.isFinite(minutes) || minutes <= 0) {
      throw new RangeError(`Cooldown minutes must be a positive number, got ${minutes}`);
    }
    const cooldownUntil = Date.now() + minutes * 60 * 1000;
    await this.redis.hset(
      `proxy:${proxyId}`,
      'status', 'cooling',
      'cooldownUntil', cooldownUntil.toString(),
    );
  }

  /**
   * Marks the proxy as blocked for `domain` and, once it is blocked on
   * several domains at the same time, puts it into general cooldown.
   */
  private async handleBlock(proxyId: string, domain: string): Promise<void> {
    const cooldownMinutes = ProxyPoolManager.DOMAIN_BLOCK_MINUTES;
    const cooldownUntil = Date.now() + (cooldownMinutes * 60 * 1000);
    await this.redis.set(
      `proxy:blocked:${proxyId}:${domain}`,
      cooldownUntil.toString(),
      'EX',
      cooldownMinutes * 60
    );

    const blockedDomains = await this.scanKeys(`proxy:blocked:${proxyId}:*`);
    if (blockedDomains.length >= ProxyPoolManager.GLOBAL_COOLDOWN_DOMAINS) {
      await this.redis.hset(
        `proxy:${proxyId}`,
        'status', 'cooling',
        'cooldownUntil', (Date.now() + ProxyPoolManager.GLOBAL_COOLDOWN_MS).toString(),
      );
      this.logger.warn(`Proxy ${proxyId} put in cooling (blocked on ${blockedDomains.length} domains)`);
    }
  }

  /** Resolves the proxy bound to a sticky session, or null if the session is gone. */
  private async getStickySession(sessionId: string): Promise<ProxyWithMetadata | null> {
    const proxyId = await this.redis.get(`proxy:session:${sessionId}`);
    if (!proxyId) return null;
    return this.getProxyById(proxyId);
  }

  /** Binds a proxy to a newly generated session id that expires after 10 minutes. */
  private async createStickySession(proxy: ProxyWithMetadata): Promise<string> {
    const sessionId = `sess_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
    await this.redis.setex(
      `proxy:session:${sessionId}`,
      ProxyPoolManager.STICKY_SESSION_TTL_SECONDS,
      proxy.id,
    );
    return sessionId;
  }

  /** Stamps the proxy with the current time as its last use. */
  private async markUsed(proxyId: string): Promise<void> {
    await this.redis.hset(`proxy:${proxyId}`, 'lastUsed', Date.now().toString());
  }

  /** Keeps the last 100 latency samples and stores their rounded mean in the stats hash. */
  private async updateAvgLatency(proxyId: string, latencyMs: number): Promise<void> {
    const key = `proxy:latency:${proxyId}`;
    await this.redis.lpush(key, latencyMs.toString());
    await this.redis.ltrim(key, 0, ProxyPoolManager.LATENCY_WINDOW - 1);
    const latencies = await this.redis.lrange(key, 0, -1);
    if (latencies.length === 0) return;
    const total = latencies.reduce((sum, sample) => sum + Number(sample), 0);
    const avg = total / latencies.length;
    await this.redis.hset(`proxy:stats:${proxyId}`, 'avgLatencyMs', Math.round(avg).toString());
  }

  /**
   * Collects keys matching `pattern` with SCAN instead of KEYS, so the server
   * is not blocked while the whole keyspace is walked. SCAN may return a key
   * more than once, hence the Set.
   */
  private async scanKeys(pattern: string): Promise<string[]> {
    const found = new Set<string>();
    let cursor = '0';
    do {
      const [nextCursor, batch] = await this.redis.scan(cursor, 'MATCH', pattern, 'COUNT', 200);
      for (const key of batch) {
        found.add(key);
      }
      cursor = nextCursor;
    } while (cursor !== '0');
    return [...found];
  }

  /** Loads every proxy definition, with its stats, from Redis. */
  private async getAllProxies(): Promise<ProxyWithMetadata[]> {
    const keys = await this.scanKeys('proxy:*');
    const ids = keys
      .filter(key => ProxyPoolManager.PROXY_KEY_PATTERN.test(key))
      .map(key => key.slice('proxy:'.length));
    const loaded = await Promise.all(ids.map(id => this.getProxyById(id)));
    return loaded.filter((proxy): proxy is ProxyWithMetadata => proxy !== null);
  }

  /** Loads one proxy and its stats hash; null when no definition with a host exists. */
  private async getProxyById(id: string): Promise<ProxyWithMetadata | null> {
    const [data, statsHash] = await Promise.all([
      this.redis.hgetall(`proxy:${id}`),
      this.redis.hgetall(`proxy:stats:${id}`),
    ]);
    if (!data.host) return null;
    return this.parseProxyData(data, statsHash, id);
  }

  /**
   * Converts the flat Redis hashes into a typed proxy record.
   *
   * @param data       Fields of the `proxy:<id>` hash.
   * @param statsHash  Fields of the `proxy:stats:<id>` hash; they take precedence
   *                   over a JSON `stats` field embedded in `data`.
   * @param fallbackId Id taken from the key, used when the hash has no `id` field.
   */
  private parseProxyData(
    data: Record<string, string>,
    statsHash: Record<string, string> = {},
    fallbackId?: string
  ): ProxyWithMetadata {
    const embeddedStats: Record<string, unknown> = data.stats ? JSON.parse(data.stats) : {};
    return {
      id: data.id || fallbackId || '',
      host: data.host,
      port: Number.parseInt(data.port, 10),
      username: data.username,
      password: data.password,
      protocol: data.protocol as ProxyWithMetadata['protocol'],
      provider: data.provider,
      type: data.type as ProxyWithMetadata['type'],
      geo: JSON.parse(data.geo || '{}'),
      stats: this.parseStats({ ...embeddedStats, ...statsHash }),
      status: data.status as ProxyWithMetadata['status'],
      lastUsed: data.lastUsed ? new Date(Number.parseInt(data.lastUsed, 10)) : null,
      cooldownUntil: data.cooldownUntil ? new Date(Number.parseInt(data.cooldownUntil, 10)) : null,
    };
  }

  /** Normalises raw stat fields: missing or non-numeric counters become 0, missing dates null. */
  private parseStats(raw: Record<string, unknown>): ProxyWithMetadata['stats'] {
    const toNumber = (value: unknown): number => {
      const parsed = Number(value);
      return value !== undefined && value !== null && Number.isFinite(parsed) ? parsed : 0;
    };
    const toDate = (value: unknown): Date | null => {
      if (value === undefined || value === null || value === '') return null;
      const asNumber = Number(value);
      return new Date(Number.isFinite(asNumber) ? asNumber : String(value));
    };
    return {
      totalRequests: toNumber(raw.totalRequests),
      successfulRequests: toNumber(raw.successfulRequests),
      failedRequests: toNumber(raw.failedRequests),
      blockedRequests: toNumber(raw.blockedRequests),
      avgLatencyMs: toNumber(raw.avgLatencyMs),
      bandwidthUsedMb: toNumber(raw.bandwidthUsedMb),
      lastSuccess: toDate(raw.lastSuccess),
      lastFailure: toDate(raw.lastFailure),
    };
  }
}
```
### 4.3 Health Checker
```typescript
// src/proxy/health-checker.ts
import { ProxyWithMetadata } from './types';
import fetch from 'node-fetch';
import { HttpsProxyAgent } from 'https-proxy-agent';
/**
 * Probes proxies by fetching a small "what is my IP" endpoint through them
 * and reporting latency and the IP address the endpoint saw.
 */
export class ProxyHealthChecker {
  // Only the first URL is currently probed; the others are kept as listed in the spec.
  private testUrls = [
    'https://httpbin.org/ip',
    'https://api.ipify.org?format=json',
    'https://www.google.com.mx',
  ];

  /**
   * Sends one request through `proxy` with a 10 s timeout.
   *
   * @returns `healthy: true` with the detected IP on a 2xx JSON response;
   *          otherwise `healthy: false` with an `error` description.
   *          `latencyMs` is always the elapsed wall-clock time. Never rejects.
   */
  async checkProxy(proxy: ProxyWithMetadata): Promise<{
    healthy: boolean;
    latencyMs: number;
    detectedIp: string | null;
    error?: string;
  }> {
    const startTime = Date.now();
    try {
      // Built inside the try so a malformed proxy URL is reported as unhealthy
      // instead of rejecting the whole check.
      const agent = new HttpsProxyAgent(this.buildProxyUrl(proxy));
      const response = await fetch(this.testUrls[0], {
        agent,
        timeout: 10000,
        headers: {
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        },
      });
      if (!response.ok) {
        return {
          healthy: false,
          latencyMs: Date.now() - startTime,
          detectedIp: null,
          error: `HTTP ${response.status}`,
        };
      }
      // httpbin answers with `origin`, ipify with `ip`.
      const data = await response.json() as { origin?: string; ip?: string };
      const detectedIp = data.origin || data.ip || null;
      return {
        healthy: true,
        latencyMs: Date.now() - startTime,
        detectedIp,
      };
    } catch (error: unknown) {
      return {
        healthy: false,
        latencyMs: Date.now() - startTime,
        detectedIp: null,
        error: error instanceof Error ? error.message : String(error),
      };
    }
  }

  /**
   * Checks many proxies, at most 10 at a time.
   * @returns Map from proxy id to whether the check succeeded.
   */
  async checkBatch(proxies: ProxyWithMetadata[]): Promise<Map<string, boolean>> {
    const results = new Map<string, boolean>();
    const concurrency = 10;
    for (const chunk of this.chunkArray(proxies, concurrency)) {
      await Promise.all(
        chunk.map(async proxy => {
          const result = await this.checkProxy(proxy);
          results.set(proxy.id, result.healthy);
        }),
      );
    }
    return results;
  }

  /**
   * Builds `protocol://user:pass@host:port`. Credentials are percent-encoded
   * so characters such as `@`, `:` or `/` in a password cannot corrupt the URL.
   */
  private buildProxyUrl(proxy: ProxyWithMetadata): string {
    const auth = proxy.username && proxy.password
      ? `${encodeURIComponent(proxy.username)}:${encodeURIComponent(proxy.password)}@`
      : '';
    return `${proxy.protocol}://${auth}${proxy.host}:${proxy.port}`;
  }

  /** Splits `array` into consecutive slices of at most `size` elements. */
  private chunkArray<T>(array: T[], size: number): T[][] {
    const chunks: T[][] = [];
    for (let i = 0; i < array.length; i += size) {
      chunks.push(array.slice(i, i + size));
    }
    return chunks;
  }
}
```
### 4.4 Bright Data Provider
```typescript
// src/proxy/providers/brightdata.provider.ts
import { ProxyProvider, ProxyConfig } from '../types';
/** Settings needed to reach the Bright Data super-proxy. */
export interface BrightDataConfig {
  endpoint: string;
  username: string;
  password: string;
  // NOTE(review): `zone` is accepted but not applied to the username — confirm
  // whether the configured username already carries the zone.
  zone?: string;
  /** Super-proxy port; defaults to 22225 when omitted. */
  port?: number;
}

/**
 * Builds proxy configurations for Bright Data, whose targeting options
 * (country, city, session, carrier) are encoded as suffixes of the username.
 */
export class BrightDataProvider implements ProxyProvider {
  private static readonly DEFAULT_PORT = 22225;

  private config: BrightDataConfig;

  constructor(config: BrightDataConfig) {
    this.config = config;
  }

  /**
   * Returns a proxy configuration with the requested targeting.
   * The session suffix is added only when `sticky` is set and a `sessionId` is given.
   */
  getProxy(options?: {
    country?: string;
    city?: string;
    sessionId?: string;
    sticky?: boolean;
  }): ProxyConfig {
    let username = this.config.username;
    if (options?.country) {
      username += `-country-${options.country}`;
    }
    if (options?.city) {
      username += `-city-${options.city}`;
    }
    if (options?.sticky && options?.sessionId) {
      username += `-session-${options.sessionId}`;
    }
    return this.buildConfig(username);
  }

  /** Residential proxy for a country/city; a fresh session id is generated when `sticky`. */
  async getResidentialProxy(options: {
    country: string;
    city?: string;
    sticky?: boolean;
  }): Promise<ProxyConfig> {
    const sessionId = options.sticky
      ? `sess_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`
      : undefined;
    return this.getProxy({
      country: options.country,
      city: options.city,
      sessionId,
      sticky: options.sticky,
    });
  }

  /** Mobile-zone proxy for a country, optionally pinned to a carrier. */
  async getMobileProxy(options: {
    country: string;
    carrier?: string;
  }): Promise<ProxyConfig> {
    let username = `${this.config.username}-zone-mobile-country-${options.country}`;
    if (options.carrier) {
      username += `-carrier-${options.carrier}`;
    }
    return this.buildConfig(username);
  }

  /** Assembles the connection settings shared by every proxy flavour. */
  private buildConfig(username: string): ProxyConfig {
    return {
      host: this.config.endpoint,
      port: this.config.port ?? BrightDataProvider.DEFAULT_PORT,
      username,
      password: this.config.password,
      protocol: 'http',
    };
  }
}
```
---
## 5. Rotacion Inteligente
### 5.1 Estrategias de Rotacion
```typescript
// src/proxy/rotation-strategies.ts
import { ProxyWithMetadata, ProxySelection } from './types';
import { ProxyPoolManager } from './pool-manager';
/**
 * A policy that decides, for each request, whether to keep the current
 * proxy or obtain another one from the pool.
 */
export interface RotationStrategy {
  /** Identifier of the strategy. */
  name: string;
  /**
   * Chooses the proxy for the next request.
   * @param pool    Pool to draw a proxy from when rotation is needed.
   * @param context Caller-maintained state about the current scraping session.
   */
  selectProxy(
    pool: ProxyPoolManager,
    context: RotationContext
  ): Promise<ProxySelection>;
}
/** State supplied by the caller so a strategy can decide whether to rotate. */
export interface RotationContext {
  /** Target domain of the next request. */
  domain: string;
  /** Request counter maintained by the caller. */
  requestCount: number;
  /** Proxy used for the previous request, if any. */
  lastProxy?: ProxyWithMetadata;
  /** Start time used by time-based rotation to measure elapsed time. */
  sessionStart?: Date;
}
/**
 * Strategy: rotate every N requests.
 *
 * A new proxy is requested whenever `requestCount` is a multiple of N, or
 * when there is no previous proxy to reuse.
 */
export class EveryNRequestsStrategy implements RotationStrategy {
  name = 'every_n_requests';
  private n: number;

  /**
   * @param n Rotation period in requests; fractional values are floored.
   * @throws RangeError when `n` is not a finite number >= 1. Such a value
   *         would make the modulo test never match and silently stop rotation.
   */
  constructor(n: number = 10) {
    if (!Number.isFinite(n) || n < 1) {
      throw new RangeError(`EveryNRequestsStrategy requires n >= 1, got ${n}`);
    }
    this.n = Math.floor(n);
  }

  async selectProxy(
    pool: ProxyPoolManager,
    context: RotationContext
  ): Promise<ProxySelection> {
    const shouldRotate = context.requestCount % this.n === 0;
    if (!shouldRotate && context.lastProxy) {
      return { proxy: context.lastProxy };
    }
    return pool.getProxy({
      targetDomain: context.domain,
      requireFresh: true,
    });
  }
}
/**
 * Strategy: rotate by elapsed time.
 *
 * The previous proxy is kept while the session is younger than the configured
 * interval; otherwise a sticky proxy is requested from the pool.
 */
export class TimeBasedStrategy implements RotationStrategy {
  name = 'time_based';
  private intervalMs: number;

  /** @param intervalMinutes Maximum time a proxy is reused, in minutes. */
  constructor(intervalMinutes: number = 10) {
    this.intervalMs = intervalMinutes * 60 * 1000;
  }

  async selectProxy(
    pool: ProxyPoolManager,
    context: RotationContext
  ): Promise<ProxySelection> {
    const { domain, lastProxy, sessionStart } = context;

    // Without a session start the session counts as infinitely old.
    let ageMs = Infinity;
    if (sessionStart) {
      ageMs = Date.now() - sessionStart.getTime();
    }

    const stillWithinInterval = ageMs < this.intervalMs;
    if (stillWithinInterval && lastProxy) {
      return { proxy: lastProxy };
    }

    return pool.getProxy({ targetDomain: domain, stickySession: true });
  }
}
/**
 * Strategy: weighted round robin.
 *
 * Placeholder: the weighting by success rate is not implemented yet, so every
 * call simply delegates to the pool's default selection.
 */
export class WeightedRoundRobinStrategy implements RotationStrategy {
  name = 'weighted_round_robin';
  // Reserved for the round-robin cursor; not read by the current implementation.
  private currentIndex = 0;

  async selectProxy(
    pool: ProxyPoolManager,
    context: RotationContext
  ): Promise<ProxySelection> {
    // TODO: implement round robin weighted by success rate.
    const { domain } = context;
    return pool.getProxy({ targetDomain: domain });
  }
}
/**
 * Strategy: adaptive, driven by request outcomes.
 *
 * Callers report outcomes through `recordSuccess` / `recordFailure`. Once the
 * number of consecutive failures reaches the threshold, the next selection
 * forces a fresh proxy and resets the counter; otherwise the current proxy is kept.
 */
export class AdaptiveStrategy implements RotationStrategy {
  name = 'adaptive';
  private failureThreshold = 2;
  private consecutiveFailures = 0;

  async selectProxy(
    pool: ProxyPoolManager,
    context: RotationContext
  ): Promise<ProxySelection> {
    const mustRotate = this.consecutiveFailures >= this.failureThreshold;

    if (!mustRotate) {
      // Keep the current proxy when there is one; otherwise open a sticky session.
      return context.lastProxy
        ? { proxy: context.lastProxy }
        : pool.getProxy({ targetDomain: context.domain, stickySession: true });
    }

    // Too many consecutive failures: start counting again and force a rotation.
    this.consecutiveFailures = 0;
    return pool.getProxy({ targetDomain: context.domain, requireFresh: true });
  }

  /** Clears the consecutive-failure counter. */
  recordSuccess(): void {
    this.consecutiveFailures = 0;
  }

  /** Adds one to the consecutive-failure counter. */
  recordFailure(): void {
    this.consecutiveFailures += 1;
  }
}
```
---
## 6. Integracion con Playwright
```typescript
// src/proxy/playwright-integration.ts
import { Browser, BrowserContext, Page } from 'playwright';
import { ProxyPoolManager } from './pool-manager';
import { ProxyWithMetadata } from './types';
/**
 * Glue between Playwright and the proxy pool: creates browser contexts that
 * go through a pool-selected proxy and reports request outcomes back to the pool.
 */
export class PlaywrightProxyIntegration {
  /** HTTP statuses reported to the pool as blocks. */
  private static readonly BLOCK_STATUS_CODES: ReadonlySet<number> = new Set([403, 429, 503]);

  private proxyPool: ProxyPoolManager;

  constructor() {
    this.proxyPool = new ProxyPoolManager();
  }

  /**
   * Creates a browser context routed through a sticky proxy for `options.domain`,
   * with the es-MX locale and the America/Mexico_City timezone.
   *
   * @throws Error when the pool returns no session id for the sticky selection.
   */
  async createContextWithProxy(
    browser: Browser,
    options: {
      domain: string;
      preferredType?: 'residential' | 'datacenter';
      userAgent?: string;
    }
  ): Promise<{
    context: BrowserContext;
    proxy: ProxyWithMetadata;
    sessionId: string;
  }> {
    const { proxy, sessionId } = await this.proxyPool.getProxy({
      targetDomain: options.domain,
      preferredType: options.preferredType,
      stickySession: true,
    });
    if (!sessionId) {
      throw new Error(`Proxy pool returned no sticky session for ${options.domain}`);
    }
    const context = await browser.newContext({
      proxy: {
        server: `${proxy.protocol}://${proxy.host}:${proxy.port}`,
        username: proxy.username,
        password: proxy.password,
      },
      userAgent: options.userAgent || this.getRandomUserAgent(),
      viewport: { width: 1920, height: 1080 },
      locale: 'es-MX',
      timezoneId: 'America/Mexico_City',
    });
    return { context, proxy, sessionId };
  }

  /**
   * Attaches listeners that report every failed request and every response
   * of `page` to the pool, attributed to `proxy` and `domain`.
   *
   * Reporting errors are logged and swallowed: an event listener's rejected
   * promise would otherwise surface as an unhandled rejection.
   *
   * NOTE(review): responses from every host the page loads (including
   * third-party resources) are attributed to `domain` — confirm this is intended.
   */
  async wrapPageWithProxyHandling(
    page: Page,
    proxy: ProxyWithMetadata,
    domain: string
  ): Promise<Page> {
    page.on('requestfailed', async (request) => {
      const failure = request.failure();
      if (!failure) return;
      try {
        await this.proxyPool.reportFailure(
          proxy.id,
          domain,
          new Error(failure.errorText),
          this.isBlockError(failure.errorText)
        );
      } catch (error: unknown) {
        this.logReportError(error);
      }
    });

    page.on('response', async (response) => {
      try {
        const status = response.status();
        if (PlaywrightProxyIntegration.BLOCK_STATUS_CODES.has(status)) {
          await this.proxyPool.reportFailure(
            proxy.id,
            domain,
            new Error(`HTTP ${status}`),
            true
          );
          return;
        }
        if (status < 200 || status >= 400) return;

        // `responseEnd` is only populated once the request has finished, so
        // wait for the body before reading the timing.
        await response.finished();
        const latencyMs = this.measureLatency(response.request().timing());
        await this.proxyPool.reportSuccess(proxy.id, domain, latencyMs);
      } catch (error: unknown) {
        this.logReportError(error);
      }
    });
    return page;
  }

  /**
   * Request duration in milliseconds from Playwright resource timing.
   * Unavailable timing values are reported as -1; in that case this falls back
   * to time-to-first-byte and finally to 0, never returning a negative number.
   */
  private measureLatency(timing: {
    requestStart: number;
    responseStart: number;
    responseEnd: number;
  }): number {
    if (timing.requestStart < 0) return 0;
    const end = timing.responseEnd >= 0 ? timing.responseEnd : timing.responseStart;
    return end >= timing.requestStart ? end - timing.requestStart : 0;
  }

  /** Case-insensitive check of a network error text against known block signatures. */
  private isBlockError(errorText: string): boolean {
    const blockPatterns = [
      'net::ERR_PROXY_CONNECTION_FAILED',
      'net::ERR_TUNNEL_CONNECTION_FAILED',
      'Cloudflare',
      'Access Denied',
      'blocked',
    ];
    const haystack = errorText.toLowerCase();
    return blockPatterns.some(pattern => haystack.includes(pattern.toLowerCase()));
  }

  /** Logs a failure to report an outcome without interrupting page handling. */
  private logReportError(error: unknown): void {
    const message = error instanceof Error ? error.message : String(error);
    console.warn(`[PlaywrightProxyIntegration] failed to report proxy outcome: ${message}`);
  }

  /** Picks one of a small set of desktop user-agent strings at random. */
  private getRandomUserAgent(): string {
    const userAgents = [
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
    ];
    return userAgents[Math.floor(Math.random() * userAgents.length)];
  }
}
```
---
## 7. Dashboard de Monitoreo
### 7.1 Metricas Prometheus
```typescript
// src/proxy/metrics.ts
import { Counter, Gauge, Histogram } from 'prom-client';
// Counters

/** Requests sent through a proxy, labelled by provider, proxy type and outcome status. */
const requestsTotal = new Counter({
  name: 'proxy_requests_total',
  help: 'Total proxy requests',
  labelNames: ['provider', 'type', 'status'],
});

/** Blocks detected, labelled by provider and target domain. */
const blocksTotal = new Counter({
  name: 'proxy_blocks_total',
  help: 'Total proxy blocks detected',
  labelNames: ['provider', 'domain'],
});

/** Proxy rotations, labelled by the reason for rotating. */
const rotationsTotal = new Counter({
  name: 'proxy_rotations_total',
  help: 'Total proxy rotations',
  labelNames: ['reason'],
});

// Gauges

const activeProxies = new Gauge({
  name: 'proxy_active_count',
  help: 'Number of active proxies',
  labelNames: ['provider', 'type'],
});

const coolingProxies = new Gauge({
  name: 'proxy_cooling_count',
  help: 'Number of proxies in cooling period',
  labelNames: ['provider'],
});

const bandwidthUsedMb = new Gauge({
  name: 'proxy_bandwidth_used_mb',
  help: 'Bandwidth used in MB',
  labelNames: ['provider'],
});

// Histograms

const latencySeconds = new Histogram({
  name: 'proxy_latency_seconds',
  help: 'Proxy request latency',
  labelNames: ['provider'],
  buckets: [0.1, 0.5, 1, 2, 5, 10],
});

const successRate = new Histogram({
  name: 'proxy_success_rate',
  help: 'Proxy success rate',
  labelNames: ['provider'],
  buckets: [0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 1.0],
});

/** Prometheus metrics exposed by the proxy subsystem, keyed by short name. */
export const proxyMetrics = {
  requests_total: requestsTotal,
  blocks_total: blocksTotal,
  rotations_total: rotationsTotal,
  active_proxies: activeProxies,
  cooling_proxies: coolingProxies,
  bandwidth_used_mb: bandwidthUsedMb,
  latency_seconds: latencySeconds,
  success_rate: successRate,
};
```
### 7.2 API de Estado
```typescript
// src/proxy/routes.ts
import { Router } from 'express';
import { ProxyPoolManager } from './pool-manager';
// HTTP routes exposing the state of the proxy pool.
// Every async handler forwards failures to `next`, because Express 4 does not
// catch rejected promises on its own and the request would otherwise hang.
const router = Router();
const pool = new ProxyPoolManager();

// GET /api/proxies/status — aggregated view of the pool.
router.get('/status', async (req, res, next) => {
  try {
    const stats = await pool.getPoolStatus();
    res.json({
      overview: {
        totalProxies: stats.total,
        activeProxies: stats.active,
        coolingProxies: stats.cooling,
        blockedProxies: stats.blocked,
        avgSuccessRate: stats.avgSuccessRate,
        avgLatencyMs: stats.avgLatencyMs,
      },
      byProvider: stats.byProvider,
      byType: stats.byType,
      recentBlocks: stats.recentBlocks,
      bandwidthUsage: stats.bandwidthUsage,
    });
  } catch (error) {
    next(error);
  }
});

// GET /api/proxies/:id — details of one proxy, 404 when unknown.
router.get('/:id', async (req, res, next) => {
  try {
    const proxy = await pool.getProxyDetails(req.params.id);
    if (!proxy) {
      res.status(404).json({ error: 'Proxy not found' });
      return;
    }
    res.json(proxy);
  } catch (error) {
    next(error);
  }
});

// POST /api/proxies/:id/reset — clears the proxy's statistics.
router.post('/:id/reset', async (req, res, next) => {
  try {
    await pool.resetProxyStats(req.params.id);
    res.json({ success: true });
  } catch (error) {
    next(error);
  }
});

// POST /api/proxies/:id/cooldown — body: { minutes?: number }, default 30.
router.post('/:id/cooldown', async (req, res, next) => {
  try {
    // `req.body` is undefined when no body parser ran, so read it defensively.
    const minutes = Number(req.body?.minutes ?? 30);
    if (!Number.isFinite(minutes) || minutes <= 0) {
      res.status(400).json({ error: 'minutes must be a positive number' });
      return;
    }
    await pool.setCooldown(req.params.id, minutes);
    res.json({ success: true });
  } catch (error) {
    next(error);
  }
});

export default router;
```
---
## 8. Costos y Presupuesto
```yaml
# config/proxy-budget.yml
# Nesting restored: `allocation` and `alerts` belong to `monthly_budget`;
# `cost_per_request` is a separate top-level table. Amounts are in USD.
monthly_budget:
  total_usd: 500
  allocation:
    residential: 400 # 80%
    datacenter: 50 # 10%
    mobile: 50 # 10% (reserve)
  alerts:
    warning_threshold: 0.7 # 70% of the budget
    critical_threshold: 0.9 # 90% of the budget
    actions_on_limit:
      warning:
        - reduce_concurrency
        - prefer_datacenter
      critical:
        - pause_non_essential
        - alert_admin

cost_per_request:
  inmuebles24: 0.02 # Hard site
  metros_cubicos: 0.01 # Easy site
  vivanuncios: 0.015 # Medium site
---
## 9. Tests
```typescript
// src/proxy/__tests__/pool-manager.test.ts
import { ProxyPoolManager } from '../pool-manager';
import { Redis } from 'ioredis';
jest.mock('ioredis');
// Unit tests for ProxyPoolManager. `ioredis` is mocked by the jest.mock call
// that precedes this block.
// NOTE(review): no fixture seeds the mocked Redis with proxy definitions here —
// confirm how `getProxy` is expected to find candidates in these tests.
describe('ProxyPoolManager', () => {
  let manager: ProxyPoolManager;

  // A new manager per test so state held by one test does not leak into the next.
  beforeEach(() => {
    manager = new ProxyPoolManager();
  });

  describe('getProxy', () => {
    it('should return a proxy for valid domain', async () => {
      const result = await manager.getProxy({
        targetDomain: 'inmuebles24.com',
      });
      expect(result.proxy).toBeDefined();
      expect(result.proxy.host).toBeDefined();
    });

    // The second call passes the session id returned by the first one and
    // must resolve to the same proxy.
    it('should reuse sticky session when provided', async () => {
      const first = await manager.getProxy({
        targetDomain: 'test.com',
        stickySession: true,
      });
      const second = await manager.getProxy({
        targetDomain: 'test.com',
        stickySession: true,
        sessionId: first.sessionId,
      });
      expect(first.proxy.id).toBe(second.proxy.id);
    });
  });

  describe('reportFailure', () => {
    // NOTE(review): this test has no assertions yet, so it passes whenever
    // the calls do not throw.
    it('should put proxy in cooling after block', async () => {
      const { proxy } = await manager.getProxy({
        targetDomain: 'test.com',
      });
      await manager.reportFailure(proxy.id, 'test.com', new Error('403'), true);
      // Verify proxy is in cooling for this domain
      // ... assertions
    });
  });
});
```
---
**Anterior:** [ET-IA-007-etl.md](./ET-IA-007-etl.md)
**Siguiente:** [ET-IA-007-monitoring.md](./ET-IA-007-monitoring.md)