inmobiliaria-analytics/docs/01-fase-alcance-inicial/IAI-007-webscraper/especificaciones/ET-SCR-003-proxies.md
rckrdmrd f570727617 feat: Documentation and orchestration updates
🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-07 05:35:40 -06:00

31 KiB

id title type epic status version project created_date updated_date
ET-SCR-003 Especificacion Tecnica - Gestion de Pool de Proxies Technical Specification IAI-007 Draft 1.0 inmobiliaria-analytics 2026-01-04 2026-01-04

ET-SCR-003: Gestion de Pool de Proxies


1. Resumen

Sistema de gestion de proxies residenciales, de datacenter y moviles que rota direcciones IP automaticamente para evitar bloqueos y mantener tasas de exito altas en el scraping.


2. Arquitectura del Sistema de Proxies

┌─────────────────────────────────────────────────────────────┐
│                    PROXY MANAGER                             │
├─────────────────────────────────────────────────────────────┤
│                                                              │
│  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐       │
│  │  Residential │  │  Datacenter  │  │   Mobile     │       │
│  │    Pool      │  │    Pool      │  │    Pool      │       │
│  │  (Premium)   │  │  (Backup)    │  │  (Reserved)  │       │
│  └──────┬───────┘  └──────┬───────┘  └──────┬───────┘       │
│         │                 │                 │                │
│         └────────────┬────┴────────────────┘                │
│                      │                                       │
│              ┌───────▼───────┐                              │
│              │   Selector    │                              │
│              │   Engine      │                              │
│              └───────┬───────┘                              │
│                      │                                       │
│    ┌─────────────────┼─────────────────┐                    │
│    │                 │                 │                    │
│    ▼                 ▼                 ▼                    │
│ ┌──────┐        ┌──────┐        ┌──────────┐               │
│ │Health│        │ Geo  │        │ Cooldown │               │
│ │Check │        │Filter│        │  Manager │               │
│ └──────┘        └──────┘        └──────────┘               │
│                                                              │
└─────────────────────────────────────────────────────────────┘
                           │
                           ▼
                    ┌─────────────┐
                    │   Browser   │
                    │   Manager   │
                    └─────────────┘

3. Proveedores de Proxies

3.1 Configuracion de Proveedores

# config/proxies.yml
# Proxy provider definitions plus global pool settings.
# Credentials are supplied through ${VAR} placeholders rather than literals.
providers:
  brightdata:
    type: residential
    priority: 1
    endpoint: "brd.superproxy.io"
    port: 22225
    username: "${BRIGHTDATA_USER}"
    password: "${BRIGHTDATA_PASS}"
    geo:
      country: "mx"
      city: "guadalajara"
    sticky_session: true
    session_duration: 600  # 10 minutes (value is in seconds)
    monthly_bandwidth: "100GB"
    cost_per_gb: 15  # USD

  smartproxy:
    type: residential
    priority: 2
    endpoint: "mx.smartproxy.com"
    port: 10000
    username: "${SMARTPROXY_USER}"
    password: "${SMARTPROXY_PASS}"
    geo:
      country: "mx"
    rotation: "per_request"
    monthly_bandwidth: "50GB"
    cost_per_gb: 12

  # NOTE(review): the pool manager listing registers this provider under the
  # key 'datacenter', not 'datacenter_pool' - confirm which name is canonical.
  datacenter_pool:
    type: datacenter
    priority: 3
    proxies:
      - host: "proxy1.example.com"
        port: 3128
      - host: "proxy2.example.com"
        port: 3128
    auth:
      username: "${DC_PROXY_USER}"
      password: "${DC_PROXY_PASS}"
    cost_per_request: 0.001

settings:
  default_provider: "brightdata"
  # Provider names, in the same order as the `priority` values above.
  fallback_chain: ["brightdata", "smartproxy", "datacenter_pool"]
  max_failures_before_switch: 3
  # NOTE(review): the pool manager listing hard-codes a 30 minute per-domain
  # block instead of reading this value - confirm which one should win.
  cooldown_after_block: 300  # 5 minutes (value is in seconds)
  health_check_interval: 60  # 1 minute (value is in seconds)

3.2 Tipos de Proxy y Uso

| Tipo | Uso Principal | Costo | Tasa Exito |
|------|---------------|-------|------------|
| Residential | Sitios con anti-bot agresivo | Alto | 95%+ |
| Datacenter | Sitios simples, backup | Bajo | 70-80% |
| Mobile | Casos especiales, Cloudflare | Muy Alto | 98%+ |

4. Implementacion

4.1 Interfaz de Proxy

// src/proxy/types.ts
/**
 * Connection parameters for a single proxy endpoint.
 * `username` and `password` are optional; when both are present they are used
 * as the proxy credentials.
 */
export interface ProxyConfig {
  host: string;
  port: number;
  username?: string;
  password?: string;
  protocol: 'http' | 'https' | 'socks5';
}

/**
 * A proxy endpoint enriched with the bookkeeping the pool needs to select it:
 * origin (provider/type/geo), usage statistics and availability state.
 */
export interface ProxyWithMetadata extends ProxyConfig {
  id: string;
  provider: string;
  type: 'residential' | 'datacenter' | 'mobile';
  geo: {
    country: string;
    city?: string;
    region?: string;
  };

  // Metrics
  stats: ProxyStats;

  // State
  status: 'active' | 'cooling' | 'blocked' | 'inactive';
  // Timestamp of the last time the pool handed this proxy out; null if never used.
  lastUsed: Date | null;
  // While status is 'cooling', the proxy is skipped until this instant passes.
  cooldownUntil: Date | null;
}

/** Usage counters and timing information tracked per proxy. */
export interface ProxyStats {
  totalRequests: number;
  successfulRequests: number;
  failedRequests: number;
  blockedRequests: number;
  // The pool manager maintains this as a rolling average over the latest 100 samples.
  avgLatencyMs: number;
  bandwidthUsedMb: number;
  lastSuccess: Date | null;
  lastFailure: Date | null;
}

/**
 * Result of asking the pool for a proxy.
 * `sessionId` is present when the selection is tied to a sticky session.
 */
export interface ProxySelection {
  proxy: ProxyWithMetadata;
  sessionId?: string;
}

4.2 Proxy Pool Manager

// src/proxy/pool-manager.ts
import { Redis } from 'ioredis';
import { ProxyWithMetadata, ProxyConfig, ProxySelection } from './types';
import { ProxyHealthChecker } from './health-checker';
import { Logger } from '../utils/logger';

/**
 * Redis-backed proxy pool: picks a proxy for a target domain, records
 * per-proxy statistics and applies per-domain / global cooldowns after blocks.
 *
 * Redis layout used by this class:
 * - `proxy:{id}`                  hash: connection data, status, lastUsed, cooldownUntil
 * - `proxy:stats:{id}`            hash: request counters, avgLatencyMs, lastSuccess/lastFailure
 * - `proxy:latency:{id}`          list: most recent latency samples
 * - `proxy:blocked:{id}:{domain}` string with TTL: proxy is blocked for that domain
 * - `proxy:session:{sessionId}`   string with TTL: sticky session -> proxy id
 */
export class ProxyPoolManager {
  /** Minutes a proxy stays excluded for a single domain after a block. */
  private static readonly DOMAIN_BLOCK_MINUTES = 30;
  /** Number of concurrently blocked domains that triggers a global cooldown. */
  private static readonly GLOBAL_COOLDOWN_DOMAIN_THRESHOLD = 3;
  /** Duration of the global cooldown (1 hour). */
  private static readonly GLOBAL_COOLDOWN_MS = 3600000;
  /** Lifetime of a sticky session mapping (10 minutes). */
  private static readonly STICKY_SESSION_SECONDS = 600;
  /** Number of latency samples kept for the rolling average. */
  private static readonly LATENCY_WINDOW = 100;
  /**
   * Matches only the main `proxy:{id}` hashes, not the stats/latency/session keys.
   * NOTE(review): ids containing '-', '_' or uppercase letters do not match - confirm id format.
   */
  private static readonly PROXY_KEY_PATTERN = /^proxy:[a-z0-9]+$/;

  private redis: Redis;
  private healthChecker: ProxyHealthChecker;
  private logger: Logger;
  private providers: Map<string, ProxyProvider>;

  constructor() {
    // Without REDIS_URL the client falls back to its own defaults.
    const redisUrl = process.env.REDIS_URL;
    this.redis = redisUrl ? new Redis(redisUrl) : new Redis();
    this.healthChecker = new ProxyHealthChecker();
    this.logger = new Logger('ProxyPool');
    this.providers = new Map();

    this.initializeProviders();
  }

  /** Registers one provider instance per configured proxy vendor. */
  private initializeProviders(): void {
    // Bright Data
    this.providers.set('brightdata', new BrightDataProvider({
      endpoint: process.env.BRIGHTDATA_ENDPOINT!,
      username: process.env.BRIGHTDATA_USER!,
      password: process.env.BRIGHTDATA_PASS!,
    }));

    // SmartProxy
    this.providers.set('smartproxy', new SmartProxyProvider({
      endpoint: process.env.SMARTPROXY_ENDPOINT!,
      username: process.env.SMARTPROXY_USER!,
      password: process.env.SMARTPROXY_PASS!,
    }));

    // Datacenter pool. The key matches the provider name used in
    // config/proxies.yml (`datacenter_pool`) so the fallback chain can resolve it.
    this.providers.set('datacenter_pool', new DatacenterProxyProvider({
      proxies: JSON.parse(process.env.DC_PROXIES || '[]'),
    }));
  }

  /**
   * Selects a proxy for `targetDomain`.
   *
   * @param options.targetDomain  Domain the proxy will be used against.
   * @param options.preferredType Restricts candidates to one proxy type.
   * @param options.requireFresh  Penalises proxies used in the last 30 minutes.
   * @param options.stickySession Reuse `sessionId` if still alive, otherwise open a new session.
   * @param options.sessionId     Existing sticky session to reuse.
   * @returns The selected proxy and, for sticky selections, its session id.
   * @throws Error when no candidate proxy is available for the domain.
   */
  async getProxy(options: {
    targetDomain: string;
    preferredType?: 'residential' | 'datacenter' | 'mobile';
    requireFresh?: boolean;
    stickySession?: boolean;
    sessionId?: string;
  }): Promise<ProxySelection> {
    const { targetDomain, preferredType, requireFresh, stickySession, sessionId } = options;

    // 1. Reuse the proxy bound to an active sticky session.
    if (stickySession && sessionId) {
      const existingProxy = await this.getStickySession(sessionId);
      if (existingProxy) {
        return { proxy: existingProxy, sessionId };
      }
    }

    // 2. Collect the candidate pool.
    const candidates = await this.getCandidates({
      domain: targetDomain,
      type: preferredType,
      excludeCooling: true,
      excludeBlocked: true,
    });

    if (candidates.length === 0) {
      throw new Error(`No proxies available for ${targetDomain}`);
    }

    // 3. Pick the best candidate.
    const selected = this.selectBestProxy(candidates, {
      requireFresh,
      domain: targetDomain,
    });

    // 4. Open a session when stickiness was requested.
    let newSessionId = sessionId;
    if (stickySession) {
      newSessionId = await this.createStickySession(selected);
    }

    // 5. Record the usage.
    await this.markUsed(selected.id);

    this.logger.debug(`Selected proxy ${selected.id} for ${targetDomain}`);

    return { proxy: selected, sessionId: newSessionId };
  }

  /**
   * Returns the proxies that may be used for `options.domain`.
   *
   * A proxy is eligible when it is `active`, or when it is `cooling` and either
   * its cooldown has already expired or `excludeCooling` is false. With
   * `excludeBlocked`, proxies that still have a live
   * `proxy:blocked:{id}:{domain}` key are removed as well.
   */
  private async getCandidates(options: {
    domain: string;
    type?: string;
    excludeCooling: boolean;
    excludeBlocked: boolean;
  }): Promise<ProxyWithMetadata[]> {
    const allProxies = await this.getAllProxies();
    const now = Date.now();

    const eligible = allProxies.filter(proxy => {
      if (options.type && proxy.type !== options.type) {
        return false;
      }
      if (proxy.status === 'active') {
        return true;
      }
      if (proxy.status !== 'cooling') {
        return false;
      }
      // The stored status is never flipped back to 'active', so an expired
      // cooldown has to be recognised here or the proxy would be lost forever.
      const stillCooling =
        proxy.cooldownUntil !== null && proxy.cooldownUntil.getTime() > now;
      return !(options.excludeCooling && stillCooling);
    });

    if (!options.excludeBlocked || eligible.length === 0) {
      return eligible;
    }

    // Block markers expire through their TTL, so existence means "still blocked".
    const blockedFlags = await Promise.all(
      eligible.map(proxy =>
        this.redis.exists(`proxy:blocked:${proxy.id}:${options.domain}`)
      )
    );

    return eligible.filter((_, index) => blockedFlags[index] === 0);
  }

  /**
   * Scores every candidate and returns a random pick among the three best,
   * so that selection does not follow a predictable pattern.
   *
   * @throws Error when `candidates` is empty.
   */
  private selectBestProxy(
    candidates: ProxyWithMetadata[],
    options: { requireFresh?: boolean; domain: string }
  ): ProxyWithMetadata {
    if (candidates.length === 0) {
      throw new Error(`No proxies available for ${options.domain}`);
    }

    const now = Date.now();

    const scored = candidates.map(proxy => {
      let score = 100;

      const minutesSinceUse = proxy.lastUsed
        ? (now - proxy.lastUsed.getTime()) / 60000
        : null;

      // Penalise recent use (linear, up to -50 for "just used").
      if (minutesSinceUse !== null && minutesSinceUse < 5) {
        score -= (5 - minutesSinceUse) * 10;
      }

      // Bonus for a high success rate; unknown proxies get a neutral 0.5.
      const successRate = proxy.stats.totalRequests > 0
        ? proxy.stats.successfulRequests / proxy.stats.totalRequests
        : 0.5;
      score += successRate * 20;

      // Penalise high latency.
      if (proxy.stats.avgLatencyMs > 2000) {
        score -= 10;
      }

      // Bonus for residential proxies.
      if (proxy.type === 'residential') {
        score += 15;
      }

      // Extra penalty when a fresh proxy was requested and this one was used recently.
      if (options.requireFresh && minutesSinceUse !== null && minutesSinceUse < 30) {
        score -= 50;
      }

      return { proxy, score };
    });

    scored.sort((a, b) => b.score - a.score);

    const topN = scored.slice(0, Math.min(3, scored.length));
    const randomIndex = Math.floor(Math.random() * topN.length);

    return topN[randomIndex].proxy;
  }

  /** Records a successful request and feeds its latency into the rolling average. */
  async reportSuccess(proxyId: string, domain: string, latencyMs: number): Promise<void> {
    const key = `proxy:stats:${proxyId}`;

    await this.redis.multi()
      .hincrby(key, 'totalRequests', 1)
      .hincrby(key, 'successfulRequests', 1)
      .hset(key, 'lastSuccess', Date.now().toString())
      .exec();

    await this.updateAvgLatency(proxyId, latencyMs);

    this.logger.debug(`Proxy ${proxyId} success on ${domain} (${latencyMs}ms)`);
  }

  /**
   * Records a failed request. Every failure counts once in `failedRequests`;
   * failures flagged as blocks additionally count in `blockedRequests` and
   * start the per-domain cooldown.
   */
  async reportFailure(
    proxyId: string,
    domain: string,
    error: Error,
    isBlock: boolean = false
  ): Promise<void> {
    const key = `proxy:stats:${proxyId}`;

    let tx = this.redis.multi()
      .hincrby(key, 'totalRequests', 1)
      .hincrby(key, 'failedRequests', 1);
    if (isBlock) {
      tx = tx.hincrby(key, 'blockedRequests', 1);
    }
    await tx.hset(key, 'lastFailure', Date.now().toString()).exec();

    if (isBlock) {
      await this.handleBlock(proxyId, domain);
    }

    this.logger.warn(`Proxy ${proxyId} failed on ${domain}: ${error.message}`);
  }

  /**
   * Marks the proxy as blocked for `domain` and, once it is blocked on enough
   * domains at the same time, puts it into a global cooldown.
   */
  private async handleBlock(proxyId: string, domain: string): Promise<void> {
    const cooldownSeconds = ProxyPoolManager.DOMAIN_BLOCK_MINUTES * 60;
    const cooldownUntil = Date.now() + cooldownSeconds * 1000;

    await this.redis.set(
      `proxy:blocked:${proxyId}:${domain}`,
      cooldownUntil.toString(),
      'EX',
      cooldownSeconds
    );

    const blockedDomains = await this.scanKeys(`proxy:blocked:${proxyId}:*`);

    if (blockedDomains.length >= ProxyPoolManager.GLOBAL_COOLDOWN_DOMAIN_THRESHOLD) {
      // Single HSET so status and deadline can never be observed out of sync.
      await this.redis.hset(
        `proxy:${proxyId}`,
        'status', 'cooling',
        'cooldownUntil', (Date.now() + ProxyPoolManager.GLOBAL_COOLDOWN_MS).toString()
      );

      this.logger.warn(`Proxy ${proxyId} put in cooling (blocked on ${blockedDomains.length} domains)`);
    }
  }

  /** Resolves the proxy bound to a sticky session, or null if it expired. */
  private async getStickySession(sessionId: string): Promise<ProxyWithMetadata | null> {
    const proxyId = await this.redis.get(`proxy:session:${sessionId}`);
    if (!proxyId) return null;

    return this.getProxyById(proxyId);
  }

  /** Creates a sticky session bound to `proxy` and returns its id. */
  private async createStickySession(proxy: ProxyWithMetadata): Promise<string> {
    const sessionId = `sess_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;

    await this.redis.setex(
      `proxy:session:${sessionId}`,
      ProxyPoolManager.STICKY_SESSION_SECONDS,
      proxy.id
    );

    return sessionId;
  }

  /** Stamps the proxy with the current time as its last use. */
  private async markUsed(proxyId: string): Promise<void> {
    await this.redis.hset(`proxy:${proxyId}`, 'lastUsed', Date.now().toString());
  }

  /** Appends a latency sample and stores the rolling average of the latest samples. */
  private async updateAvgLatency(proxyId: string, latencyMs: number): Promise<void> {
    const key = `proxy:latency:${proxyId}`;

    await this.redis.lpush(key, latencyMs.toString());
    await this.redis.ltrim(key, 0, ProxyPoolManager.LATENCY_WINDOW - 1);

    const samples = await this.redis.lrange(key, 0, -1);
    const total = samples.reduce((sum, sample) => sum + Number(sample), 0);
    const avg = samples.length > 0 ? total / samples.length : 0;

    await this.redis.hset(`proxy:stats:${proxyId}`, 'avgLatencyMs', Math.round(avg).toString());
  }

  /** Loads every proxy stored in Redis together with its statistics. */
  private async getAllProxies(): Promise<ProxyWithMetadata[]> {
    const proxyKeys = (await this.scanKeys('proxy:*'))
      .filter(key => ProxyPoolManager.PROXY_KEY_PATTERN.test(key));

    const loaded = await Promise.all(
      proxyKeys.map(key => this.getProxyById(key.slice('proxy:'.length)))
    );

    return loaded.filter((proxy): proxy is ProxyWithMetadata => proxy !== null);
  }

  /** Loads one proxy and its statistics; null when the proxy hash has no host. */
  private async getProxyById(id: string): Promise<ProxyWithMetadata | null> {
    const [data, statsHash] = await Promise.all([
      this.redis.hgetall(`proxy:${id}`),
      this.redis.hgetall(`proxy:stats:${id}`),
    ]);
    if (!data.host) return null;
    return this.parseProxyData(data, statsHash);
  }

  /**
   * Iterates the keyspace with SCAN instead of KEYS so a large database does
   * not block the Redis server. SCAN may return duplicates, hence the Set.
   */
  private async scanKeys(pattern: string): Promise<string[]> {
    const found = new Set<string>();
    let cursor = '0';

    do {
      const [nextCursor, batch] = await this.redis.scan(cursor, 'MATCH', pattern, 'COUNT', 200);
      cursor = nextCursor;
      for (const key of batch) {
        found.add(key);
      }
    } while (cursor !== '0');

    return [...found];
  }

  /**
   * Converts the raw Redis hashes into a typed proxy.
   *
   * @param data      Fields of the `proxy:{id}` hash.
   * @param statsHash Fields of the `proxy:stats:{id}` hash, which is where
   *                  reportSuccess/reportFailure write. When omitted, the
   *                  `stats` JSON field of `data` is used as a fallback.
   */
  private parseProxyData(
    data: Record<string, string>,
    statsHash?: Record<string, string>
  ): ProxyWithMetadata {
    return {
      id: data.id,
      host: data.host,
      port: parseInt(data.port, 10),
      username: data.username,
      password: data.password,
      protocol: data.protocol as ProxyWithMetadata['protocol'],
      provider: data.provider,
      type: data.type as ProxyWithMetadata['type'],
      // NOTE(review): geo is trusted as stored; confirm it is always written by this service.
      geo: JSON.parse(data.geo || '{}'),
      stats: this.parseStats(data, statsHash),
      status: data.status as ProxyWithMetadata['status'],
      lastUsed: data.lastUsed ? new Date(parseInt(data.lastUsed, 10)) : null,
      cooldownUntil: data.cooldownUntil ? new Date(parseInt(data.cooldownUntil, 10)) : null,
    };
  }

  /** Builds a complete stats object, defaulting missing counters to 0 and dates to null. */
  private parseStats(
    data: Record<string, string>,
    statsHash?: Record<string, string>
  ): ProxyWithMetadata['stats'] {
    const legacy = JSON.parse(data.stats || '{}') as Record<string, unknown>;
    const raw = (field: string): unknown => statsHash?.[field] ?? legacy[field];

    const count = (field: string): number => {
      const value = Number(raw(field));
      return Number.isFinite(value) ? value : 0;
    };

    const instant = (field: string): Date | null => {
      const value = raw(field);
      if (value === undefined || value === null || value === '') return null;
      const millis = Number(value);
      const parsed = new Date(Number.isFinite(millis) ? millis : String(value));
      return Number.isNaN(parsed.getTime()) ? null : parsed;
    };

    return {
      totalRequests: count('totalRequests'),
      successfulRequests: count('successfulRequests'),
      failedRequests: count('failedRequests'),
      blockedRequests: count('blockedRequests'),
      avgLatencyMs: count('avgLatencyMs'),
      bandwidthUsedMb: count('bandwidthUsedMb'),
      lastSuccess: instant('lastSuccess'),
      lastFailure: instant('lastFailure'),
    };
  }
}

4.3 Health Checker

// src/proxy/health-checker.ts
import { ProxyWithMetadata } from './types';
import fetch from 'node-fetch';
import { HttpsProxyAgent } from 'https-proxy-agent';

/**
 * Probes proxies by fetching a public "what is my IP" endpoint through them.
 */
export class ProxyHealthChecker {
  // Only the first URL is used by checkProxy; the rest are listed as alternatives.
  private testUrls = [
    'https://httpbin.org/ip',
    'https://api.ipify.org?format=json',
    'https://www.google.com.mx',
  ];

  /**
   * Checks a single proxy.
   *
   * Never throws: every failure (bad proxy URL, network error, timeout,
   * non-2xx response, invalid JSON) is reported as `healthy: false` with the
   * reason in `error`.
   *
   * @returns Health flag, elapsed time in milliseconds and the egress IP
   *          reported by the test endpoint (null when unavailable).
   */
  async checkProxy(proxy: ProxyWithMetadata): Promise<{
    healthy: boolean;
    latencyMs: number;
    detectedIp: string | null;
    error?: string;
  }> {
    const startTime = Date.now();

    try {
      // Built inside the try so a malformed proxy URL is reported, not thrown.
      // NOTE(review): HttpsProxyAgent is used for every protocol, including
      // 'socks5' - confirm that is appropriate for socks proxies.
      const agent = new HttpsProxyAgent(this.buildProxyUrl(proxy));

      const response = await fetch(this.testUrls[0], {
        agent,
        timeout: 10000,
        headers: {
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        },
      });

      if (!response.ok) {
        return {
          healthy: false,
          latencyMs: Date.now() - startTime,
          detectedIp: null,
          error: `HTTP ${response.status}`,
        };
      }

      const data = await response.json() as { origin?: string; ip?: string };
      const detectedIp = data.origin || data.ip || null;

      return {
        healthy: true,
        latencyMs: Date.now() - startTime,
        detectedIp,
      };
    } catch (error: unknown) {
      return {
        healthy: false,
        latencyMs: Date.now() - startTime,
        detectedIp: null,
        error: error instanceof Error ? error.message : String(error),
      };
    }
  }

  /**
   * Checks many proxies, at most 10 at a time.
   *
   * @returns Map from proxy id to its health flag.
   */
  async checkBatch(proxies: ProxyWithMetadata[]): Promise<Map<string, boolean>> {
    const results = new Map<string, boolean>();

    // Each chunk runs in parallel; chunks run one after another.
    const concurrency = 10;
    const chunks = this.chunkArray(proxies, concurrency);

    for (const chunk of chunks) {
      const checks = chunk.map(async proxy => {
        const result = await this.checkProxy(proxy);
        results.set(proxy.id, result.healthy);
      });

      await Promise.all(checks);
    }

    return results;
  }

  /**
   * Builds `protocol://[user:pass@]host:port`.
   * Credentials are percent-encoded so characters such as '@', ':' or '/'
   * in a password cannot corrupt the URL.
   */
  private buildProxyUrl(proxy: ProxyWithMetadata): string {
    const auth = proxy.username && proxy.password
      ? `${encodeURIComponent(proxy.username)}:${encodeURIComponent(proxy.password)}@`
      : '';
    return `${proxy.protocol}://${auth}${proxy.host}:${proxy.port}`;
  }

  /** Splits `array` into consecutive slices of at most `size` elements. */
  private chunkArray<T>(array: T[], size: number): T[][] {
    const chunks: T[][] = [];
    for (let i = 0; i < array.length; i += size) {
      chunks.push(array.slice(i, i + size));
    }
    return chunks;
  }
}

4.4 Bright Data Provider

// src/proxy/providers/brightdata.provider.ts
import { ProxyProvider, ProxyConfig } from '../types';

/** Settings needed to reach the Bright Data proxy endpoint. */
interface BrightDataConfig {
  endpoint: string;
  username: string;
  password: string;
  zone?: string;
  /** Proxy port; defaults to 22225 when omitted. */
  port?: number;
}

/**
 * Builds proxy configurations for Bright Data. Targeting options (country,
 * city, session, carrier) are encoded as suffixes of the proxy username.
 */
export class BrightDataProvider implements ProxyProvider {
  private static readonly DEFAULT_PORT = 22225;

  private config: BrightDataConfig;

  constructor(config: BrightDataConfig) {
    this.config = config;
  }

  /**
   * Returns a proxy configuration for the given targeting options.
   *
   * @param options.country   Appended as `-country-{country}`.
   * @param options.city      Appended as `-city-{city}`.
   * @param options.sessionId Appended as `-session-{sessionId}`, only when `sticky` is true.
   * @param options.sticky    Enables the session suffix.
   */
  getProxy(options?: {
    country?: string;
    city?: string;
    sessionId?: string;
    sticky?: boolean;
  }): ProxyConfig {
    const suffixes: string[] = [];

    if (options?.country) {
      suffixes.push(`-country-${options.country}`);
    }
    if (options?.city) {
      suffixes.push(`-city-${options.city}`);
    }
    if (options?.sticky && options?.sessionId) {
      suffixes.push(`-session-${options.sessionId}`);
    }

    return this.toProxyConfig(this.config.username + suffixes.join(''));
  }

  /**
   * Returns a residential proxy for `country` (and optionally `city`).
   * With `sticky`, a new session id is generated for the proxy.
   */
  async getResidentialProxy(options: {
    country: string;
    city?: string;
    sticky?: boolean;
  }): Promise<ProxyConfig> {
    const sessionId = options.sticky
      ? `sess_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`
      : undefined;

    return this.getProxy({
      country: options.country,
      city: options.city,
      sessionId,
      sticky: options.sticky,
    });
  }

  /**
   * Returns a mobile proxy for `country`, optionally pinned to a carrier.
   * The username carries `-zone-mobile-country-{country}[-carrier-{carrier}]`.
   */
  async getMobileProxy(options: {
    country: string;
    carrier?: string;
  }): Promise<ProxyConfig> {
    let username = `${this.config.username}-zone-mobile-country-${options.country}`;

    if (options.carrier) {
      username += `-carrier-${options.carrier}`;
    }

    return this.toProxyConfig(username);
  }

  /** Wraps a fully built username into the provider's connection settings. */
  private toProxyConfig(username: string): ProxyConfig {
    return {
      host: this.config.endpoint,
      port: this.config.port ?? BrightDataProvider.DEFAULT_PORT,
      username,
      password: this.config.password,
      protocol: 'http',
    };
  }
}

5. Rotacion Inteligente

5.1 Estrategias de Rotacion

// src/proxy/rotation-strategies.ts
import { ProxyWithMetadata, ProxySelection } from './types';
import { ProxyPoolManager } from './pool-manager';

/**
 * Policy that decides, for each request, whether to keep the current proxy
 * or obtain another one from the pool.
 */
export interface RotationStrategy {
  // Identifier of the strategy.
  name: string;
  // Returns the proxy to use for the request described by `context`.
  selectProxy(
    pool: ProxyPoolManager,
    context: RotationContext
  ): Promise<ProxySelection>;
}

/** Per-request information a rotation strategy bases its decision on. */
export interface RotationContext {
  // Domain the request targets.
  domain: string;
  // Request counter maintained by the caller.
  requestCount: number;
  // Proxy used previously, if any; strategies return it when they do not rotate.
  lastProxy?: ProxyWithMetadata;
  // Reference instant used by time-based rotation to measure elapsed time.
  sessionStart?: Date;
}

// Strategy: rotate every N requests
/**
 * Keeps the previous proxy until the request counter reaches a multiple of
 * `n`, then asks the pool for a fresh one. Without a previous proxy it always
 * asks the pool.
 */
export class EveryNRequestsStrategy implements RotationStrategy {
  name = 'every_n_requests';

  constructor(private n: number = 10) {}

  async selectProxy(
    pool: ProxyPoolManager,
    context: RotationContext
  ): Promise<ProxySelection> {
    const { domain, requestCount, lastProxy } = context;
    const dueForRotation = requestCount % this.n === 0;

    if (dueForRotation || !lastProxy) {
      return pool.getProxy({
        targetDomain: domain,
        requireFresh: true,
      });
    }

    return { proxy: lastProxy };
  }
}

// Strategy: rotate on a time interval
/**
 * Keeps the previous proxy while less than the configured interval has passed
 * since `context.sessionStart`; otherwise requests a sticky proxy from the
 * pool. A missing `sessionStart` counts as an expired interval.
 */
export class TimeBasedStrategy implements RotationStrategy {
  name = 'time_based';
  private intervalMs: number;

  constructor(intervalMinutes: number = 10) {
    this.intervalMs = intervalMinutes * 60 * 1000;
  }

  async selectProxy(
    pool: ProxyPoolManager,
    context: RotationContext
  ): Promise<ProxySelection> {
    const { domain, lastProxy, sessionStart } = context;

    let elapsedMs = Infinity;
    if (sessionStart) {
      elapsedMs = Date.now() - sessionStart.getTime();
    }

    const withinInterval = elapsedMs < this.intervalMs;

    if (!withinInterval || !lastProxy) {
      return pool.getProxy({
        targetDomain: domain,
        stickySession: true,
      });
    }

    return { proxy: lastProxy };
  }
}

// Strategy: weighted round robin
/**
 * Placeholder for a round robin weighted by success rate.
 * For now every call is delegated to the pool's own selection, and
 * `currentIndex` is not used yet.
 */
export class WeightedRoundRobinStrategy implements RotationStrategy {
  name = 'weighted_round_robin';
  private currentIndex = 0;

  async selectProxy(
    pool: ProxyPoolManager,
    context: RotationContext
  ): Promise<ProxySelection> {
    // TODO: implement round robin with weights based on success rate.
    const { domain } = context;
    return pool.getProxy({ targetDomain: domain });
  }
}

// Strategy: adaptive, driven by observed responses
/**
 * Sticks with the current proxy until `recordFailure` has been called twice
 * in a row without an intervening `recordSuccess`; the next selection then
 * resets the counter and requests a fresh proxy.
 */
export class AdaptiveStrategy implements RotationStrategy {
  name = 'adaptive';
  private failureThreshold = 2;
  private consecutiveFailures = 0;

  async selectProxy(
    pool: ProxyPoolManager,
    context: RotationContext
  ): Promise<ProxySelection> {
    const { domain, lastProxy } = context;
    const mustRotate = this.consecutiveFailures >= this.failureThreshold;

    // Too many consecutive failures: force a rotation.
    if (mustRotate) {
      this.consecutiveFailures = 0;
      return pool.getProxy({ targetDomain: domain, requireFresh: true });
    }

    // No current proxy yet: start with a sticky one.
    if (!lastProxy) {
      return pool.getProxy({ targetDomain: domain, stickySession: true });
    }

    return { proxy: lastProxy };
  }

  /** Clears the failure streak. */
  recordSuccess(): void {
    this.consecutiveFailures = 0;
  }

  /** Extends the failure streak by one. */
  recordFailure(): void {
    this.consecutiveFailures += 1;
  }
}

6. Integracion con Playwright

// src/proxy/playwright-integration.ts
import { Browser, BrowserContext, Page } from 'playwright';
import { ProxyPoolManager } from './pool-manager';
import { ProxyWithMetadata } from './types';

/**
 * Glue between Playwright and the proxy pool: creates browser contexts routed
 * through a pooled proxy and feeds request outcomes back into the pool.
 */
export class PlaywrightProxyIntegration {
  /** Substrings (lower-case) that mark a network failure as a block. */
  private static readonly BLOCK_PATTERNS = [
    'net::ERR_PROXY_CONNECTION_FAILED',
    'net::ERR_TUNNEL_CONNECTION_FAILED',
    'Cloudflare',
    'Access Denied',
    'blocked',
  ].map(pattern => pattern.toLowerCase());

  private proxyPool: ProxyPoolManager;

  constructor() {
    this.proxyPool = new ProxyPoolManager();
  }

  /**
   * Obtains a sticky proxy for `options.domain` and opens a browser context
   * that routes its traffic through it (es-MX locale, Mexico City timezone).
   *
   * @returns The context, the proxy backing it and the sticky session id.
   * @throws Error when the pool has no proxy available or returns no session id.
   */
  async createContextWithProxy(
    browser: Browser,
    options: {
      domain: string;
      preferredType?: 'residential' | 'datacenter';
      userAgent?: string;
    }
  ): Promise<{
    context: BrowserContext;
    proxy: ProxyWithMetadata;
    sessionId: string;
  }> {
    const { proxy, sessionId } = await this.proxyPool.getProxy({
      targetDomain: options.domain,
      preferredType: options.preferredType,
      stickySession: true,
    });

    if (!sessionId) {
      throw new Error(`Proxy pool returned no sticky session for ${options.domain}`);
    }

    const context = await browser.newContext({
      proxy: {
        server: `${proxy.protocol}://${proxy.host}:${proxy.port}`,
        username: proxy.username,
        password: proxy.password,
      },
      userAgent: options.userAgent || this.getRandomUserAgent(),
      viewport: { width: 1920, height: 1080 },
      locale: 'es-MX',
      timezoneId: 'America/Mexico_City',
    });

    return { context, proxy, sessionId };
  }

  /**
   * Subscribes to the page's network events and reports each outcome to the
   * pool: failed requests and 403/429/503 responses as failures, 2xx/3xx
   * responses as successes with their latency.
   *
   * Reporting is fire-and-forget; a pool error is logged and never propagates
   * into the page's event loop.
   *
   * @returns The same `page` instance.
   */
  async wrapPageWithProxyHandling(
    page: Page,
    proxy: ProxyWithMetadata,
    domain: string
  ): Promise<Page> {
    page.on('requestfailed', (request) => {
      const failure = request.failure();
      if (!failure) return;

      const isBlock = this.isBlockError(failure.errorText);
      void this.reportSafely(() =>
        this.proxyPool.reportFailure(proxy.id, domain, new Error(failure.errorText), isBlock)
      );
    });

    page.on('response', (response) => {
      const status = response.status();

      if (status === 403 || status === 429 || status === 503) {
        void this.reportSafely(() =>
          this.proxyPool.reportFailure(proxy.id, domain, new Error(`HTTP ${status}`), true)
        );
      } else if (status >= 200 && status < 400) {
        const latencyMs = this.latencyFromTiming(response.request().timing());
        void this.reportSafely(() =>
          this.proxyPool.reportSuccess(proxy.id, domain, latencyMs)
        );
      }
    });

    return page;
  }

  /**
   * Derives a non-negative latency from Playwright resource timing.
   *
   * Timing fields are -1 while unavailable, and `responseEnd` is only filled
   * in once the request finishes - which is after the 'response' event - so
   * `responseStart` is used as the end mark when `responseEnd` is missing.
   * Returns 0 when no usable timing exists.
   */
  private latencyFromTiming(timing: {
    requestStart: number;
    responseStart: number;
    responseEnd: number;
  }): number {
    const end = timing.responseEnd >= 0 ? timing.responseEnd : timing.responseStart;
    if (timing.requestStart < 0 || end < 0) {
      return 0;
    }
    return Math.max(0, end - timing.requestStart);
  }

  /** Runs a pool report and logs, instead of throwing, when it fails. */
  private async reportSafely(report: () => Promise<void>): Promise<void> {
    try {
      await report();
    } catch (error: unknown) {
      const reason = error instanceof Error ? error.message : String(error);
      console.error(`Failed to report proxy outcome: ${reason}`);
    }
  }

  /** True when the failure text contains one of the known block markers (case-insensitive). */
  private isBlockError(errorText: string): boolean {
    const haystack = errorText.toLowerCase();
    return PlaywrightProxyIntegration.BLOCK_PATTERNS.some(pattern =>
      haystack.includes(pattern)
    );
  }

  /** Picks one of the bundled desktop user agents at random. */
  private getRandomUserAgent(): string {
    const userAgents = [
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
    ];

    return userAgents[Math.floor(Math.random() * userAgents.length)];
  }
}

7. Dashboard de Monitoreo

7.1 Metricas Prometheus

// src/proxy/metrics.ts
import { Counter, Gauge, Histogram } from 'prom-client';

/**
 * Prometheus metrics exposed by the proxy subsystem, grouped by metric kind.
 * Each entry declares its metric name, help text and label names.
 */
export const proxyMetrics = {
  // Counters
  requests_total: new Counter({
    name: 'proxy_requests_total',
    help: 'Total proxy requests',
    labelNames: ['provider', 'type', 'status'],
  }),

  blocks_total: new Counter({
    name: 'proxy_blocks_total',
    help: 'Total proxy blocks detected',
    labelNames: ['provider', 'domain'],
  }),

  rotations_total: new Counter({
    name: 'proxy_rotations_total',
    help: 'Total proxy rotations',
    labelNames: ['reason'],
  }),

  // Gauges
  active_proxies: new Gauge({
    name: 'proxy_active_count',
    help: 'Number of active proxies',
    labelNames: ['provider', 'type'],
  }),

  cooling_proxies: new Gauge({
    name: 'proxy_cooling_count',
    help: 'Number of proxies in cooling period',
    labelNames: ['provider'],
  }),

  bandwidth_used_mb: new Gauge({
    name: 'proxy_bandwidth_used_mb',
    help: 'Bandwidth used in MB',
    labelNames: ['provider'],
  }),

  // Histograms
  latency_seconds: new Histogram({
    name: 'proxy_latency_seconds',
    help: 'Proxy request latency',
    labelNames: ['provider'],
    buckets: [0.1, 0.5, 1, 2, 5, 10],
  }),

  // NOTE(review): a success rate is declared as a Histogram of ratios (0.5-1.0);
  // confirm this is intended rather than a Gauge or a ratio derived from
  // proxy_requests_total.
  success_rate: new Histogram({
    name: 'proxy_success_rate',
    help: 'Proxy success rate',
    labelNames: ['provider'],
    buckets: [0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 1.0],
  }),
};

7.2 API de Estado

// src/proxy/routes.ts
import { Router } from 'express';
import { ProxyPoolManager } from './pool-manager';

// Status and maintenance endpoints for the proxy pool.
// Every handler forwards rejections to `next`, because Express 4 does not
// catch errors thrown inside async handlers on its own.
// NOTE(review): getPoolStatus, getProxyDetails, resetProxyStats and setCooldown
// do not appear in the ProxyPoolManager listing of section 4.2 - confirm they exist.
const router = Router();
const pool = new ProxyPoolManager();

// GET /api/proxies/status
// Aggregated view of the pool: totals, per-provider/type breakdown, recent blocks.
router.get('/status', async (_req, res, next) => {
  try {
    const stats = await pool.getPoolStatus();

    res.json({
      overview: {
        totalProxies: stats.total,
        activeProxies: stats.active,
        coolingProxies: stats.cooling,
        blockedProxies: stats.blocked,
        avgSuccessRate: stats.avgSuccessRate,
        avgLatencyMs: stats.avgLatencyMs,
      },
      byProvider: stats.byProvider,
      byType: stats.byType,
      recentBlocks: stats.recentBlocks,
      bandwidthUsage: stats.bandwidthUsage,
    });
  } catch (error) {
    next(error);
  }
});

// GET /api/proxies/:id
// Details of one proxy; 404 when the id is unknown.
router.get('/:id', async (req, res, next) => {
  try {
    const proxy = await pool.getProxyDetails(req.params.id);

    if (!proxy) {
      res.status(404).json({ error: 'Proxy not found' });
      return;
    }

    res.json(proxy);
  } catch (error) {
    next(error);
  }
});

// POST /api/proxies/:id/reset
// Clears the statistics of one proxy.
router.post('/:id/reset', async (req, res, next) => {
  try {
    await pool.resetProxyStats(req.params.id);
    res.json({ success: true });
  } catch (error) {
    next(error);
  }
});

// POST /api/proxies/:id/cooldown
// Puts one proxy into cooldown for `minutes` (default 30).
// Responds 400 when `minutes` is not a positive finite number.
router.post('/:id/cooldown', async (req, res, next) => {
  try {
    // req.body is undefined when no body parser ran or the request had no body.
    const minutes = Number(req.body?.minutes ?? 30);

    if (!Number.isFinite(minutes) || minutes <= 0) {
      res.status(400).json({ error: 'minutes must be a positive number' });
      return;
    }

    await pool.setCooldown(req.params.id, minutes);
    res.json({ success: true });
  } catch (error) {
    next(error);
  }
});

export default router;

8. Costos y Presupuesto

# config/proxy-budget.yml
# Monthly spending limits for proxies, alert thresholds and the actions taken
# when a threshold is crossed.
monthly_budget:
  total_usd: 500

  # NOTE(review): config/proxies.yml lists 100GB at 15 USD/GB for brightdata,
  # i.e. up to 1500 USD if fully consumed, which exceeds the residential
  # allocation below - confirm the intended bandwidth cap.
  allocation:
    residential: 400  # 80%
    datacenter: 50    # 10%
    mobile: 50        # 10% (reserve)

  alerts:
    warning_threshold: 0.7   # 70% of the budget
    critical_threshold: 0.9  # 90% of the budget

  actions_on_limit:
    warning:
      - reduce_concurrency
      - prefer_datacenter
    critical:
      - pause_non_essential
      - alert_admin

  cost_per_request:
    inmuebles24: 0.02     # Hard site
    metros_cubicos: 0.01  # Easy site
    vivanuncios: 0.015    # Medium site

9. Tests

// src/proxy/__tests__/pool-manager.test.ts
import { ProxyPoolManager } from '../pool-manager';
import { Redis } from 'ioredis';

// Replace the real Redis client with Jest's automatic mock.
// NOTE(review): no proxy data is seeded into the mock here, so getProxy
// presumably has no candidates to choose from - confirm fixtures are set up
// elsewhere before relying on these cases.
jest.mock('ioredis');

describe('ProxyPoolManager', () => {
  let manager: ProxyPoolManager;

  // A fresh manager per test keeps state from leaking between cases.
  beforeEach(() => {
    manager = new ProxyPoolManager();
  });

  describe('getProxy', () => {
    it('should return a proxy for valid domain', async () => {
      const result = await manager.getProxy({
        targetDomain: 'inmuebles24.com',
      });

      expect(result.proxy).toBeDefined();
      expect(result.proxy.host).toBeDefined();
    });

    it('should reuse sticky session when provided', async () => {
      // First call opens the session ...
      const first = await manager.getProxy({
        targetDomain: 'test.com',
        stickySession: true,
      });

      // ... second call passes its id back and must land on the same proxy.
      const second = await manager.getProxy({
        targetDomain: 'test.com',
        stickySession: true,
        sessionId: first.sessionId,
      });

      expect(first.proxy.id).toBe(second.proxy.id);
    });
  });

  describe('reportFailure', () => {
    it('should put proxy in cooling after block', async () => {
      const { proxy } = await manager.getProxy({
        targetDomain: 'test.com',
      });

      await manager.reportFailure(proxy.id, 'test.com', new Error('403'), true);

      // NOTE(review): this case has no expectations yet, so it passes without
      // verifying anything.
      // Verify proxy is in cooling for this domain
      // ... assertions
    });
  });
});

Anterior: ET-IA-007-etl.md Siguiente: ET-IA-007-monitoring.md