miinventario-v2/docs/02-integraciones/INT-006-ia-provider.md
rckrdmrd 1a53b5c4d3 [MIINVENTARIO] feat: Initial commit - Sistema de inventario con análisis de video IA
- Backend NestJS con módulos de autenticación, inventario, créditos
- Frontend React con dashboard y componentes UI
- Base de datos PostgreSQL con migraciones
- Tests E2E configurados
- Configuración de Docker y deployment

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-13 02:25:48 -06:00

11 KiB

INT-006: Integracion IA Provider


---
id: INT-006
type: Integration
status: Pendiente
version: "1.0.0"
created_date: 2026-01-10
updated_date: 2026-01-10
simco_version: "4.0.0"
---

Metadata

| Campo | Valor |
|-------|-------|
| ID | INT-006 |
| Servicio | Proveedores de Vision IA |
| Proposito | Deteccion e identificacion de productos |
| Criticidad | P0 |
| Estado | Pendiente |

1. Descripcion

Capa de abstraccion que integra multiples proveedores de Vision IA detras de una interfaz comun (`IAProvider`) para detectar e identificar productos en frames de video.


2. Proveedores Soportados

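| Proveedor | API | Uso | Costo Aprox |
|-----------|-----|-----|-------------|
| OpenAI | GPT-4 Vision | Deteccion + identificacion | $0.01-0.03/imagen |
| Claude | Claude 3 Vision | Deteccion + identificacion | $0.01-0.02/imagen |
| Google | Cloud Vision | Deteccion de objetos | $0.0015/imagen |
| Custom | Modelo propio | Fine-tuned | Variable |

Nota: este documento solo detalla los adapters de OpenAI (seccion 5) y Claude (seccion 6), que son los unicos registrados en `IAProviderService`; Google y Custom aun no tienen una implementacion descrita.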

3. Arquitectura de Abstraccion

┌─────────────────────────────────────────────────────────────────┐
│                    IA PROVIDER ABSTRACTION                       │
├─────────────────────────────────────────────────────────────────┤
│                                                                  │
│  ┌─────────────────────────────────────────────────────────┐    │
│  │                  IAProviderService                       │    │
│  │  ─────────────────────────────────────────────────────  │    │
│  │  + detectProducts(frames: Buffer[]): DetectionResult[]  │    │
│  │  + getActiveProvider(): IAProvider                       │    │
│  │  + getCost(sessionId): COGSRecord                        │    │
│  │  + switchProvider(name: string): void                    │    │
│  └────────────────────────┬────────────────────────────────┘    │
│                           │                                      │
│            ┌──────────────┼──────────────┐                      │
│            ▼              ▼              ▼                      │
│      ┌──────────┐  ┌──────────┐  ┌──────────┐                  │
│      │  OpenAI  │  │  Claude  │  │  Google  │                  │
│      │ Adapter  │  │ Adapter  │  │ Adapter  │                  │
│      └──────────┘  └──────────┘  └──────────┘                  │
│                                                                  │
└─────────────────────────────────────────────────────────────────┘

4. Interface del Provider

/**
 * Contract implemented by every vision-AI provider adapter.
 *
 * The service layer only talks to this interface, so a provider can be
 * swapped without touching callers.
 */
interface IAProvider {
  /** Provider identifier; the adapters in this document use 'openai', 'claude' and 'mock'. */
  name: string;

  /** Resolves to whether the provider can currently be used. */
  isAvailable(): Promise<boolean>;

  /**
   * Detects products in a batch of frames.
   *
   * @param frames Encoded image data, one buffer per frame.
   * @param options Optional detection settings.
   * @returns The detections found across the given frames.
   */
  detectProducts(frames: Buffer[], options?: DetectionOptions): Promise<Detection[]>;

  /**
   * Identifies one specific, previously detected product.
   *
   * @param image Image data containing the product.
   * @param detection The detection to identify.
   * @returns The resulting product match.
   */
  identifyProduct(image: Buffer, detection: Detection): Promise<ProductMatch>;

  /** Cost charged per processed frame (the OpenAI adapter documents its value as approximate USD). */
  getCostPerFrame(): number;

  /** Cost charged per token; presumably the same currency as the per-frame cost — confirm. */
  getCostPerToken(): number;
}

/**
 * Rectangle locating a detection inside its frame.
 *
 * The adapters in this document appear to use percentages of the frame
 * (the OpenAI prompt asks for `%` values and the Claude fallback is
 * 0/0/100/100) — confirm before relying on the unit.
 */
interface BoundingBox {
  x: number;
  y: number;
  width: number;
  height: number;
}

/** One product occurrence reported by a provider for a single frame. */
interface Detection {
  /** Position of the source frame; the adapters set it to the frame's index in the input array. */
  frameNumber: number;
  /** Where the product was found inside the frame. */
  boundingBox: BoundingBox;
  /** Provider-reported confidence; the prompts in this document ask for a value from 0 to 1. */
  confidence: number;
  /** Human-readable description, e.g. brand, name and presentation joined together. */
  label?: string;
  /** Optional numeric vector; no adapter in this document populates it. */
  embedding?: number[];
}

/** A concrete product proposed as a possible match, with its own confidence. */
interface ProductCandidate {
  productId: string;
  productName: string;
  confidence: number;
}

/** Outcome of trying to identify a detected product. */
interface ProductMatch {
  /** Identifier of the matched product; presumably null when nothing matched — confirm. */
  productId: string | null;
  /** Name of the matched product; nullable like `productId`. */
  productName: string | null;
  /** Confidence of the match. */
  confidence: number;
  /** Whether the product is a known one. */
  isKnown: boolean;
  /** Other candidate products, when the provider supplies them. */
  alternatives?: ProductCandidate[];
}

5. Implementacion OpenAI

Configuracion

OPENAI_API_KEY=sk-...
OPENAI_MODEL=gpt-4-vision-preview
OPENAI_MAX_TOKENS=1000

Adapter

/**
 * Vision adapter backed by the OpenAI Chat Completions API.
 *
 * Every frame is sent as a base64 JPEG data URL together with a prompt that
 * asks for a JSON array of products; the reply is parsed, validated and mapped
 * to `Detection` objects.
 *
 * Environment variables:
 * - `OPENAI_API_KEY`    — read once, when the client is constructed.
 * - `OPENAI_MODEL`      — read on each call; defaults to `gpt-4-vision-preview`.
 * - `OPENAI_MAX_TOKENS` — read on each call; defaults to 1000.
 */
@Injectable()
export class OpenAIAdapter implements IAProvider {
  // NOTE(review): preview model ids get retired by OpenAI over time — confirm
  // this default is still served before relying on it.
  private static readonly DEFAULT_MODEL = 'gpt-4-vision-preview';
  private static readonly DEFAULT_MAX_TOKENS = 1000;

  name = 'openai';
  private client: OpenAI;

  constructor() {
    this.client = new OpenAI({
      apiKey: process.env.OPENAI_API_KEY,
    });
  }

  /**
   * Detects products in each frame, one request per frame, in input order.
   *
   * @param frames JPEG-encoded frames.
   * @returns Detections for all frames; `frameNumber` is the frame's index in `frames`.
   * @throws Error when the model reply for a frame is empty or is not a JSON array.
   */
  async detectProducts(frames: Buffer[]): Promise<Detection[]> {
    const detections: Detection[] = [];
    const model = this.resolveModel();
    const maxTokens = this.resolveMaxTokens();

    for (const [frameNumber, frame] of frames.entries()) {
      const base64 = frame.toString('base64');

      const response = await this.client.chat.completions.create({
        model,
        max_tokens: maxTokens,
        messages: [
          {
            role: 'user',
            content: [
              {
                type: 'text',
                text: `Analyze this shelf image and detect all products.
                       Return a JSON array with detected products:
                       [{
                         "name": "product name",
                         "brand": "brand name",
                         "presentation": "size/type",
                         "quantity": estimated count,
                         "confidence": 0-1,
                         "boundingBox": {"x": %, "y": %, "width": %, "height": %}
                       }]`,
              },
              {
                type: 'image_url',
                image_url: {
                  url: `data:image/jpeg;base64,${base64}`,
                },
              },
            ],
          },
        ],
      });

      const products = this.parseProducts(response.choices[0]?.message?.content, frameNumber);
      for (const product of products) {
        detections.push(this.toDetection(product, frameNumber));
      }
    }

    return detections;
  }

  /**
   * Product identification is not specified for this adapter yet.
   *
   * Present so the class satisfies `IAProvider`; it always rejects rather than
   * returning a fabricated match.
   *
   * @throws Error always.
   */
  async identifyProduct(_image: Buffer, _detection: Detection): Promise<never> {
    throw new Error(`identifyProduct is not implemented for provider ${this.name}`);
  }

  /** Approximate cost per processed frame, in USD. */
  getCostPerFrame(): number {
    return 0.02;
  }

  /** Cost per token; presumably USD like the per-frame cost — confirm. */
  getCostPerToken(): number {
    return 0.00001;
  }

  /**
   * Checks that the configured model can be retrieved with the current credentials.
   *
   * @returns `true` when the lookup succeeds, `false` on any failure.
   */
  async isAvailable(): Promise<boolean> {
    try {
      // Probe the model that detectProducts will actually use, not a fixed id.
      await this.client.models.retrieve(this.resolveModel());
      return true;
    } catch {
      return false;
    }
  }

  /** Returns the configured model id, falling back to the default when unset or blank. */
  private resolveModel(): string {
    // `||` on purpose: an empty OPENAI_MODEL must also fall back to the default.
    return process.env.OPENAI_MODEL?.trim() || OpenAIAdapter.DEFAULT_MODEL;
  }

  /** Returns the configured token cap, falling back when unset or not a positive integer. */
  private resolveMaxTokens(): number {
    const parsed = Number.parseInt(process.env.OPENAI_MAX_TOKENS ?? '', 10);
    return Number.isInteger(parsed) && parsed > 0 ? parsed : OpenAIAdapter.DEFAULT_MAX_TOKENS;
  }

  /**
   * Extracts the JSON array of products from the model reply.
   *
   * Only the text between the first `[` and the last `]` is parsed, so replies
   * wrapped in a Markdown code fence or surrounded by prose are still accepted.
   * Entries that are not objects are skipped.
   */
  private parseProducts(
    content: string | null | undefined,
    frameNumber: number,
  ): Record<string, unknown>[] {
    if (!content) {
      throw new Error(`OpenAI returned an empty response for frame ${frameNumber}`);
    }

    const start = content.indexOf('[');
    const end = content.lastIndexOf(']');
    if (start === -1 || end <= start) {
      throw new Error(`OpenAI response for frame ${frameNumber} does not contain a JSON array`);
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(content.slice(start, end + 1));
    } catch (error: unknown) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`OpenAI response for frame ${frameNumber} is not valid JSON: ${reason}`);
    }

    if (!Array.isArray(parsed)) {
      throw new Error(`OpenAI response for frame ${frameNumber} is not a JSON array`);
    }

    return parsed.filter(
      (entry): entry is Record<string, unknown> => typeof entry === 'object' && entry !== null,
    );
  }

  /** Maps one parsed product entry to a `Detection`, tolerating missing fields. */
  private toDetection(product: Record<string, unknown>, frameNumber: number): Detection {
    // Join only the parts that are present so a missing field never yields "undefined".
    const label = [product.brand, product.name, product.presentation]
      .filter((part): part is string => typeof part === 'string' && part.trim() !== '')
      .join(' ');
    const confidence = this.toFiniteNumber(product.confidence);

    return {
      frameNumber,
      boundingBox: this.toBoundingBox(product.boundingBox),
      confidence: confidence === undefined ? 0 : Math.min(1, Math.max(0, confidence)),
      ...(label ? { label } : {}),
    };
  }

  /** Returns the reported box, or a full-frame box when it is missing or malformed. */
  private toBoundingBox(raw: unknown): Detection['boundingBox'] {
    if (typeof raw === 'object' && raw !== null) {
      const box = raw as Record<string, unknown>;
      const x = this.toFiniteNumber(box.x);
      const y = this.toFiniteNumber(box.y);
      const width = this.toFiniteNumber(box.width);
      const height = this.toFiniteNumber(box.height);
      if (x !== undefined && y !== undefined && width !== undefined && height !== undefined) {
        return { x, y, width, height };
      }
    }
    // Same fallback the Claude adapter uses.
    return { x: 0, y: 0, width: 100, height: 100 };
  }

  /** Accepts numbers and numeric strings such as "12%"; anything else yields undefined. */
  private toFiniteNumber(value: unknown): number | undefined {
    const candidate = typeof value === 'string' ? Number.parseFloat(value) : value;
    return typeof candidate === 'number' && Number.isFinite(candidate) ? candidate : undefined;
  }
}

6. Implementacion Claude

/**
 * Vision adapter backed by the Anthropic Messages API.
 *
 * Every frame is sent as a base64 JPEG image block followed by a text prompt
 * that asks for a JSON array of products; the reply is parsed, validated and
 * mapped to `Detection` objects.
 *
 * Environment variables:
 * - `ANTHROPIC_API_KEY` — read once, when the client is constructed.
 * - `ANTHROPIC_MODEL`   — read on each call; defaults to `claude-3-opus-20240229`.
 */
@Injectable()
export class ClaudeAdapter implements IAProvider {
  // NOTE(review): dated model ids are eventually retired — confirm this
  // default is still served before relying on it.
  private static readonly DEFAULT_MODEL = 'claude-3-opus-20240229';
  private static readonly MAX_TOKENS = 1024;

  name = 'claude';
  private client: Anthropic;

  constructor() {
    this.client = new Anthropic({
      apiKey: process.env.ANTHROPIC_API_KEY,
    });
  }

  /**
   * Detects products in each frame, one request per frame, in input order.
   *
   * @param frames JPEG-encoded frames.
   * @returns Detections for all frames; `frameNumber` is the frame's index in `frames`.
   * @throws Error when the model reply for a frame has no text or is not a JSON array.
   */
  async detectProducts(frames: Buffer[]): Promise<Detection[]> {
    const detections: Detection[] = [];
    const model = this.resolveModel();

    for (const [frameNumber, frame] of frames.entries()) {
      const base64 = frame.toString('base64');

      const response = await this.client.messages.create({
        model,
        max_tokens: ClaudeAdapter.MAX_TOKENS,
        messages: [
          {
            role: 'user',
            content: [
              {
                type: 'image',
                source: {
                  type: 'base64',
                  media_type: 'image/jpeg',
                  data: base64,
                },
              },
              {
                type: 'text',
                text: 'Detect all products on this shelf. Return JSON array with name, brand, quantity, confidence (0-1).',
              },
            ],
          },
        ],
      });

      // The reply is a list of typed blocks; only text blocks carry the JSON.
      const text = response.content
        .map((block) => (block.type === 'text' ? block.text : ''))
        .join('');

      for (const product of this.parseProducts(text, frameNumber)) {
        detections.push(this.toDetection(product, frameNumber));
      }
    }

    return detections;
  }

  /**
   * Product identification is not specified for this adapter yet.
   *
   * Present so the class satisfies `IAProvider`; it always rejects rather than
   * returning a fabricated match.
   *
   * @throws Error always.
   */
  async identifyProduct(_image: Buffer, _detection: Detection): Promise<never> {
    throw new Error(`identifyProduct is not implemented for provider ${this.name}`);
  }

  /** Cost per processed frame; presumably approximate USD like the OpenAI adapter — confirm. */
  getCostPerFrame(): number {
    return 0.015;
  }

  /**
   * Cost per token.
   *
   * NOTE(review): 0.000015 assumes the Claude 3 Opus list price for input
   * tokens (USD 15 per million) — confirm against current pricing.
   */
  getCostPerToken(): number {
    return 0.000015;
  }

  /**
   * Checks that the configured model can be retrieved with the current credentials.
   *
   * @returns `true` when the lookup succeeds, `false` on any failure.
   */
  async isAvailable(): Promise<boolean> {
    try {
      await this.client.models.retrieve(this.resolveModel());
      return true;
    } catch {
      return false;
    }
  }

  /** Returns the configured model id, falling back to the default when unset or blank. */
  private resolveModel(): string {
    // `||` on purpose: an empty ANTHROPIC_MODEL must also fall back to the default.
    return process.env.ANTHROPIC_MODEL?.trim() || ClaudeAdapter.DEFAULT_MODEL;
  }

  /**
   * Extracts the JSON array of products from the model reply.
   *
   * Only the text between the first `[` and the last `]` is parsed, so replies
   * wrapped in a Markdown code fence or surrounded by prose are still accepted.
   * Entries that are not objects are skipped.
   */
  private parseProducts(content: string, frameNumber: number): Record<string, unknown>[] {
    if (!content) {
      throw new Error(`Claude returned no text content for frame ${frameNumber}`);
    }

    const start = content.indexOf('[');
    const end = content.lastIndexOf(']');
    if (start === -1 || end <= start) {
      throw new Error(`Claude response for frame ${frameNumber} does not contain a JSON array`);
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(content.slice(start, end + 1));
    } catch (error: unknown) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Claude response for frame ${frameNumber} is not valid JSON: ${reason}`);
    }

    if (!Array.isArray(parsed)) {
      throw new Error(`Claude response for frame ${frameNumber} is not a JSON array`);
    }

    return parsed.filter(
      (entry): entry is Record<string, unknown> => typeof entry === 'object' && entry !== null,
    );
  }

  /** Maps one parsed product entry to a `Detection`, tolerating missing fields. */
  private toDetection(product: Record<string, unknown>, frameNumber: number): Detection {
    // Join only the parts that are present so a missing field never yields "undefined".
    const label = [product.brand, product.name]
      .filter((part): part is string => typeof part === 'string' && part.trim() !== '')
      .join(' ');
    const confidence = this.toFiniteNumber(product.confidence);

    return {
      frameNumber,
      boundingBox: this.toBoundingBox(product.boundingBox),
      confidence: confidence === undefined ? 0 : Math.min(1, Math.max(0, confidence)),
      ...(label ? { label } : {}),
    };
  }

  /**
   * Returns the reported box, or a full-frame box when it is missing or malformed.
   * The prompt does not request a bounding box, so the fallback is the common case.
   */
  private toBoundingBox(raw: unknown): Detection['boundingBox'] {
    if (typeof raw === 'object' && raw !== null) {
      const box = raw as Record<string, unknown>;
      const x = this.toFiniteNumber(box.x);
      const y = this.toFiniteNumber(box.y);
      const width = this.toFiniteNumber(box.width);
      const height = this.toFiniteNumber(box.height);
      if (x !== undefined && y !== undefined && width !== undefined && height !== undefined) {
        return { x, y, width, height };
      }
    }
    return { x: 0, y: 0, width: 100, height: 100 };
  }

  /** Accepts numbers and numeric strings such as "12%"; anything else yields undefined. */
  private toFiniteNumber(value: unknown): number | undefined {
    const candidate = typeof value === 'string' ? Number.parseFloat(value) : value;
    return typeof candidate === 'number' && Number.isFinite(candidate) ? candidate : undefined;
  }
}

7. Servicio Principal

/**
 * Facade over the registered vision providers.
 *
 * Holds the adapters keyed by name, tracks which one is active (initially the
 * `IA_ACTIVE_PROVIDER` setting, defaulting to 'openai') and forwards detection
 * and cost queries to it.
 */
@Injectable()
export class IAProviderService {
  private providers: Map<string, IAProvider> = new Map();
  private activeProvider: string;

  constructor(
    private openai: OpenAIAdapter,
    private claude: ClaudeAdapter,
    private configService: ConfigService,
  ) {
    const registry: Array<[string, IAProvider]> = [
      ['openai', openai],
      ['claude', claude],
    ];
    for (const [key, adapter] of registry) {
      this.providers.set(key, adapter);
    }
    this.activeProvider = configService.get('IA_ACTIVE_PROVIDER', 'openai');
  }

  /**
   * Runs product detection on the active provider.
   *
   * @param frames Frames to analyze.
   * @returns Whatever the active provider reports for those frames.
   */
  async detectProducts(frames: Buffer[]): Promise<Detection[]> {
    return this.getActiveProvider().detectProducts(frames);
  }

  /**
   * Returns the adapter currently selected as active.
   *
   * @throws Error when the active name has no registered adapter.
   */
  getActiveProvider(): IAProvider {
    return this.lookup(this.activeProvider);
  }

  /**
   * Makes another registered provider the active one.
   *
   * @param name Key of the provider to activate.
   * @throws Error when the provider is not registered or reports itself unavailable.
   */
  async switchProvider(name: string): Promise<void> {
    const candidate = this.lookup(name);
    const usable = await candidate.isAvailable();
    if (!usable) {
      throw new Error(`Provider ${name} is not available`);
    }
    this.activeProvider = name;
  }

  /**
   * Computes the cost of a run using the active provider's rates.
   *
   * @param framesProcessed Number of frames that were processed.
   * @param tokensUsed Number of tokens that were consumed.
   * @returns Frame cost plus token cost.
   */
  calculateCOGS(framesProcessed: number, tokensUsed: number): number {
    const active = this.getActiveProvider();
    const frameCost = framesProcessed * active.getCostPerFrame();
    const tokenCost = tokensUsed * active.getCostPerToken();
    return frameCost + tokenCost;
  }

  /** Fetches a registered adapter by key, failing when there is none. */
  private lookup(key: string): IAProvider {
    const found = this.providers.get(key);
    if (found) {
      return found;
    }
    throw new Error(`Provider ${key} not found`);
  }
}

8. Prompt Engineering

Prompt para Deteccion

You are an expert product recognition system for Mexican convenience stores.
Analyze this shelf image and detect ALL visible products.

For each product, provide:
1. Brand name (e.g., "Coca-Cola", "Sabritas", "Bimbo")
2. Product name (e.g., "Original", "Refresco", "Pan Blanco")
3. Presentation/Size (e.g., "600ml", "45g", "680g")
4. Quantity visible (count of units)
5. Confidence score (0.0 to 1.0)

Return ONLY valid JSON array. No explanations.

Example output:
[
  {"brand": "Coca-Cola", "name": "Original", "presentation": "600ml", "quantity": 12, "confidence": 0.95},
  {"brand": "Sabritas", "name": "Original", "presentation": "45g", "quantity": 8, "confidence": 0.88}
]

9. Testing

Mock Provider

/**
 * In-memory provider for tests: returns fixed data, costs nothing and never
 * calls an external service.
 */
@Injectable()
export class MockIAAdapter implements IAProvider {
  name = 'mock';

  /**
   * Returns a single canned detection for frame 0.
   *
   * @param frames Frames to "analyze"; their content is ignored.
   * @returns An empty array when no frames are given, otherwise one fixed detection.
   */
  async detectProducts(frames: Buffer[]): Promise<Detection[]> {
    // A detection pointing at frame 0 would be inconsistent with an empty batch.
    if (frames.length === 0) {
      return [];
    }

    return [
      {
        frameNumber: 0,
        boundingBox: { x: 10, y: 10, width: 20, height: 30 },
        confidence: 0.95,
        label: 'Coca-Cola 600ml',
      },
    ];
  }

  /**
   * Returns a deterministic "known product" match that echoes the detection.
   *
   * The return type is inferred; `implements IAProvider` checks it against the
   * interface's `identifyProduct` signature.
   *
   * @param _image Ignored.
   * @param detection Detection whose label and confidence are echoed back.
   * @returns A match with a fixed product id and `isKnown` set to true.
   */
  async identifyProduct(_image: Buffer, detection: Detection) {
    return {
      productId: 'mock-product-1',
      productName: detection.label ?? 'Coca-Cola 600ml',
      confidence: detection.confidence,
      isKnown: true,
    };
  }

  /** The mock is free per frame. */
  getCostPerFrame(): number {
    return 0;
  }

  /** The mock is free per token; needed because cost calculation calls it on the active provider. */
  getCostPerToken(): number {
    return 0;
  }

  /** The mock is always available. */
  async isAvailable(): Promise<boolean> {
    return true;
  }
}

10. Referencias


Ultima Actualizacion: 2026-01-10