🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1389 lines
38 KiB
Markdown
1389 lines
38 KiB
Markdown
---
id: "ET-SCR-002"
title: "Especificacion Tecnica - Pipeline ETL y Normalizacion"
type: "Technical Specification"
epic: "IAI-007"
status: "Draft"
version: "1.0"
project: "inmobiliaria-analytics"
created_date: "2026-01-04"
updated_date: "2026-01-04"
---
|
|
|
|
# ET-SCR-002: Pipeline ETL y Normalizacion
|
|
|
|
---
|
|
|
|
## 1. Resumen
|
|
|
|
Pipeline de Extract-Transform-Load para procesar datos crudos de propiedades scrapeadas, normalizarlos a un esquema unificado, enriquecerlos con geocoding y detectar duplicados.
|
|
|
|
---
|
|
|
|
## 2. Arquitectura del Pipeline
|
|
|
|
```
|
|
┌──────────────┐     ┌──────────────┐     ┌──────────────┐
│   EXTRACT    │────▶│  TRANSFORM   │────▶│     LOAD     │
│              │     │              │     │              │
│ - Raw HTML   │     │ - Parse      │     │ - Validate   │
│ - JSON APIs  │     │ - Normalize  │     │ - Dedupe     │
│ - Sitemap    │     │ - Geocode    │     │ - Upsert     │
└──────────────┘     └──────────────┘     └──────────────┘
        │                    │                    │
        ▼                    ▼                    ▼
┌──────────────┐     ┌──────────────┐     ┌──────────────┐
│   staging    │     │   enriched   │     │  properties  │
│    _raw      │     │   _staging   │     │   (final)    │
└──────────────┘     └──────────────┘     └──────────────┘
|
|
```
|
|
|
|
---
|
|
|
|
## 3. Esquema de Datos
|
|
|
|
### 3.1 Raw Data (Entrada)
|
|
|
|
```typescript
|
|
/**
 * Listing data as produced by a source extractor, before normalization.
 *
 * Only the provenance fields are mandatory; every extracted field is an
 * optional, unparsed string (or list of strings) exactly as scraped.
 */
interface RawPropertyData {
  /** Name of the source the record came from. */
  source: string;
  /** Identifier of the listing within that source. */
  sourceId: string;
  /** URL the listing was obtained from. */
  sourceUrl: string;
  /** Moment the listing was scraped. */
  scrapedAt: Date;
  /** Raw HTML payload, when the listing was scraped from a page. */
  rawHtml?: string;
  /** Raw JSON payload, when the listing came from a JSON document. */
  rawJson?: Record<string, unknown>;

  // Extracted fields (may vary per source)
  titulo?: string;
  precio?: string;
  ubicacion?: string;
  superficie?: string;
  recamaras?: string;
  banos?: string;
  descripcion?: string;
  imagenes?: string[];
  amenidades?: string[];
  /** Publisher contact details, when present. */
  contacto?: {
    nombre?: string;
    telefono?: string;
    email?: string;
  };
}
|
|
```
|
|
|
|
### 3.2 Normalized Data (Salida)
|
|
|
|
```typescript
|
|
/**
 * Unified, source-independent representation of a property listing: the
 * output of the transform stage and the input of the load stage.
 */
interface NormalizedProperty {
  // --- Identifiers ---
  /** Internal identifier (described as a UUID in the original spec). */
  id: string;
  sourceId: string;
  source: string;
  sourceUrl: string;

  // --- Basic information ---
  title: string;
  description: string;
  propertyType: PropertyType;
  transactionType: TransactionType;

  // --- Pricing ---
  price: number;
  currency: 'MXN' | 'USD';
  pricePerSqm: number | null;

  // --- Surface ---
  /** Land area in m2. */
  landArea: number | null;
  /** Constructed area in m2. */
  constructedArea: number | null;

  // --- Features ---
  bedrooms: number | null;
  bathrooms: number | null;
  parkingSpaces: number | null;
  floors: number | null;
  yearBuilt: number | null;

  // --- Location ---
  location: {
    rawAddress: string;
    street: string | null;
    /** "Colonia". */
    neighborhood: string;
    /** "Municipio". */
    municipality: string;
    state: string;
    postalCode: string | null;
    country: string;
    coordinates: {
      lat: number;
      lng: number;
    } | null;
    geocodeConfidence: number;
  };

  // --- Media ---
  images: PropertyImage[];
  virtualTour: string | null;
  video: string | null;

  // --- Amenities ---
  amenities: string[];

  // --- Contact ---
  agent: {
    name: string | null;
    phone: string | null;
    email: string | null;
    agency: string | null;
  };

  // --- Metadata ---
  firstSeenAt: Date;
  lastSeenAt: Date;
  publishedAt: Date | null;
  status: PropertyStatus;

  // --- Data quality ---
  dataQuality: {
    /** Score in the 0-100 range. */
    score: number;
    missingFields: string[];
    warnings: string[];
  };
}
|
|
|
|
/** Kinds of property recognized by the pipeline; values are the stored strings. */
enum PropertyType {
  CASA = 'casa',
  DEPARTAMENTO = 'departamento',
  TERRENO = 'terreno',
  LOCAL_COMERCIAL = 'local_comercial',
  OFICINA = 'oficina',
  BODEGA = 'bodega',
  EDIFICIO = 'edificio',
  /** Fallback when no specific type applies. */
  OTRO = 'otro',
}
|
|
|
|
/** Type of operation offered by a listing; values are the stored strings. */
enum TransactionType {
  /** Sale. */
  VENTA = 'venta',
  /** Rental. */
  RENTA = 'renta',
  /** Transfer ("traspaso"). */
  TRASPASO = 'traspaso',
}
|
|
|
|
/** Lifecycle status of a stored property; values are the stored strings. */
enum PropertyStatus {
  ACTIVE = 'active',
  SOLD = 'sold',
  RENTED = 'rented',
  INACTIVE = 'inactive',
  REMOVED = 'removed',
  /**
   * Row was folded into another property. The deduplication service's
   * `mergeProperties` writes this value to `properties.status`, so it must
   * be representable here.
   */
  MERGED = 'merged',
}
|
|
|
|
/** One image attached to a normalized property. */
interface PropertyImage {
  /** Image URL. */
  url: string;
  /** Thumbnail URL, or null when there is none. */
  thumbnailUrl: string | null;
  /** Position of the image within the listing's gallery. */
  order: number;
  /** Whether this is the listing's main image. */
  isMain: boolean;
}
|
|
```
|
|
|
|
---
|
|
|
|
## 4. Implementacion del Pipeline
|
|
|
|
### 4.1 Extractor Base
|
|
|
|
```typescript
|
|
// src/etl/extractors/base.extractor.ts
|
|
import { RawPropertyData } from '../types';
|
|
|
|
/**
 * Common contract and text helpers for source-specific extractors.
 *
 * Concrete extractors turn a scraped payload (HTML or JSON) into a partial
 * {@link RawPropertyData}; no parsing of numbers or addresses is expected at
 * this stage beyond the helpers below.
 */
export abstract class BaseExtractor {
  /** Name of the source handled by the concrete extractor. */
  abstract source: string;

  /**
   * Extracts listing fields from an HTML page.
   * @param html Page markup.
   * @param url URL the page was fetched from.
   */
  abstract extractFromHtml(html: string, url: string): Partial<RawPropertyData>;

  /**
   * Extracts listing fields from a JSON payload.
   * @param json Parsed payload of unknown shape.
   * @param url URL the payload was fetched from.
   */
  abstract extractFromJson(json: unknown, url: string): Partial<RawPropertyData>;

  /**
   * Collapses every run of whitespace into a single space and trims the ends.
   * @returns The cleaned text, or '' for null/undefined/empty input.
   */
  protected cleanText(text: string | null | undefined): string {
    if (!text) return '';
    // `\s` already covers \n, \r and \t, so a single pass is enough.
    return text.replace(/\s+/g, ' ').trim();
  }

  /**
   * Finds every number in `text`, accepting commas as thousands separators
   * and a dot as decimal separator (e.g. "1,250,000.50").
   *
   * Each token must start with a digit, so punctuation commas in prose
   * ("Casa, 3 recamaras") are never mistaken for numbers and the result
   * never contains NaN.
   */
  protected extractNumbers(text: string): number[] {
    const tokens = text.match(/\d[\d,]*(?:\.\d+)?/g) ?? [];
    return tokens.map(token => parseFloat(token.replace(/,/g, '')));
  }
}
|
|
```
|
|
|
|
### 4.2 Extractor Inmuebles24
|
|
|
|
```typescript
|
|
// src/etl/extractors/inmuebles24.extractor.ts
|
|
import * as cheerio from 'cheerio';
|
|
import { BaseExtractor } from './base.extractor';
|
|
import { RawPropertyData } from '../types';
|
|
|
|
/**
 * Extractor for the 'inmuebles24' source. Reads listing fields from the page
 * markup via CSS selectors, or from a JSON-LD style payload.
 */
export class Inmuebles24Extractor extends BaseExtractor {
  source = 'inmuebles24';

  /**
   * Extracts the raw listing fields from a listing page.
   * @param html Page markup.
   * @param url URL of the page; also used to derive the source id.
   * @returns Raw fields; text fields that are absent from the page come back
   *          as empty strings.
   */
  extractFromHtml(html: string, url: string): Partial<RawPropertyData> {
    const $ = cheerio.load(html);
    const textOf = (selector: string): string => this.cleanText($(selector).text());

    return {
      source: this.source,
      sourceUrl: url,
      sourceId: this.extractSourceId(url),
      titulo: textOf('h1.title-type-sup'),
      precio: textOf('.price-value'),
      ubicacion: textOf('.location-container'),

      superficie: this.extractSuperficie($),
      recamaras: this.extractFeature($, 'recamaras'),
      banos: this.extractFeature($, 'banos'),

      descripcion: textOf('.description-content'),

      imagenes: this.extractImages($),
      amenidades: this.extractAmenidades($),

      contacto: {
        nombre: textOf('.publisher-name'),
        // `telefono` is typed `string | undefined`, so a missing attribute
        // must map to undefined rather than null.
        telefono: $('[data-phone]').attr('data-phone') || undefined,
      },
    };
  }

  /**
   * Extracts listing fields from a JSON-LD object or internal API payload.
   * @param json Parsed payload of unknown shape.
   * @param url URL the payload was obtained from.
   * @returns The mapped fields when `@type` is 'RealEstateListing'; an empty
   *          object for anything else, including null and primitives.
   */
  extractFromJson(json: unknown, url: string): Partial<RawPropertyData> {
    if (typeof json !== 'object' || json === null) return {};

    const listing = json as Record<string, unknown>;
    if (listing['@type'] !== 'RealEstateListing') return {};

    const offers = listing.offers;
    const price =
      typeof offers === 'object' && offers !== null
        ? (offers as Record<string, unknown>).price
        : undefined;

    return {
      source: this.source,
      sourceUrl: url,
      sourceId: this.scalarToString(listing.identifier),
      titulo: this.scalarToString(listing.name),
      precio: this.scalarToString(price),
      // TODO: map the remaining fields
    };
  }

  /** Returns strings and numbers as a string; anything else as undefined. */
  private scalarToString(value: unknown): string | undefined {
    if (typeof value === 'string') return value;
    if (typeof value === 'number') return value.toString();
    return undefined;
  }

  /** Returns the digits following "propiedades/" in the URL, or '' if absent. */
  private extractSourceId(url: string): string {
    const match = url.match(/propiedades\/(\d+)/);
    return match ? match[1] : '';
  }

  /** Returns the cleaned text of the surface container. */
  private extractSuperficie($: cheerio.CheerioAPI): string {
    return this.cleanText($('.surface-container').text());
  }

  /** Returns the cleaned text of the `.feature-<feature>` element(s). */
  private extractFeature($: cheerio.CheerioAPI, feature: string): string {
    return this.cleanText($(`.feature-${feature}`).text());
  }

  /** Collects gallery image URLs, preferring `src` over `data-src`. */
  private extractImages($: cheerio.CheerioAPI): string[] {
    const images: string[] = [];
    $('img.gallery-image').each((_, el) => {
      const src = $(el).attr('src') || $(el).attr('data-src');
      if (src) images.push(src);
    });
    return images;
  }

  /** Collects the cleaned text of each amenity item, skipping blank ones. */
  private extractAmenidades($: cheerio.CheerioAPI): string[] {
    const amenities: string[] = [];
    $('.amenity-item').each((_, el) => {
      const label = this.cleanText($(el).text());
      if (label) amenities.push(label);
    });
    return amenities;
  }
}
|
|
```
|
|
|
|
### 4.3 Transformador/Normalizador
|
|
|
|
```typescript
|
|
// src/etl/transformers/normalizer.ts
|
|
import { createHash } from 'node:crypto';

import {
  RawPropertyData,
  NormalizedProperty,
  PropertyStatus,
  PropertyType,
  TransactionType,
} from '../types';
import { GeocodingService } from '../services/geocoding.service';
|
|
|
|
/**
 * Transform stage: converts a {@link RawPropertyData} record (unparsed
 * strings as scraped) into a {@link NormalizedProperty}.
 *
 * Parsing is best-effort: values that cannot be parsed become `null`, `0` or
 * `''` (see each helper) instead of throwing, and the gaps are reported in
 * `dataQuality`.
 */
export class PropertyNormalizer {
  /** Maximum title length kept; mirrors the `title VARCHAR(500)` column. */
  private static readonly MAX_TITLE_LENGTH = 500;

  /** Sale prices below this amount are reported as suspicious. */
  private static readonly MIN_PLAUSIBLE_SALE_PRICE = 100000;

  /** Municipalities recognized by the manual address fallback. */
  private static readonly MUNICIPIOS: readonly string[] = [
    'Guadalajara', 'Zapopan', 'Tlaquepaque', 'Tonala',
    'Tlajomulco', 'El Salto', 'Ixtlahuacan',
  ];

  /** Keyword (accent-free, lowercase) -> canonical amenity label, in match order. */
  private static readonly AMENITY_LABELS: ReadonlyArray<readonly [string, string]> = [
    ['alberca', 'Alberca'],
    ['piscina', 'Alberca'],
    ['jardin', 'Jardin'],
    ['gym', 'Gimnasio'],
    ['gimnasio', 'Gimnasio'],
    ['roof', 'Roof Garden'],
    ['terraza', 'Terraza'],
    ['seguridad', 'Seguridad 24/7'],
    ['vigilancia', 'Seguridad 24/7'],
    ['estacionamiento', 'Estacionamiento'],
    ['cochera', 'Estacionamiento'],
  ];

  constructor(private geocoder: GeocodingService) {}

  /**
   * Normalizes one raw listing.
   * @param raw Listing as produced by an extractor.
   * @returns The normalized property; `firstSeenAt`/`lastSeenAt` are both set
   *          to `raw.scrapedAt` and the status to active.
   */
  async normalize(raw: RawPropertyData): Promise<NormalizedProperty> {
    const price = this.parsePrice(raw.precio);
    const areas = this.parseAreas(raw.superficie);
    const location = await this.normalizeLocation(raw.ubicacion);

    return {
      id: this.generateId(raw),
      sourceId: raw.sourceId,
      source: raw.source,
      sourceUrl: raw.sourceUrl,

      title: this.normalizeTitle(raw.titulo),
      description: raw.descripcion || '',
      propertyType: this.detectPropertyType(raw),
      transactionType: this.detectTransactionType(raw),

      price: price.amount,
      currency: price.currency,
      // An unknown price (0) must not produce a misleading "0 per m2".
      pricePerSqm:
        price.amount > 0 && areas.constructed
          ? Math.round(price.amount / areas.constructed)
          : null,

      landArea: areas.land,
      constructedArea: areas.constructed,

      bedrooms: this.parseNumber(raw.recamaras),
      bathrooms: this.parseNumber(raw.banos),
      parkingSpaces: this.extractParkingSpaces(raw),
      floors: null,
      yearBuilt: null,

      location,

      images: this.normalizeImages(raw.imagenes),
      virtualTour: null,
      video: null,

      amenities: this.normalizeAmenities(raw.amenidades),

      agent: {
        name: raw.contacto?.nombre || null,
        phone: this.normalizePhone(raw.contacto?.telefono),
        email: raw.contacto?.email || null,
        agency: null,
      },

      firstSeenAt: raw.scrapedAt,
      lastSeenAt: raw.scrapedAt,
      publishedAt: null,
      status: PropertyStatus.ACTIVE,

      dataQuality: this.calculateDataQuality(raw),
    };
  }

  /** Removes diacritics and lowercases, so "Tonalá" compares equal to "tonala". */
  private static foldAccents(text: string): string {
    return text.normalize('NFD').replace(/[\u0300-\u036f]/g, '').toLowerCase();
  }

  /**
   * Collapses whitespace, trims, and caps the title at MAX_TITLE_LENGTH.
   * @returns '' when the title is missing.
   */
  private normalizeTitle(title?: string): string {
    if (!title) return '';
    return title.replace(/\s+/g, ' ').trim().slice(0, PropertyNormalizer.MAX_TITLE_LENGTH);
  }

  /**
   * Parses a price label such as "$3,500,000.00 MXN" or "USD 350,000".
   *
   * Only the first numeric token is used, so a range ("$2,500,000 -
   * $3,000,000") yields its lower bound instead of the digits of both ends
   * glued together. Commas are thousands separators; several dots in one
   * token ("3.500.000") are treated as thousands separators too.
   *
   * @returns amount 0 / currency MXN when the label is missing or has no number.
   */
  private parsePrice(priceStr?: string): { amount: number; currency: 'MXN' | 'USD' } {
    if (!priceStr) return { amount: 0, currency: 'MXN' };

    const currency = /usd|us\$|\bdl{1,2}s?\b|d[oó]lares/i.test(priceStr) ? 'USD' : 'MXN';

    const token = priceStr.match(/\d[\d.,]*/)?.[0] ?? '';
    let digits = token.replace(/,/g, '');
    if ((digits.match(/\./g) ?? []).length > 1) {
      digits = digits.replace(/\./g, '');
    }
    const amount = parseFloat(digits) || 0;

    return { amount, currency };
  }

  /**
   * Parses a surface label into land and constructed area (m2).
   *
   * Recognizes patterns like "180 m2 construccion", "350 m² de construcción"
   * and "1,200 m2 terreno". When neither keyword is present and the label
   * holds exactly one number, that number is taken as the constructed area.
   */
  private parseAreas(superficieStr?: string): { land: number | null; constructed: number | null } {
    const result = { land: null as number | null, constructed: null as number | null };
    if (!superficieStr) return result;

    const toNumber = (token: string): number => parseFloat(token.replace(/,/g, ''));

    const constructedMatch = superficieStr.match(
      /(\d[\d,]*(?:\.\d+)?)\s*m\s*[2²]?\s*(?:de\s+)?const/i,
    );
    const landMatch = superficieStr.match(
      /(\d[\d,]*(?:\.\d+)?)\s*m\s*[2²]?\s*(?:de\s+)?(?:terr|lote)/i,
    );

    if (constructedMatch) result.constructed = toNumber(constructedMatch[1]);
    if (landMatch) result.land = toNumber(landMatch[1]);

    // If there is only one number, assume it is the constructed area
    // (houses/apartments).
    if (result.constructed === null && result.land === null) {
      // Drop the exponent of "m2"/"m²" first; otherwise "180 m2" would count
      // as two numbers and the fallback would never apply.
      const withoutExponent = superficieStr.replace(/m\s*[2²]/gi, 'm');
      const numbers = withoutExponent.match(/\d[\d,]*(?:\.\d+)?/g);
      if (numbers && numbers.length === 1) {
        result.constructed = toNumber(numbers[0]);
      }
    }

    return result;
  }

  /**
   * Builds the location block, geocoding the raw address when there is one.
   *
   * Missing geocoder fields are filled from the address text; if the geocoder
   * throws, the whole block is derived from manual parsing with confidence 0.
   */
  private async normalizeLocation(rawAddress?: string): Promise<NormalizedProperty['location']> {
    const fallback: NormalizedProperty['location'] = {
      rawAddress: rawAddress || '',
      street: null,
      neighborhood: '',
      municipality: '',
      state: 'Jalisco',
      postalCode: null,
      country: 'Mexico',
      coordinates: null,
      geocodeConfidence: 0,
    };

    if (!rawAddress) return fallback;

    try {
      const geocoded = await this.geocoder.geocode(rawAddress);

      return {
        rawAddress,
        street: geocoded.street,
        neighborhood: geocoded.neighborhood || this.extractColonia(rawAddress),
        // Prefer what the address itself says before assuming Guadalajara.
        municipality:
          geocoded.municipality || this.extractMunicipio(rawAddress) || 'Guadalajara',
        state: geocoded.state || 'Jalisco',
        postalCode: geocoded.postalCode,
        country: 'Mexico',
        coordinates: geocoded.coordinates,
        geocodeConfidence: geocoded.confidence,
      };
    } catch (error: unknown) {
      // Fallback: manual parsing. Logged so geocoder outages are visible.
      console.warn('Geocoding failed, falling back to manual address parsing:', error);
      return {
        ...fallback,
        neighborhood: this.extractColonia(rawAddress),
        municipality: this.extractMunicipio(rawAddress),
      };
    }
  }

  /**
   * Extracts the neighborhood from patterns like "Col. Providencia" or
   * "Colonia Americana". The keyword must start a word, so "Caracol 23" is
   * not read as colonia "23".
   * @returns '' when no pattern is found.
   */
  private extractColonia(address: string): string {
    const match = address.match(/\b(?:colonia\s+|col\.\s*|col\s+)([^,]+)/i);
    return match ? match[1].trim() : '';
  }

  /**
   * Returns the first known municipality mentioned in the address, compared
   * case- and accent-insensitively ("Tonalá" matches 'Tonala').
   * @returns '' when none is mentioned.
   */
  private extractMunicipio(address: string): string {
    const haystack = PropertyNormalizer.foldAccents(address);
    const found = PropertyNormalizer.MUNICIPIOS.find(mun =>
      haystack.includes(PropertyNormalizer.foldAccents(mun)),
    );
    return found ?? '';
  }

  /**
   * Infers the property type from keywords.
   *
   * The title is consulted first and the description only if the title is
   * inconclusive, so a house whose description mentions nearby "locales" is
   * still a house. Keywords match whole words (singular or plural).
   */
  private detectPropertyType(raw: RawPropertyData): PropertyType {
    // Built per call (not as a static) so the enum is only touched at call time.
    const rules: ReadonlyArray<readonly [RegExp, PropertyType]> = [
      [/\b(?:departamentos?|deptos?)\b/, PropertyType.DEPARTAMENTO],
      [/\bcasas?\b/, PropertyType.CASA],
      [/\b(?:terrenos?|lotes?)\b/, PropertyType.TERRENO],
      [/\b(?:local(?:es)?|comercial(?:es)?)\b/, PropertyType.LOCAL_COMERCIAL],
      [/\boficinas?\b/, PropertyType.OFICINA],
      [/\bbodegas?\b/, PropertyType.BODEGA],
      [/\bedificios?\b/, PropertyType.EDIFICIO],
    ];

    const classify = (text?: string): PropertyType | null => {
      if (!text) return null;
      const lower = text.toLowerCase();
      const rule = rules.find(([pattern]) => pattern.test(lower));
      return rule ? rule[1] : null;
    };

    return classify(raw.titulo) ?? classify(raw.descripcion) ?? PropertyType.OTRO;
  }

  /**
   * Infers the transaction type from the title and the source URL.
   * Defaults to sale when neither mentions rental or transfer.
   */
  private detectTransactionType(raw: RawPropertyData): TransactionType {
    const text = `${raw.titulo ?? ''} ${raw.sourceUrl}`.toLowerCase();

    if (text.includes('renta') || text.includes('alquiler')) {
      return TransactionType.RENTA;
    }
    if (text.includes('traspaso')) {
      return TransactionType.TRASPASO;
    }

    return TransactionType.VENTA;
  }

  /**
   * Reduces a Mexican phone number to its 10 national digits when possible.
   * Handles the "52" country prefix and the legacy "52 1" mobile prefix;
   * any other non-empty digit string is returned as-is.
   * @returns null when the input is missing or has no digits.
   */
  private normalizePhone(phone?: string | null): string | null {
    if (!phone) return null;

    const digits = phone.replace(/\D/g, '');

    if (digits.length === 10) return digits;
    if (digits.length === 12 && digits.startsWith('52')) return digits.substring(2);
    if (digits.length === 13 && digits.startsWith('521')) return digits.substring(3);

    return digits || null;
  }

  /**
   * Converts image URLs into image records; the first image is the main one.
   * The thumbnail is derived from the normalized URL, so it is HTTPS too.
   */
  private normalizeImages(images?: string[]): NormalizedProperty['images'] {
    if (!images || images.length === 0) return [];

    return images.map((rawUrl, index) => {
      const url = this.normalizeImageUrl(rawUrl);
      return {
        url,
        thumbnailUrl: this.generateThumbnailUrl(url),
        order: index,
        isMain: index === 0,
      };
    });
  }

  /** Forces HTTPS on `http:` and protocol-relative (`//host/...`) URLs. */
  private normalizeImageUrl(url: string): string {
    const trimmed = url.trim();
    if (trimmed.startsWith('//')) return `https:${trimmed}`;
    return trimmed.replace(/^http:/i, 'https:');
  }

  /**
   * Derives the thumbnail URL by swapping the first '/images/' path segment
   * for '/thumbnails/' (depends on the CDN in use). URLs without that
   * segment are returned unchanged.
   */
  private generateThumbnailUrl(url: string): string {
    return url.replace('/images/', '/thumbnails/');
  }

  /**
   * Maps amenity labels to canonical names and removes duplicates.
   * Matching ignores case and accents ("Jardín" -> 'Jardin'); labels without
   * a known keyword are kept trimmed; blank labels are dropped.
   */
  private normalizeAmenities(amenities?: string[]): string[] {
    if (!amenities) return [];

    const normalized = new Set<string>();

    for (const amenity of amenities) {
      const label = amenity.trim();
      if (!label) continue;

      const folded = PropertyNormalizer.foldAccents(label);
      const entry = PropertyNormalizer.AMENITY_LABELS.find(([keyword]) =>
        folded.includes(keyword),
      );
      normalized.add(entry ? entry[1] : label);
    }

    return Array.from(normalized);
  }

  /**
   * Reads the first number in the text, keeping decimals ("2.5 baños" ->
   * 2.5, "1,5" -> 1.5) instead of concatenating every digit found.
   * @returns null when the text is missing or contains no number.
   */
  private parseNumber(str?: string): number | null {
    if (!str) return null;

    const match = str.match(/\d+(?:[.,]\d+)?/);
    if (!match) return null;

    const value = parseFloat(match[0].replace(',', '.'));
    return Number.isNaN(value) ? null : value;
  }

  /**
   * Looks for a parking-space count in the description and amenities, in
   * either order: "2 estacionamientos" or "cochera para 2".
   * @returns null when no count is found.
   */
  private extractParkingSpaces(raw: RawPropertyData): number | null {
    const text = [raw.descripcion ?? '', ...(raw.amenidades ?? [])].join(' ');

    const countFirst = text.match(/(\d+)\s*(?:estacionamiento|cochera|parking)/i);
    if (countFirst) return parseInt(countFirst[1], 10);

    const countLast = text.match(/(?:estacionamiento|cochera|parking)s?\s+(?:para\s+)?(\d+)/i);
    return countLast ? parseInt(countLast[1], 10) : null;
  }

  /**
   * Builds a stable 32-hex-character id from source + sourceId.
   *
   * When the extractor could not determine a sourceId (empty string), the
   * listing URL is hashed instead; otherwise every such listing of a source
   * would share one id.
   */
  private generateId(raw: RawPropertyData): string {
    const key = raw.sourceId || raw.sourceUrl;
    return createHash('sha256')
      .update(`${raw.source}:${key}`)
      .digest('hex')
      .substring(0, 32);
  }

  /**
   * Scores the completeness of the raw record (0-100): -20 per missing
   * required field, -5 per missing optional field, -10 per warning.
   *
   * Empty strings and empty arrays count as missing. The low-price warning
   * uses the parsed amount and applies to sales only, since monthly rents are
   * legitimately below the threshold. The few-images warning applies when
   * there are one or two images (none at all is reported as a missing field).
   */
  private calculateDataQuality(raw: RawPropertyData): NormalizedProperty['dataQuality'] {
    const requiredFields: ReadonlyArray<keyof RawPropertyData> =
      ['titulo', 'precio', 'ubicacion', 'superficie'];
    const optionalFields: ReadonlyArray<keyof RawPropertyData> =
      ['recamaras', 'banos', 'descripcion', 'imagenes'];

    const isMissing = (field: keyof RawPropertyData): boolean => {
      const value = raw[field];
      return Array.isArray(value) ? value.length === 0 : !value;
    };

    const missingRequired = requiredFields.filter(isMissing);
    const missingOptional = optionalFields.filter(isMissing);

    const warnings: string[] = [];

    if (raw.precio) {
      const { amount } = this.parsePrice(raw.precio);
      const isSale = this.detectTransactionType(raw) === TransactionType.VENTA;
      if (isSale && amount < PropertyNormalizer.MIN_PLAUSIBLE_SALE_PRICE) {
        warnings.push('Precio sospechosamente bajo');
      }
    }

    const imageCount = raw.imagenes?.length ?? 0;
    if (imageCount > 0 && imageCount < 3) {
      warnings.push('Pocas imagenes');
    }

    const score = Math.max(
      0,
      100 - missingRequired.length * 20 - missingOptional.length * 5 - warnings.length * 10,
    );

    return {
      score,
      missingFields: [...missingRequired, ...missingOptional],
      warnings,
    };
  }
}
|
|
```
|
|
|
|
---
|
|
|
|
## 5. Servicio de Geocoding
|
|
|
|
```typescript
|
|
// src/etl/services/geocoding.service.ts
|
|
import { Redis } from 'ioredis';
|
|
|
|
/**
 * Outcome of geocoding one address. Every address component is null when it
 * could not be resolved.
 */
interface GeocodedResult {
  street: string | null;
  neighborhood: string | null;
  municipality: string | null;
  state: string | null;
  postalCode: string | null;
  /** Resolved position, or null when the address was not located. */
  coordinates: { lat: number; lng: number } | null;
  /** Confidence score of the match; 0 for an empty result. */
  confidence: number;
}
|
|
|
|
/** Subset of a Nominatim search hit that this service reads. */
interface NominatimPlace {
  lat?: string;
  lon?: string;
  importance?: number;
  type?: string;
  address?: Record<string, string | undefined>;
}

/**
 * Geocodes free-text addresses through Nominatim, with a Redis cache in
 * front and a client-side rate limit of roughly one request per second.
 */
export class GeocodingService {
  private redis: Redis;
  private nominatimUrl = 'https://nominatim.openstreetmap.org/search';
  /** `lastCall` holds the time slot reserved for the most recent request. */
  private rateLimiter: { lastCall: number; minInterval: number };

  constructor() {
    // Without REDIS_URL the client falls back to its default connection.
    const redisUrl = process.env.REDIS_URL;
    this.redis = redisUrl ? new Redis(redisUrl) : new Redis();
    this.rateLimiter = { lastCall: 0, minInterval: 1100 }; // 1 req/sec for Nominatim
  }

  /**
   * Resolves an address, serving from cache when possible.
   *
   * Completed lookups (including "no match") are cached for 30 days.
   * Transient failures (network error, non-2xx response) return an empty
   * result but are NOT cached, so the address is retried next time.
   *
   * @param address Free-text address.
   * @returns The geocoded result; an empty result (confidence 0) when the
   *          address could not be resolved.
   */
  async geocode(address: string): Promise<GeocodedResult> {
    // 1. Check cache
    const cacheKey = `geocode:${this.hashAddress(address)}`;
    const cached = await this.redis.get(cacheKey);
    if (cached) {
      try {
        return JSON.parse(cached) as GeocodedResult;
      } catch {
        // Corrupted cache entry: ignore it and refresh from the API.
      }
    }

    // 2. Rate limiting
    await this.enforceRateLimit();

    // 3. Call geocoding API
    const result = await this.callNominatim(address);
    if (result === null) {
      return this.emptyResult();
    }

    // 4. Cache result (30 days)
    await this.redis.setex(cacheKey, 60 * 60 * 24 * 30, JSON.stringify(result));

    return result;
  }

  /**
   * Queries Nominatim for the best match of `address` within Jalisco, Mexico.
   * @returns The mapped hit, an empty result when there is no match, or
   *          `null` when the request itself failed.
   */
  private async callNominatim(address: string): Promise<GeocodedResult | null> {
    const params = new URLSearchParams({
      q: `${address}, Jalisco, Mexico`,
      format: 'json',
      addressdetails: '1',
      limit: '1',
    });

    try {
      const response = await fetch(`${this.nominatimUrl}?${params}`, {
        headers: {
          'User-Agent': 'InmobiliariaAnalytics/1.0',
        },
      });

      if (!response.ok) {
        console.error(`Geocoding error: HTTP ${response.status}`);
        return null;
      }

      const data: unknown = await response.json();

      if (!Array.isArray(data) || data.length === 0) {
        return this.emptyResult();
      }

      const place = data[0] as NominatimPlace;
      const addr = place.address ?? {};
      const lat = parseFloat(place.lat ?? '');
      const lng = parseFloat(place.lon ?? '');

      return {
        street: addr.road || addr.street || null,
        neighborhood: addr.suburb || addr.neighbourhood || null,
        municipality: addr.city || addr.town || addr.municipality || null,
        state: addr.state || null,
        postalCode: addr.postcode || null,
        // Never expose NaN coordinates downstream.
        coordinates: Number.isFinite(lat) && Number.isFinite(lng) ? { lat, lng } : null,
        confidence: this.calculateConfidence(place),
      };
    } catch (error: unknown) {
      console.error('Geocoding error:', error);
      return null;
    }
  }

  /**
   * Derives a 0-100 confidence from the hit's `importance` (scaled by 100),
   * plus 20 points when the hit type is a house or building.
   */
  private calculateConfidence(result: { importance?: number; type?: string }): number {
    let confidence = (result.importance || 0) * 100;

    // Bonus for precise hit types
    if (result.type === 'house' || result.type === 'building') {
      confidence += 20;
    }

    return Math.round(Math.min(100, Math.max(0, confidence)));
  }

  /**
   * Waits until this caller's request slot.
   *
   * The slot is reserved synchronously, before any await, so concurrent
   * callers queue one `minInterval` apart instead of all waking up together.
   */
  private async enforceRateLimit(): Promise<void> {
    const now = Date.now();
    const slot = Math.max(now, this.rateLimiter.lastCall + this.rateLimiter.minInterval);
    this.rateLimiter.lastCall = slot;

    const delay = slot - now;
    if (delay > 0) {
      await new Promise(resolve => setTimeout(resolve, delay));
    }
  }

  /** MD5 hex digest of the lowercased, trimmed address (cache key only). */
  private hashAddress(address: string): string {
    const crypto = require('crypto');
    return crypto.createHash('md5').update(address.toLowerCase().trim()).digest('hex');
  }

  /** Result used when nothing could be resolved. */
  private emptyResult(): GeocodedResult {
    return {
      street: null,
      neighborhood: null,
      municipality: null,
      state: null,
      postalCode: null,
      coordinates: null,
      confidence: 0,
    };
  }
}
|
|
```
|
|
|
|
---
|
|
|
|
## 6. Detector de Duplicados
|
|
|
|
```typescript
|
|
// src/etl/services/deduplication.service.ts
|
|
import { Pool } from 'pg';
|
|
import { NormalizedProperty } from '../types';
|
|
|
|
/** An existing property suspected to be the same listing as the one being loaded. */
interface DuplicateCandidate {
  /** Id of the existing property. */
  id: string;
  /** Similarity score; higher means more alike (1.0 is used for exact matches). */
  similarity: number;
  /** Names of the fields that matched. */
  matchedFields: string[];
}
|
|
|
|
/**
 * Columns read from `properties` rows when comparing candidates. DECIMAL
 * columns are typed `string | number` because the driver may hand them back
 * as strings.
 */
interface CandidateRow {
  id: string;
  title?: string | null;
  price?: string | number | null;
  bedrooms?: string | number | null;
  bathrooms?: string | number | null;
  constructed_area?: string | number | null;
  neighborhood?: string | null;
  distance_meters?: number | null;
}

/**
 * Finds already-stored properties from OTHER sources that look like the same
 * listing, and merges confirmed duplicates.
 */
export class DeduplicationService {
  private db: Pool;

  /**
   * @param db Optional connection pool to reuse; by default a pool is created
   *           from `DATABASE_URL`.
   */
  constructor(db?: Pool) {
    this.db = db ?? new Pool({ connectionString: process.env.DATABASE_URL });
  }

  /**
   * Returns duplicate candidates for `property`, best first.
   * Each existing property appears at most once, with its highest similarity.
   */
  async findDuplicates(property: NormalizedProperty): Promise<DuplicateCandidate[]> {
    const byId = new Map<string, DuplicateCandidate>();

    // 1. Near-exact match from another source (location + price + title)
    const exactMatch = await this.findExactMatch(property);
    if (exactMatch) {
      byId.set(exactMatch.id, { ...exactMatch, similarity: 1.0 });
    }

    // 2. Fuzzy matching on characteristics
    for (const candidate of await this.findFuzzyMatches(property)) {
      const known = byId.get(candidate.id);
      if (!known || candidate.similarity > known.similarity) {
        byId.set(candidate.id, candidate);
      }
    }

    return [...byId.values()].sort((a, b) => b.similarity - a.similarity);
  }

  /**
   * Looks for the same property published by a different source: same type,
   * price within 5%, within 100 m, then requires title similarity > 0.8 and
   * distance < 50 m. Nearest rows are examined first.
   * @returns The first qualifying row, or null (also when the property has no
   *          coordinates or no known price).
   */
  private async findExactMatch(property: NormalizedProperty): Promise<DuplicateCandidate | null> {
    const coords = property.location.coordinates;
    // Without coordinates or a price the query cannot discriminate anything.
    if (!coords || !(property.price > 0)) return null;

    const query = `
      SELECT id, source, source_id, title, price,
             ST_Distance(
               coordinates::geography,
               ST_SetSRID(ST_MakePoint($1, $2), 4326)::geography
             ) as distance_meters
      FROM properties
      WHERE source != $3
        AND price BETWEEN $4 * 0.95 AND $4 * 1.05
        AND property_type = $5
        AND status = 'active'
        AND ST_DWithin(
          coordinates::geography,
          ST_SetSRID(ST_MakePoint($1, $2), 4326)::geography,
          100 -- 100 meters
        )
      ORDER BY distance_meters
      LIMIT 5
    `;

    const result = await this.db.query(query, [
      coords.lng,
      coords.lat,
      property.source,
      property.price,
      property.propertyType,
    ]);

    for (const row of result.rows as CandidateRow[]) {
      const titleSimilarity = this.calculateTextSimilarity(property.title, row.title ?? '');
      const distance = this.toNumber(row.distance_meters);
      if (titleSimilarity > 0.8 && distance !== null && distance < 50) {
        return {
          id: row.id,
          similarity: 0.95,
          matchedFields: ['coordinates', 'price', 'title', 'property_type'],
        };
      }
    }

    return null;
  }

  /**
   * Looks for active properties of other sources in the same neighborhood,
   * of the same type and with a price within 10%, and keeps those whose
   * weighted similarity exceeds 0.75.
   * @returns [] when the neighborhood or the price is unknown, since the
   *          filter would otherwise match unrelated rows.
   */
  private async findFuzzyMatches(property: NormalizedProperty): Promise<DuplicateCandidate[]> {
    if (!property.location.neighborhood || !(property.price > 0)) return [];

    const query = `
      SELECT id, title, price, bedrooms, bathrooms, constructed_area,
             neighborhood, coordinates
      FROM properties
      WHERE source != $1
        AND neighborhood = $2
        AND property_type = $3
        AND price BETWEEN $4 * 0.9 AND $4 * 1.1
        AND status = 'active'
      LIMIT 20
    `;

    const result = await this.db.query(query, [
      property.source,
      property.location.neighborhood,
      property.propertyType,
      property.price,
    ]);

    const candidates: DuplicateCandidate[] = [];

    for (const row of result.rows as CandidateRow[]) {
      const similarity = this.calculatePropertySimilarity(property, row);

      if (similarity > 0.75) {
        candidates.push({
          id: row.id,
          similarity,
          matchedFields: this.getMatchedFields(property, row),
        });
      }
    }

    return candidates;
  }

  /** Converts a DB value (number, numeric string, null) to a finite number or null. */
  private toNumber(value: unknown): number | null {
    if (value === null || value === undefined || value === '') return null;
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : null;
  }

  /**
   * Weighted similarity in [0, 1] between a property and a stored row.
   * Weights: price 0.3, area 0.25, bedrooms 0.15, bathrooms 0.15, title 0.15.
   * Components unknown on either side are left out and the remaining weights
   * are renormalized.
   */
  private calculatePropertySimilarity(prop: NormalizedProperty, candidate: CandidateRow): number {
    let score = 0;
    let totalWeight = 0;

    // Price (weight 0.3)
    const candidatePrice = this.toNumber(candidate.price);
    if (prop.price > 0 && candidatePrice !== null) {
      const priceDiff = Math.abs(prop.price - candidatePrice) / prop.price;
      score += (1 - Math.min(priceDiff, 1)) * 0.3;
      totalWeight += 0.3;
    }

    // Area (weight 0.25)
    const candidateArea = this.toNumber(candidate.constructed_area);
    if (prop.constructedArea && candidateArea) {
      const areaDiff = Math.abs(prop.constructedArea - candidateArea) / prop.constructedArea;
      score += (1 - Math.min(areaDiff, 1)) * 0.25;
      totalWeight += 0.25;
    }

    // Bedrooms (weight 0.15)
    const candidateBedrooms = this.toNumber(candidate.bedrooms);
    if (prop.bedrooms !== null && candidateBedrooms !== null) {
      score += (prop.bedrooms === candidateBedrooms ? 1 : 0) * 0.15;
      totalWeight += 0.15;
    }

    // Bathrooms (weight 0.15)
    const candidateBathrooms = this.toNumber(candidate.bathrooms);
    if (prop.bathrooms !== null && candidateBathrooms !== null) {
      score += (prop.bathrooms === candidateBathrooms ? 1 : 0) * 0.15;
      totalWeight += 0.15;
    }

    // Title (weight 0.15)
    const titleSim = this.calculateTextSimilarity(prop.title, candidate.title ?? '');
    score += titleSim * 0.15;
    totalWeight += 0.15;

    return totalWeight > 0 ? score / totalWeight : 0;
  }

  /**
   * Jaccard similarity over lowercase words.
   * @returns 0 when neither text contains any word.
   */
  private calculateTextSimilarity(text1: string, text2: string): number {
    const tokenize = (text: string): Set<string> =>
      new Set(text.toLowerCase().split(/\s+/).filter(word => word.length > 0));

    const words1 = tokenize(text1);
    const words2 = tokenize(text2);

    const union = new Set([...words1, ...words2]);
    if (union.size === 0) return 0;

    const shared = [...words1].filter(word => words2.has(word)).length;
    return shared / union.size;
  }

  /**
   * Lists which of price (within 5%), bedrooms, bathrooms and neighborhood
   * agree. A field unknown on either side never counts as matched.
   */
  private getMatchedFields(prop: NormalizedProperty, candidate: CandidateRow): string[] {
    const matched: string[] = [];

    const candidatePrice = this.toNumber(candidate.price);
    if (
      prop.price > 0 &&
      candidatePrice !== null &&
      Math.abs(prop.price - candidatePrice) / prop.price < 0.05
    ) {
      matched.push('price');
    }

    const candidateBedrooms = this.toNumber(candidate.bedrooms);
    if (prop.bedrooms !== null && prop.bedrooms === candidateBedrooms) {
      matched.push('bedrooms');
    }

    const candidateBathrooms = this.toNumber(candidate.bathrooms);
    if (prop.bathrooms !== null && prop.bathrooms === candidateBathrooms) {
      matched.push('bathrooms');
    }

    if (prop.location.neighborhood && prop.location.neighborhood === candidate.neighborhood) {
      matched.push('neighborhood');
    }

    return matched;
  }

  /**
   * Folds `duplicateIds` into `primaryId` in one transaction: records an
   * alias per duplicate and marks each duplicate row as merged.
   *
   * The primary id itself and repeated ids are ignored; with nothing left to
   * merge the call is a no-op.
   *
   * @throws Re-throws any database error after rolling back.
   */
  async mergeProperties(
    primaryId: string,
    duplicateIds: string[]
  ): Promise<void> {
    // A property must never be merged into itself.
    const aliasIds = [...new Set(duplicateIds)].filter(id => id !== primaryId);
    if (aliasIds.length === 0) return;

    const client = await this.db.connect();

    try {
      await client.query('BEGIN');

      // Create the property_aliases records.
      // NOTE(review): other code inserts into property_aliases using
      // alias_source/alias_source_id/alias_url columns; confirm the table
      // also defines alias_id and merged_at.
      for (const aliasId of aliasIds) {
        await client.query(`
          INSERT INTO property_aliases (primary_id, alias_id, merged_at)
          VALUES ($1, $2, NOW())
          ON CONFLICT DO NOTHING
        `, [primaryId, aliasId]);
      }

      // Mark duplicates as merged
      await client.query(`
        UPDATE properties
        SET status = 'merged', merged_into = $1
        WHERE id = ANY($2)
      `, [primaryId, aliasIds]);

      await client.query('COMMIT');
    } catch (error) {
      await client.query('ROLLBACK');
      throw error;
    } finally {
      client.release();
    }
  }
}
|
|
```
|
|
|
|
---
|
|
|
|
## 7. Loader (Carga a Base de Datos)
|
|
|
|
```typescript
|
|
// src/etl/loaders/property.loader.ts
|
|
import { Pool } from 'pg';
|
|
import { NormalizedProperty } from '../types';
|
|
import { DeduplicationService } from '../services/deduplication.service';
|
|
|
|
/**
 * Load stage: writes a normalized property to the `properties` table,
 * updating the existing row for the same source listing, linking it as an
 * alias of a duplicate from another source, or inserting a new row.
 */
export class PropertyLoader {
  private db: Pool;
  private deduper: DeduplicationService;

  /**
   * @param db Optional connection pool to reuse; by default a pool is created
   *           from `DATABASE_URL`.
   * @param deduper Optional deduplication service; by default a new one is
   *                created.
   */
  constructor(db?: Pool, deduper?: DeduplicationService) {
    this.db = db ?? new Pool({ connectionString: process.env.DATABASE_URL });
    this.deduper = deduper ?? new DeduplicationService();
  }

  /**
   * Stores one property.
   * @returns What was done and the id of the affected row:
   *          - 'updated': a row with the same source + sourceId existed;
   *          - 'duplicate': the best duplicate candidate scored above 0.9 and
   *            the listing was recorded as its alias (id of that candidate);
   *          - 'inserted': a new row was created.
   */
  async load(property: NormalizedProperty): Promise<{ action: 'inserted' | 'updated' | 'duplicate'; id: string }> {
    // 1. Check whether it already exists by source + sourceId
    const existing = await this.findExisting(property.source, property.sourceId);
    if (existing) {
      await this.update(existing.id, property);
      return { action: 'updated', id: existing.id };
    }

    // 2. Look for duplicates from other sources
    const [bestMatch] = await this.deduper.findDuplicates(property);
    if (bestMatch && bestMatch.similarity > 0.9) {
      // It is a duplicate: link it to the existing property
      await this.linkDuplicate(bestMatch.id, property);
      return { action: 'duplicate', id: bestMatch.id };
    }

    // 3. Insert the new property
    const id = await this.insert(property);
    return { action: 'inserted', id };
  }

  /** Returns the row id for the given source listing, or null if not stored. */
  private async findExisting(source: string, sourceId: string): Promise<{ id: string } | null> {
    const result = await this.db.query(
      'SELECT id FROM properties WHERE source = $1 AND source_id = $2',
      [source, sourceId]
    );
    return result.rows[0] || null;
  }

  /**
   * Inserts the property and returns its id.
   *
   * If a row with the same primary key appears between the existence check
   * and this insert (concurrent loads of the same listing), the insert turns
   * into a `last_seen_at` refresh instead of failing.
   */
  private async insert(property: NormalizedProperty): Promise<string> {
    const query = `
      INSERT INTO properties (
        id, source, source_id, source_url,
        title, description, property_type, transaction_type,
        price, currency, price_per_sqm,
        land_area, constructed_area,
        bedrooms, bathrooms, parking_spaces, floors, year_built,
        raw_address, street, neighborhood, municipality, state, postal_code, country,
        coordinates, geocode_confidence,
        images, virtual_tour, video,
        amenities,
        agent_name, agent_phone, agent_email, agent_agency,
        first_seen_at, last_seen_at, published_at, status,
        data_quality_score, missing_fields, data_warnings
      ) VALUES (
        $1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
        $11, $12, $13, $14, $15, $16, $17, $18,
        $19, $20, $21, $22, $23, $24, $25,
        ST_SetSRID(ST_MakePoint($26::float8, $27::float8), 4326), $28,
        $29, $30, $31, $32,
        $33, $34, $35, $36,
        $37, $38, $39, $40,
        $41, $42, $43
      )
      ON CONFLICT (id) DO UPDATE
        SET last_seen_at = GREATEST(properties.last_seen_at, EXCLUDED.last_seen_at)
      RETURNING id
    `;

    const coords = property.location.coordinates;
    const { location, agent, dataQuality } = property;

    const result = await this.db.query(query, [
      property.id,
      property.source,
      property.sourceId,
      property.sourceUrl,
      property.title,
      property.description,
      property.propertyType,
      property.transactionType,
      property.price,
      property.currency,
      property.pricePerSqm,
      property.landArea,
      property.constructedArea,
      property.bedrooms,
      property.bathrooms,
      property.parkingSpaces,
      property.floors,
      property.yearBuilt,
      location.rawAddress,
      location.street,
      location.neighborhood,
      location.municipality,
      location.state,
      location.postalCode,
      location.country,
      // `??` rather than `||`: a coordinate of exactly 0 is a valid value.
      coords?.lng ?? null,
      coords?.lat ?? null,
      location.geocodeConfidence,
      JSON.stringify(property.images),
      property.virtualTour,
      property.video,
      property.amenities,
      agent.name,
      agent.phone,
      agent.email,
      agent.agency,
      property.firstSeenAt,
      property.lastSeenAt,
      property.publishedAt,
      property.status,
      dataQuality.score,
      dataQuality.missingFields,
      dataQuality.warnings,
    ]);

    return result.rows[0].id;
  }

  /**
   * Refreshes the mutable fields of an existing row: title, description,
   * price, price per m2, images and the complete data-quality report.
   * `last_seen_at` advances to the listing's scrape time and never moves
   * backwards, so reprocessing old data does not make a stale listing look
   * fresh.
   */
  private async update(id: string, property: NormalizedProperty): Promise<void> {
    const query = `
      UPDATE properties SET
        title = $2,
        description = $3,
        price = $4,
        price_per_sqm = $5,
        last_seen_at = GREATEST(last_seen_at, $6::timestamp),
        data_quality_score = $7,
        missing_fields = $8,
        data_warnings = $9,
        images = $10
      WHERE id = $1
    `;

    await this.db.query(query, [
      id,
      property.title,
      property.description,
      property.price,
      property.pricePerSqm,
      property.lastSeenAt,
      property.dataQuality.score,
      property.dataQuality.missingFields,
      property.dataQuality.warnings,
      JSON.stringify(property.images),
    ]);
  }

  /**
   * Records the listing as an alias of an existing property and advances
   * that property's `last_seen_at` to the listing's scrape time.
   */
  private async linkDuplicate(existingId: string, property: NormalizedProperty): Promise<void> {
    // Register as alias.
    // NOTE(review): other code inserts into property_aliases using
    // alias_id/merged_at columns; confirm the table defines both column sets.
    await this.db.query(`
      INSERT INTO property_aliases (primary_id, alias_source, alias_source_id, alias_url)
      VALUES ($1, $2, $3, $4)
      ON CONFLICT DO NOTHING
    `, [existingId, property.source, property.sourceId, property.sourceUrl]);

    // Update last_seen of the primary
    await this.db.query(`
      UPDATE properties
      SET last_seen_at = GREATEST(last_seen_at, $2::timestamp)
      WHERE id = $1
    `, [existingId, property.lastSeenAt]);
  }
}
|
|
```
|
|
|
|
---
|
|
|
|
## 8. Esquema de Base de Datos
|
|
|
|
```sql
|
|
-- Main properties table: one row per listing, unique per (source, source_id).
-- Timestamp columns use TIMESTAMPTZ so that values written with NOW() and
-- values supplied by the application denote the same absolute instant
-- regardless of the session or client time zone.
CREATE TABLE properties (
    -- Identity and provenance
    id                  VARCHAR(32) PRIMARY KEY,
    source              VARCHAR(50) NOT NULL,
    source_id           VARCHAR(100) NOT NULL,
    source_url          TEXT NOT NULL,

    -- Listing description
    title               VARCHAR(500) NOT NULL,
    description         TEXT,
    property_type       VARCHAR(50) NOT NULL,
    transaction_type    VARCHAR(20) NOT NULL,

    -- Price
    price               DECIMAL(15,2) NOT NULL,
    currency            VARCHAR(3) DEFAULT 'MXN',
    price_per_sqm       DECIMAL(10,2),

    -- Surface areas
    land_area           DECIMAL(10,2),
    constructed_area    DECIMAL(10,2),

    -- Features
    bedrooms            SMALLINT,
    bathrooms           DECIMAL(3,1),
    parking_spaces      SMALLINT,
    floors              SMALLINT,
    year_built          SMALLINT,

    -- Location (GEOMETRY is a PostGIS type; SRID 4326 is WGS 84)
    raw_address         TEXT,
    street              VARCHAR(200),
    neighborhood        VARCHAR(100),
    municipality        VARCHAR(100),
    state               VARCHAR(50),
    postal_code         VARCHAR(10),
    country             VARCHAR(50) DEFAULT 'Mexico',
    coordinates         GEOMETRY(Point, 4326),
    geocode_confidence  SMALLINT,

    -- Media
    images              JSONB DEFAULT '[]',
    virtual_tour        TEXT,
    video               TEXT,

    amenities           TEXT[],

    -- Agent contact
    agent_name          VARCHAR(200),
    agent_phone         VARCHAR(20),
    agent_email         VARCHAR(200),
    agent_agency        VARCHAR(200),

    -- Lifecycle
    first_seen_at       TIMESTAMPTZ NOT NULL,
    last_seen_at        TIMESTAMPTZ NOT NULL,
    published_at        TIMESTAMPTZ,
    status              VARCHAR(20) DEFAULT 'active',
    -- Self-reference; presumably the surviving row after a merge (confirm)
    merged_into         VARCHAR(32) REFERENCES properties(id),

    -- Data quality
    data_quality_score  SMALLINT,
    missing_fields      TEXT[],
    data_warnings       TEXT[],

    -- Audit
    created_at          TIMESTAMPTZ DEFAULT NOW(),
    updated_at          TIMESTAMPTZ DEFAULT NOW(),

    UNIQUE(source, source_id)
);

-- Indexes
CREATE INDEX idx_properties_location ON properties USING GIST(coordinates);
CREATE INDEX idx_properties_neighborhood ON properties(neighborhood);
CREATE INDEX idx_properties_price ON properties(price);
CREATE INDEX idx_properties_type ON properties(property_type);
CREATE INDEX idx_properties_status ON properties(status);
CREATE INDEX idx_properties_last_seen ON properties(last_seen_at);
|
|
|
|
-- Alias table (duplicate listings): maps a (source, source_id) pair found on
-- another portal to the primary row in properties.
CREATE TABLE property_aliases (
    id               SERIAL PRIMARY KEY,
    -- An alias is meaningless without its primary listing, hence NOT NULL
    primary_id       VARCHAR(32) NOT NULL REFERENCES properties(id),
    alias_source     VARCHAR(50) NOT NULL,
    alias_source_id  VARCHAR(100) NOT NULL,
    alias_url        TEXT,
    merged_at        TIMESTAMPTZ DEFAULT NOW(),

    -- Target of the loader's INSERT ... ON CONFLICT DO NOTHING
    UNIQUE(alias_source, alias_source_id)
);

-- Foreign-key columns are not indexed automatically; this supports looking up
-- all aliases of a primary listing and FK checks when a property is deleted.
CREATE INDEX idx_property_aliases_primary ON property_aliases(primary_id);
|
|
|
|
-- Price history: one row per recorded price of a property.
CREATE TABLE price_history (
    id           SERIAL PRIMARY KEY,
    -- Every price point must belong to a property, hence NOT NULL
    property_id  VARCHAR(32) NOT NULL REFERENCES properties(id),
    price        DECIMAL(15,2) NOT NULL,
    currency     VARCHAR(3) NOT NULL,
    -- TIMESTAMPTZ keeps the recorded instant unambiguous across time zones
    recorded_at  TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX idx_price_history_property ON price_history(property_id);
|
|
```
|
|
|
|
---
|
|
|
|
## 9. Tests
|
|
|
|
```typescript
|
|
// src/etl/__tests__/normalizer.test.ts
|
|
import { PropertyNormalizer } from '../transformers/normalizer';
|
|
import { GeocodingService } from '../services/geocoding.service';
|
|
|
|
jest.mock('../services/geocoding.service');

describe('PropertyNormalizer', () => {
  /** Input type accepted by `PropertyNormalizer.normalize`. */
  type RawInput = Parameters<PropertyNormalizer['normalize']>[0];

  let normalizer: PropertyNormalizer;

  /**
   * Builds a minimal raw record for source "test" with the given id, plus any
   * extra scraped fields the individual test needs.
   */
  const rawFor = (sourceId: string, fields: Partial<RawInput> = {}): RawInput => ({
    source: 'test',
    sourceId,
    sourceUrl: `http://test.com/${sourceId}`,
    scrapedAt: new Date(),
    ...fields,
  });

  beforeEach(() => {
    // Canned geocoding answer returned by the mocked service in every test
    const geocodeResult = {
      street: 'Av. Providencia',
      neighborhood: 'Providencia',
      municipality: 'Guadalajara',
      state: 'Jalisco',
      postalCode: '44630',
      coordinates: { lat: 20.6736, lng: -103.3927 },
      confidence: 85,
    };

    const mockGeocoder = new GeocodingService();
    (mockGeocoder.geocode as jest.Mock).mockResolvedValue(geocodeResult);

    normalizer = new PropertyNormalizer(mockGeocoder);
  });

  describe('parsePrice', () => {
    const priceCases = [
      { currency: 'MXN', sourceId: '123', precio: '$4,500,000 MXN', price: 4500000 },
      { currency: 'USD', sourceId: '124', precio: '$350,000 USD', price: 350000 },
    ];

    for (const { currency, sourceId, precio, price } of priceCases) {
      it(`should parse ${currency} price correctly`, async () => {
        const result = await normalizer.normalize(rawFor(sourceId, { precio }));

        expect(result.price).toBe(price);
        expect(result.currency).toBe(currency);
      });
    }
  });

  describe('parseAreas', () => {
    it('should parse both land and constructed areas', async () => {
      const raw = rawFor('125', {
        superficie: '180 m2 construccion, 250 m2 terreno',
      });

      const result = await normalizer.normalize(raw);

      expect(result.constructedArea).toBe(180);
      expect(result.landArea).toBe(250);
    });
  });

  describe('detectPropertyType', () => {
    it('should detect departamento', async () => {
      const raw = rawFor('126', {
        titulo: 'Hermoso departamento en Providencia',
      });

      const result = await normalizer.normalize(raw);

      expect(result.propertyType).toBe('departamento');
    });
  });
});
|
|
```
|
|
|
|
---
|
|
|
|
## 10. Metricas y Monitoreo
|
|
|
|
```typescript
|
|
// ETL pipeline metrics.
// NOTE(review): Counter and Histogram are not imported in this snippet —
// presumably they come from prom-client; confirm.

// Counters

/** Properties extracted, labelled by source. */
const propertiesExtractedTotal = new Counter({
  name: 'etl_properties_extracted_total',
  help: 'Total properties extracted',
  labelNames: ['source'],
});

/** Properties normalized, labelled by source and property type. */
const propertiesNormalizedTotal = new Counter({
  name: 'etl_properties_normalized_total',
  help: 'Total properties normalized',
  labelNames: ['source', 'property_type'],
});

/** Properties loaded, labelled by the action taken. */
const propertiesLoadedTotal = new Counter({
  name: 'etl_properties_loaded_total',
  help: 'Total properties loaded',
  labelNames: ['action'], // inserted, updated, duplicate
});

/** Geocoding requests, labelled by outcome. */
const geocodingRequestsTotal = new Counter({
  name: 'etl_geocoding_requests_total',
  help: 'Total geocoding requests',
  labelNames: ['status'], // success, error, cache_hit
});

// Histograms

/** Per-property normalization time, in seconds. */
const normalizationDurationSeconds = new Histogram({
  name: 'etl_normalization_duration_seconds',
  help: 'Time to normalize a property',
  buckets: [0.01, 0.05, 0.1, 0.5, 1, 2, 5],
});

/** Distribution of data quality scores. */
const dataQualityScore = new Histogram({
  name: 'etl_data_quality_score',
  help: 'Data quality scores',
  buckets: [20, 40, 60, 80, 100],
});

/** All ETL metrics, keyed by metric name without the `etl_` prefix. */
export const etlMetrics = {
  properties_extracted_total: propertiesExtractedTotal,
  properties_normalized_total: propertiesNormalizedTotal,
  properties_loaded_total: propertiesLoadedTotal,
  geocoding_requests_total: geocodingRequestsTotal,
  normalization_duration_seconds: normalizationDurationSeconds,
  data_quality_score: dataQualityScore,
};
|
|
```
|
|
|
|
---
|
|
|
|
**Siguiente:** [ET-IA-007-proxies.md](./ET-IA-007-proxies.md)
|