Files
Alpha/src/services/parse-transcription.ts
T
ricardo 7d299554bf K-10 pipeline transcripciones + settings IA + cache-aside + session doc
Nuevos módulos:
- services/ai.ts: cliente IA provider-agnóstico (OpenRouter, MiniMax)
- services/db.ts: Dexie core con tabla settings + project_docs
- services/storage.ts: Cache-Aside + Write-Through (L1 Map → L2 Dexie → L3 localStorage)
- services/parse-transcription.ts: parser .docx/.vtt/.txt/.md
- services/session-analyzer.ts: extracción IA de sesiones (resumen, tareas, decisiones)
- services/project-doc.ts: documento maestro MD (Bloque 1 resumen + Bloque 2 sesiones)
- stores/settings.ts: proveedores IA, modelos, API keys separadas por provider
- stores/transcriptions.ts: pipeline upload → analyze → create HU en KAPPA
- views/SettingsView.vue: configuración IA (OpenRouter, MiniMax, OpenCode bridge)
- views/TranscriptionsView.vue: subida múltiple + análisis de sesión + visor MD + calendario
- components/AiProjectChat.vue: chat contextual por proyecto con selector de modelo

Cambios en existentes:
- stores/auth.ts, kappa-api.ts, upload-hu.ts: migrados a storage service (Dexie + localStorage)
- stores/projects.ts, workitems.ts: kappa_last_project via storage
- DashboardView.vue: descripción reemplazada por AiProjectChat
- NewDashboardView.vue: tabs transcriptions + settings + navigate-settings events
- NavMain.vue: items Transcripciones + Configuración
- SiteHeader.vue: labels tabs + language via storage
- LoginView.vue: remember_email via storage
- i18n: +80 keys español/inglés
- vite.config.ts: proxy CORS para MiniMax
- package.json: +mammoth.js
2026-05-28 12:42:30 -05:00

70 lines
1.8 KiB
TypeScript

import * as mammoth from 'mammoth'
/** File formats that the transcription parser distinguishes between. */
export type TranscriptionFileType = 'docx' | 'vtt' | 'txt' | 'md'
/** Result of parsing a single transcription file. */
export interface ParsedTranscription {
/** Name of the source file, taken from the `File` object. */
fileName: string
/** Format assigned to the file; `parseFile` derives it from the file name. */
fileType: TranscriptionFileType
/** Extracted plain text, trimmed of leading and trailing whitespace. */
text: string
/** Size of the source file as reported by `File.size` (logged as bytes). */
size: number
}
/**
 * Matches the `WEBVTT` signature at the start of a line, case-insensitively.
 * The signature may be followed by whitespace or by the end of the input, so a
 * file that consists of the bare word `WEBVTT` is still recognised.
 */
const VTT_HEADER_RE = /^WEBVTT(?:\s|$)/mi
/**
 * Matches a cue timing line (`start --> end`) at the start of a line.
 * The hours component of each timestamp is optional (`mm:ss.ttt` and
 * `hh:mm:ss.ttt` are both accepted). Text after the end timestamp, such as
 * cue settings, is not inspected.
 */
const VTT_TIMING_RE = /^(?:\d{2,}:)?\d{2}:\d{2}\.\d{3}\s*-->\s*(?:\d{2,}:)?\d{2}:\d{2}\.\d{3}/m
/**
 * Global, unanchored variant of the timing pattern. It is not referenced by
 * the functions below; because of the `g` flag it keeps `lastIndex` state, so
 * reset it before reusing it with `test`/`exec`.
 */
const VTT_CUE_RE = /(?:\d{2,}:)?\d{2}:\d{2}\.\d{3}\s*-->\s*(?:\d{2,}:)?\d{2}:\d{2}\.\d{3}/g
/** Matches a line made up solely of digits (a candidate numeric cue identifier). */
const VTT_NUMERIC_LINE_RE = /^\d+$/

/**
 * Reports whether `text` looks like WebVTT content.
 *
 * @param text - Raw file contents.
 * @returns `true` when any line starts with the `WEBVTT` signature or with a
 *   cue timing (`start --> end`).
 */
function isVTT(text: string): boolean {
  return VTT_HEADER_RE.test(text) || VTT_TIMING_RE.test(text)
}

/**
 * Flattens WebVTT content into a single line of plain text.
 *
 * Dropped: blank lines, the header block (the `WEBVTT` signature line plus the
 * lines that follow it up to the first blank line or cue timing, e.g.
 * `Kind:` / `Language:` metadata), cue timing lines, and digits-only cue
 * identifiers that sit directly above a timing line. Everything else is kept
 * in order, joined with single spaces.
 *
 * @param raw - Raw file contents.
 * @returns The remaining text with whitespace runs collapsed, trimmed.
 */
function parseVtt(raw: string): string {
  const lines = raw.split(/\r?\n/).map(line => line.trim())
  const kept: string[] = []
  let seenContent = false // flips to true at the first non-blank line
  let inHeader = false // true while skipping the WEBVTT header block

  lines.forEach((line, index) => {
    if (!line) {
      // A blank line terminates the header block.
      inHeader = false
      return
    }

    const isFirstContentLine = !seenContent
    seenContent = true

    if (VTT_TIMING_RE.test(line)) {
      // A timing line also ends the header, in case the blank separator is missing.
      inHeader = false
      return
    }

    // Only the first non-blank line can be the signature; a later line that
    // merely starts with "WEBVTT" is ordinary text and must be preserved.
    if (isFirstContentLine && VTT_HEADER_RE.test(line)) {
      inHeader = true
      return
    }
    if (inHeader) return

    // A digits-only line is a cue identifier only when the timing line follows
    // immediately; otherwise it is spoken content (e.g. "42") and is kept.
    const nextLine = lines[index + 1] ?? ''
    if (VTT_NUMERIC_LINE_RE.test(line) && VTT_TIMING_RE.test(nextLine)) return

    kept.push(line)
  })

  return kept.join(' ').replace(/\s+/g, ' ').trim()
}
/**
 * Derives the transcription format from a file name's extension.
 *
 * The extension is the text after the last `.` in the name, compared
 * case-insensitively. A name without any `.` has no extension and is treated
 * as plain text, so a file literally named `docx` or `vtt` is not mistaken
 * for that format.
 *
 * @param fileName - File name, with or without an extension.
 * @returns `'docx'`, `'vtt'` or `'md'` for those extensions; `'txt'` otherwise.
 */
function detectType(fileName: string): TranscriptionFileType {
  const dotIndex = fileName.lastIndexOf('.')
  if (dotIndex === -1) return 'txt'

  const ext = fileName.slice(dotIndex + 1).toLowerCase()
  if (ext === 'docx') return 'docx'
  if (ext === 'vtt') return 'vtt'
  if (ext === 'md') return 'md'
  return 'txt'
}
/**
 * Reads an uploaded file and extracts its transcription text.
 *
 * `.docx` files are converted with mammoth's raw-text extraction. Every other
 * file is read as text; it is passed through the VTT parser when either the
 * file name or the content itself identifies it as VTT, and is otherwise just
 * trimmed. The reported `fileType` always comes from the file name.
 *
 * @param file - The file to parse.
 * @returns The file's name, name-derived type, extracted text and size.
 */
export async function parseFile(file: File): Promise<ParsedTranscription> {
  const { name: fileName, size } = file
  const fileType = detectType(fileName)
  console.log(`[Alpha] Parsing file: ${fileName} (${fileType}, ${size} bytes)`)

  if (fileType === 'docx') {
    const { value } = await mammoth.extractRawText({ arrayBuffer: await file.arrayBuffer() })
    return { fileName, fileType, text: value.trim(), size }
  }

  const contents = await file.text()
  // Content sniffing lets VTT saved under another extension be cleaned up too.
  const treatAsVtt = fileType === 'vtt' || isVTT(contents)
  const text = treatAsVtt ? parseVtt(contents) : contents.trim()
  return { fileName, fileType, text, size }
}