7d299554bf
Nuevos modulos: - services/ai.ts: cliente IA provider-agnostico (OpenRouter, MiniMax) - services/db.ts: Dexie core con tabla settings + project_docs - services/storage.ts: Cache-Aside + Write-Through (L1 Map → L2 Dexie → L3 localStorage) - services/parse-transcription.ts: parser .docx/.vtt/.txt/.md - services/session-analyzer.ts: extraccion IA de sesiones (resumen, tareas, decisiones) - services/project-doc.ts: documento maestro MD (Bloque 1 resumen + Bloque 2 sesiones) - stores/settings.ts: proveedores IA, modelos, API keys separadas por provider - stores/transcriptions.ts: pipeline upload → analyze → create HU en KAPPA - views/SettingsView.vue: configuracion IA (OpenRouter, MiniMax, OpenCode bridge) - views/TranscriptionsView.vue: subida multiple + analisis sesion + visor MD + calendario - components/AiProjectChat.vue: chat contextual por proyecto con selector de modelo Cambios en existentes: - stores/auth.ts, kappa-api.ts, upload-hu.ts: migrados a storage service (Dexie + localStorage) - stores/projects.ts, workitems.ts: kappa_last_project via storage - DashboardView.vue: descripcion reemplazada por AiProjectChat - NewDashboardView.vue: tabs transcriptions + settings + navigate-settings events - NavMain.vue: items Transcripciones + Configuracion - SiteHeader.vue: labels tabs + language via storage - LoginView.vue: remember_email via storage - i18n: +80 keys español/ingles - vite.config.ts: proxy CORS para MiniMax - package.json: +mammoth.js
70 lines
1.8 KiB
TypeScript
70 lines
1.8 KiB
TypeScript
import * as mammoth from 'mammoth'
|
|
|
|
/**
 * Transcription formats this module distinguishes between.
 * Anything that is not recognised as `docx`, `vtt` or `md` is handled as `txt`.
 */
export type TranscriptionFileType =
  | 'docx'
  | 'vtt'
  | 'txt'
  | 'md'
|
|
|
|
/** Result of reading one uploaded transcription file (see `parseFile`). */
export interface ParsedTranscription {
  /** Name of the source file, copied from `File.name`. */
  fileName: string

  /**
   * Format derived from the file name's extension.
   * It is not changed when the content of a non-`.vtt` file is sniffed as
   * WebVTT, even though `text` is then cleaned as VTT.
   */
  fileType: TranscriptionFileType

  /** Extracted plain text, trimmed. */
  text: string

  /** Size of the source file, copied from `File.size`. */
  size: number
}
|
|
|
|
/** A line that starts with the `WEBVTT` signature followed by whitespace (case-insensitive). */
const VTT_HEADER_RE = /^WEBVTT\s/mi

/**
 * A line that starts with a cue timing: `start --> end`.
 * WebVTT timestamps are `mm:ss.ttt` with an optional leading hours field of
 * two or more digits, so both `00:01.000` and `00:00:01.000` are accepted.
 * Not global, so `.test()` carries no state between calls.
 */
const VTT_TIMING_RE = /^(?:\d{2,}:)?\d{2}:\d{2}\.\d{3}\s*-->\s*(?:\d{2,}:)?\d{2}:\d{2}\.\d{3}/m

/**
 * Same cue timing as `VTT_TIMING_RE`, but unanchored and global so every
 * occurrence in a text can be matched. Because of the `g` flag, `.test()` and
 * `.exec()` advance `lastIndex`; prefer `String.prototype.match`/`matchAll`.
 * Not referenced by the code visible in this section.
 */
const VTT_CUE_RE = /(?:\d{2,}:)?\d{2}:\d{2}\.\d{3}\s*-->\s*(?:\d{2,}:)?\d{2}:\d{2}\.\d{3}/g
|
|
|
|
/**
 * Sniffs whether a text looks like WebVTT.
 *
 * @param text Raw file content.
 * @returns `true` when some line matches `VTT_HEADER_RE` (the `WEBVTT`
 *   signature) or, failing that, when some line matches `VTT_TIMING_RE`
 *   (a cue timing).
 */
function isVTT(text: string): boolean {
  if (VTT_HEADER_RE.test(text)) {
    return true
  }
  return VTT_TIMING_RE.test(text)
}
|
|
|
|
/**
 * Reduces a WebVTT document to its spoken text on a single line.
 *
 * Dropped lines:
 * - blank lines and cue timing lines (as matched by `VTT_TIMING_RE`);
 * - the `WEBVTT` signature line, including any title that follows it;
 * - `Kind:` / `Language:` metadata lines, but only before the first cue
 *   timing, so cue text that merely mentions "language:" is left intact;
 * - cue identifiers, i.e. a line that opens a block and is directly followed
 *   by a timing line;
 * - numeric-only lines (kept from the previous behaviour for files whose
 *   cue numbers are not separated by blank lines; note that a payload line
 *   consisting solely of digits is dropped as well).
 *
 * Cue markup such as `<v Speaker>` is preserved.
 *
 * @param raw Raw file content.
 * @returns The remaining lines joined by single spaces, whitespace-collapsed
 *   and trimmed; an empty string when nothing remains.
 */
function parseVtt(raw: string): string {
  const signatureRe = /^WEBVTT(?:\s|$)/i
  const metadataRe = /^(?:Kind|Language):/i
  const numericRe = /^\d+$/
  const isTiming = (line: string): boolean => VTT_TIMING_RE.test(line)

  const lines = raw.split(/\r?\n/).map(line => line.trim())

  // Everything before the first cue timing is treated as the file preamble.
  const firstContent = lines.findIndex(line => line !== '')
  const firstTiming = lines.findIndex(isTiming)
  const preambleEnd = firstTiming === -1 ? lines.length : firstTiming

  const payload = lines.filter((line, i) => {
    if (line === '' || isTiming(line)) return false
    if (numericRe.test(line)) return false

    if (i < preambleEnd) {
      // The signature is only meaningful as the very first content line.
      if (i === firstContent && signatureRe.test(line)) return false
      if (metadataRe.test(line)) return false
    }

    // A cue identifier opens a block (start of file or after a blank line)
    // and sits directly above the timing line. Payload that is merely
    // followed by the next timing without a blank line is not affected,
    // because it is preceded by a timing or payload line, not a blank one.
    const opensBlock = i === 0 || lines[i - 1] === ''
    if (opensBlock && isTiming(lines[i + 1] ?? '')) return false

    return true
  })

  return payload.join(' ').replace(/\s+/g, ' ').trim()
}
|
|
|
|
/**
 * Maps a file name to a transcription type using its extension.
 *
 * The extension is the text after the last `.`, compared case-insensitively.
 * A name without any `.` has no extension and is treated as plain text, so a
 * file literally named `docx` is not mistaken for a Word document.
 *
 * @param fileName File name, e.g. `File.name`.
 * @returns `'docx'`, `'vtt'` or `'md'` for those extensions; `'txt'` otherwise.
 */
function detectType(fileName: string): TranscriptionFileType {
  const dot = fileName.lastIndexOf('.')
  if (dot === -1) return 'txt'

  switch (fileName.slice(dot + 1).toLowerCase()) {
    case 'docx':
      return 'docx'
    case 'vtt':
      return 'vtt'
    case 'md':
      return 'md'
    default:
      return 'txt'
  }
}
|
|
|
|
/**
 * Reads an uploaded file and extracts its transcription text.
 *
 * - `.docx` files are read as an ArrayBuffer and passed to
 *   `mammoth.extractRawText`; the resulting `value` is trimmed.
 * - Every other file is read as text. If its extension is `.vtt`, or its
 *   content is recognised by `isVTT`, the text is cleaned with `parseVtt`;
 *   otherwise it is only trimmed.
 *
 * The returned `fileType` always comes from the file name, even when the
 * content was sniffed as VTT. Errors from reading the file or from mammoth
 * are not caught here and reject the returned promise.
 *
 * @param file File to parse.
 * @returns File name, extension-based type, extracted text and size.
 */
export async function parseFile(file: File): Promise<ParsedTranscription> {
  const { name: fileName, size } = file
  const fileType = detectType(fileName)

  console.log(`[Alpha] Parsing file: ${fileName} (${fileType}, ${size} bytes)`)

  if (fileType === 'docx') {
    const arrayBuffer = await file.arrayBuffer()
    const { value } = await mammoth.extractRawText({ arrayBuffer })
    return { fileName, fileType, text: value.trim(), size }
  }

  const raw = await file.text()
  const treatAsVtt = fileType === 'vtt' || isVTT(raw)
  const text = treatAsVtt ? parseVtt(raw) : raw.trim()

  return { fileName, fileType, text, size }
}
|