K-10 pipeline transcripciones + settings IA + cache-aside + session doc
Nuevos módulos:
- services/ai.ts: cliente IA agnóstico de proveedor (OpenRouter, MiniMax)
- services/db.ts: núcleo Dexie con tablas settings + project_docs
- services/storage.ts: Cache-Aside + Write-Through (L1 Map → L2 Dexie → L3 localStorage)
- services/parse-transcription.ts: parser .docx/.vtt/.txt/.md
- services/session-analyzer.ts: extracción IA de sesiones (resumen, tareas, decisiones)
- services/project-doc.ts: documento maestro MD (Bloque 1 resumen + Bloque 2 sesiones)
- stores/settings.ts: proveedores IA, modelos, API keys separadas por proveedor
- stores/transcriptions.ts: pipeline upload → analyze → create HU en KAPPA
- views/SettingsView.vue: configuración IA (OpenRouter, MiniMax, OpenCode bridge)
- views/TranscriptionsView.vue: subida múltiple + análisis de sesión + visor MD + calendario
- components/AiProjectChat.vue: chat contextual por proyecto con selector de modelo

Cambios en módulos existentes:
- stores/auth.ts, kappa-api.ts, upload-hu.ts: migrados al servicio de storage (Dexie + localStorage)
- stores/projects.ts, workitems.ts: kappa_last_project vía storage
- DashboardView.vue: descripción reemplazada por AiProjectChat
- NewDashboardView.vue: tabs transcriptions + settings + eventos navigate-settings
- NavMain.vue: ítems Transcripciones + Configuración
- SiteHeader.vue: labels de tabs + idioma vía storage
- LoginView.vue: remember_email vía storage
- i18n: +80 claves español/inglés
- vite.config.ts: proxy CORS para MiniMax
- package.json: +mammoth.js
This commit is contained in:
@@ -0,0 +1,69 @@
|
||||
import * as mammoth from 'mammoth'
|
||||
|
||||
/** File-format labels that a parsed transcription can carry. */
export type TranscriptionFileType =
  | 'docx'
  | 'vtt'
  | 'txt'
  | 'md'

/** Plain-text result produced for one transcription file. */
export interface ParsedTranscription {
  /** Name of the source file. */
  fileName: string
  /** Format label assigned to the file. */
  fileType: TranscriptionFileType
  /** Text content extracted from the file. */
  text: string
  /** Size of the source file, in bytes. */
  size: number
}
|
||||
|
||||
/**
 * Matches the `WEBVTT` signature line at the very start of the input
 * (leading whitespace/BOM tolerated), including any trailing header text and
 * the line terminator. Deliberately NOT multiline: a line that merely starts
 * with "WEBVTT" in the middle of a document must not count as a signature.
 */
const VTT_HEADER_RE = /^\s*WEBVTT(?:[ \t][^\r\n]*)?(?:\r?\n|$)/i

/**
 * Matches a cue timing line (`start --> end`) at the start of any line.
 * The hours component is optional, so both `hh:mm:ss.ttt` and `mm:ss.ttt`
 * timestamps are recognised.
 */
const VTT_TIMING_RE = /^(?:\d{2,}:)?\d{2}:\d{2}\.\d{3}\s*-->\s*(?:\d{2,}:)?\d{2}:\d{2}\.\d{3}/m

/**
 * Global, unanchored variant of the timing pattern. It is not referenced by
 * the code visible in this file. Note that the `g` flag makes `test`/`exec`
 * stateful through `lastIndex`.
 */
const VTT_CUE_RE = /(?:\d{2,}:)?\d{2}:\d{2}\.\d{3}\s*-->\s*(?:\d{2,}:)?\d{2}:\d{2}\.\d{3}/g

/** `Kind:` / `Language:` metadata lines; only honoured before the first cue. */
const VTT_HEADER_METADATA_RE = /^(?:Kind|Language):/i

/** A line consisting solely of digits (numeric cue identifier). */
const VTT_CUE_NUMBER_RE = /^\d+$/

/**
 * Heuristically decides whether `text` is WebVTT content.
 *
 * @param text Raw file contents.
 * @returns `true` when the text starts with a `WEBVTT` signature line or
 *   contains at least one line that begins with a cue timing.
 */
function isVTT(text: string): boolean {
  return VTT_HEADER_RE.test(text) || VTT_TIMING_RE.test(text)
}

/**
 * Reduces WebVTT content to its spoken text as a single line.
 *
 * Removes the signature line, `Kind:` / `Language:` metadata lines that
 * appear before the first blank line or timing line, numeric cue identifiers
 * and cue timing lines. The remaining lines are joined with single spaces.
 *
 * @param raw Raw WebVTT file contents.
 * @returns The cue text with all whitespace runs collapsed to one space.
 */
function parseVtt(raw: string): string {
  const kept: string[] = []
  // Metadata is only stripped in the header region, so cue text that happens
  // to contain "kind:" or "language:" is preserved.
  let inHeader = true

  for (const line of raw.replace(VTT_HEADER_RE, '').split(/\r?\n/)) {
    const trimmed = line.trim()

    if (!trimmed) {
      inHeader = false // a blank line terminates the header block
      continue
    }
    if (VTT_TIMING_RE.test(trimmed)) {
      inHeader = false // the first cue also terminates the header block
      continue
    }
    if (VTT_CUE_NUMBER_RE.test(trimmed)) continue
    if (inHeader && VTT_HEADER_METADATA_RE.test(trimmed)) continue

    kept.push(trimmed)
  }

  return kept.join(' ').replace(/\s+/g, ' ').trim()
}
|
||||
|
||||
/**
 * Derives the transcription format from a file name's extension.
 *
 * The extension is the text after the last `.`, compared case-insensitively.
 * A name without any `.` has no extension and is treated as plain text, so a
 * file literally named `docx` or `vtt` is not mistaken for that format.
 *
 * @param fileName File name, with or without an extension.
 * @returns `'docx'`, `'vtt'` or `'md'` for those extensions; `'txt'` otherwise.
 */
function detectType(fileName: string): TranscriptionFileType {
  const dotIndex = fileName.lastIndexOf('.')
  if (dotIndex === -1) return 'txt'

  switch (fileName.slice(dotIndex + 1).toLowerCase()) {
    case 'docx':
      return 'docx'
    case 'vtt':
      return 'vtt'
    case 'md':
      return 'md'
    default:
      return 'txt'
  }
}
|
||||
|
||||
/**
 * Reads an uploaded file and extracts its plain text.
 *
 * - `.docx` files are converted with mammoth's raw-text extraction.
 * - Any other file is read as text; it goes through the VTT parser when it
 *   has a `.vtt` extension or its content looks like VTT, and is otherwise
 *   only trimmed.
 *
 * The returned `fileType` always comes from the file name, even when the
 * content was recognised as VTT by inspection.
 *
 * @param file File to parse.
 * @returns The file's name, detected type, extracted text and size.
 */
export async function parseFile(file: File): Promise<ParsedTranscription> {
  const { name: fileName, size } = file
  const fileType = detectType(fileName)

  console.log(`[Alpha] Parsing file: ${fileName} (${fileType}, ${size} bytes)`)

  const extractText = async (): Promise<string> => {
    if (fileType === 'docx') {
      const arrayBuffer = await file.arrayBuffer()
      const { value } = await mammoth.extractRawText({ arrayBuffer })
      return value.trim()
    }

    const contents = await file.text()
    const treatAsVtt = fileType === 'vtt' || isVTT(contents)
    return treatAsVtt ? parseVtt(contents) : contents.trim()
  }

  const text = await extractText()
  return { fileName, fileType, text, size }
}
|
||||
Reference in New Issue
Block a user