package main import ( "bufio" "bytes" "fmt" "os" "os/exec" "path/filepath" "strconv" "strings" "time" "fyne.io/fyne/v2" "fyne.io/fyne/v2/canvas" "fyne.io/fyne/v2/container" "fyne.io/fyne/v2/layout" "fyne.io/fyne/v2/widget" "git.leaktechnologies.dev/stu/VideoTools/internal/ui" "git.leaktechnologies.dev/stu/VideoTools/internal/utils" ) const ( subtitleModeExternal = "External SRT" subtitleModeEmbed = "Embed Subtitle Track" subtitleModeBurn = "Burn In Subtitles" ) type subtitleCue struct { Start float64 End float64 Text string } func (s *appState) showSubtitlesView() { s.stopPreview() s.lastModule = s.active s.active = "subtitles" if s.subtitleOutputMode == "" { s.subtitleOutputMode = subtitleModeExternal } s.setContent(buildSubtitlesView(s)) } func buildSubtitlesView(state *appState) fyne.CanvasObject { subtitlesColor := moduleColor("subtitles") backBtn := widget.NewButton("< BACK", func() { state.showMainMenu() }) backBtn.Importance = widget.LowImportance queueBtn := widget.NewButton("View Queue", func() { state.showQueue() }) state.queueBtn = queueBtn state.updateQueueButtonLabel() topBar := ui.TintedBar(subtitlesColor, container.NewHBox(backBtn, layout.NewSpacer(), queueBtn)) bottomBar := moduleFooter(subtitlesColor, layout.NewSpacer(), state.statsBar) videoEntry := widget.NewEntry() videoEntry.SetPlaceHolder("Video file path (drag and drop works here)") videoEntry.SetText(state.subtitleVideoPath) videoEntry.OnChanged = func(val string) { state.subtitleVideoPath = strings.TrimSpace(val) } subtitleEntry := widget.NewEntry() subtitleEntry.SetPlaceHolder("Subtitle file path (.srt or .vtt)") subtitleEntry.SetText(state.subtitleFilePath) subtitleEntry.OnChanged = func(val string) { state.subtitleFilePath = strings.TrimSpace(val) } modelEntry := widget.NewEntry() modelEntry.SetPlaceHolder("Whisper model path (ggml-*.bin)") modelEntry.SetText(state.subtitleModelPath) modelEntry.OnChanged = func(val string) { state.subtitleModelPath = strings.TrimSpace(val) } backendEntry := widget.NewEntry() backendEntry.SetPlaceHolder("Whisper backend path (whisper.cpp/main)") backendEntry.SetText(state.subtitleBackendPath) backendEntry.OnChanged = func(val string) { state.subtitleBackendPath = strings.TrimSpace(val) } outputEntry := widget.NewEntry() outputEntry.SetPlaceHolder("Output video path (for embed/burn)") outputEntry.SetText(state.subtitleBurnOutput) outputEntry.OnChanged = func(val string) { state.subtitleBurnOutput = strings.TrimSpace(val) } statusLabel := widget.NewLabel("") statusLabel.Wrapping = fyne.TextWrapWord state.subtitleStatusLabel = statusLabel if state.subtitleStatus != "" { statusLabel.SetText(state.subtitleStatus) } var rebuildCues func() cueList := container.NewVBox() listScroll := container.NewVScroll(cueList) var emptyOverlay *fyne.Container rebuildCues = func() { cueList.Objects = nil if len(state.subtitleCues) == 0 { if emptyOverlay != nil { emptyOverlay.Show() } cueList.Refresh() return } if emptyOverlay != nil { emptyOverlay.Hide() } for i, cue := range state.subtitleCues { idx := i startEntry := widget.NewEntry() startEntry.SetPlaceHolder("00:00:00,000") startEntry.SetText(formatSRTTimestamp(cue.Start)) startEntry.OnChanged = func(val string) { if seconds, ok := parseSRTTimestamp(val); ok { state.subtitleCues[idx].Start = seconds } } endEntry := widget.NewEntry() endEntry.SetPlaceHolder("00:00:00,000") endEntry.SetText(formatSRTTimestamp(cue.End)) endEntry.OnChanged = func(val string) { if seconds, ok := parseSRTTimestamp(val); ok { state.subtitleCues[idx].End = seconds } } textEntry := widget.NewMultiLineEntry() textEntry.SetText(cue.Text) textEntry.Wrapping = fyne.TextWrapWord textEntry.OnChanged = func(val string) { state.subtitleCues[idx].Text = val } removeBtn := widget.NewButton("Remove", func() { state.subtitleCues = append(state.subtitleCues[:idx], state.subtitleCues[idx+1:]...) rebuildCues() }) removeBtn.Importance = widget.MediumImportance timesCol := container.NewVBox( widget.NewLabel("Start"), startEntry, widget.NewLabel("End"), endEntry, ) row := container.NewBorder(nil, nil, timesCol, removeBtn, textEntry) cardBg := canvas.NewRectangle(utils.MustHex("#171C2A")) cardBg.CornerRadius = 6 cardBg.SetMinSize(fyne.NewSize(0, startEntry.MinSize().Height+endEntry.MinSize().Height+textEntry.MinSize().Height+24)) cueList.Add(container.NewPadded(container.NewMax(cardBg, row))) } cueList.Refresh() } state.subtitleCuesRefresh = rebuildCues handleDrop := func(items []fyne.URI) { var videoPath string var subtitlePath string for _, uri := range items { if uri.Scheme() != "file" { continue } path := uri.Path() if videoPath == "" && state.isVideoFile(path) { videoPath = path } if subtitlePath == "" && state.isSubtitleFile(path) { subtitlePath = path } } if videoPath != "" { state.subtitleVideoPath = videoPath videoEntry.SetText(videoPath) } if subtitlePath != "" { subtitleEntry.SetText(subtitlePath) if err := state.loadSubtitleFile(subtitlePath); err != nil { state.setSubtitleStatus(err.Error()) } rebuildCues() } } emptyLabel := widget.NewLabel("Drag and drop subtitle files here\nor generate subtitles from speech") emptyLabel.Alignment = fyne.TextAlignCenter emptyOverlay = container.NewCenter(emptyLabel) listArea := container.NewMax(ui.NewDroppable(listScroll, handleDrop), emptyOverlay) addCueBtn := widget.NewButton("Add Cue", func() { start := 0.0 if len(state.subtitleCues) > 0 { start = state.subtitleCues[len(state.subtitleCues)-1].End } state.subtitleCues = append(state.subtitleCues, subtitleCue{ Start: start, End: start + 2.0, Text: "", }) rebuildCues() }) addCueBtn.Importance = widget.HighImportance clearBtn := widget.NewButton("Clear All", func() { state.subtitleCues = nil rebuildCues() }) loadBtn := widget.NewButton("Load Subtitles", func() { if err := state.loadSubtitleFile(state.subtitleFilePath); err != nil { state.setSubtitleStatus(err.Error()) return } rebuildCues() }) saveBtn := widget.NewButton("Save Subtitles", func() { path := strings.TrimSpace(state.subtitleFilePath) if path == "" { path = defaultSubtitlePath(state.subtitleVideoPath) state.subtitleFilePath = path subtitleEntry.SetText(path) } if err := state.saveSubtitleFile(path); err != nil { state.setSubtitleStatus(err.Error()) return } state.setSubtitleStatus(fmt.Sprintf("Saved subtitles to %s", filepath.Base(path))) }) generateBtn := widget.NewButton("Generate From Speech (Offline)", func() { state.generateSubtitlesFromSpeech() rebuildCues() }) generateBtn.Importance = widget.HighImportance outputModeSelect := widget.NewSelect( []string{subtitleModeExternal, subtitleModeEmbed, subtitleModeBurn}, func(val string) { state.subtitleOutputMode = val }, ) outputModeSelect.SetSelected(state.subtitleOutputMode) applyBtn := widget.NewButton("Create Output", func() { state.applySubtitlesToVideo() }) applyBtn.Importance = widget.HighImportance left := container.NewVBox( widget.NewLabelWithStyle("Sources", fyne.TextAlignLeading, fyne.TextStyle{Bold: true}), ui.NewDroppable(videoEntry, handleDrop), ui.NewDroppable(subtitleEntry, handleDrop), widget.NewLabelWithStyle("Offline Speech-to-Text (whisper.cpp)", fyne.TextAlignLeading, fyne.TextStyle{Bold: true}), backendEntry, modelEntry, container.NewHBox(generateBtn), widget.NewLabelWithStyle("Output", fyne.TextAlignLeading, fyne.TextStyle{Bold: true}), outputModeSelect, outputEntry, applyBtn, widget.NewLabelWithStyle("Status", fyne.TextAlignLeading, fyne.TextStyle{Bold: true}), statusLabel, ) right := container.NewBorder( container.NewVBox( widget.NewLabelWithStyle("Subtitle Cues", fyne.TextAlignLeading, fyne.TextStyle{Bold: true}), container.NewHBox(addCueBtn, clearBtn, loadBtn, saveBtn), ), nil, nil, nil, listArea, ) rebuildCues() content := container.NewGridWithColumns(2, left, right) return container.NewBorder(topBar, bottomBar, nil, nil, content) } func (s *appState) setSubtitleStatus(msg string) { s.subtitleStatus = msg if s.subtitleStatusLabel != nil { s.subtitleStatusLabel.SetText(msg) } } func (s *appState) setSubtitleStatusAsync(msg string) { app := fyne.CurrentApp() if app == nil || app.Driver() == nil { s.setSubtitleStatus(msg) return } app.Driver().DoFromGoroutine(func() { s.setSubtitleStatus(msg) }, false) } func (s *appState) handleSubtitlesModuleDrop(items []fyne.URI) { var videoPath string var subtitlePath string for _, uri := range items { if uri.Scheme() != "file" { continue } path := uri.Path() if videoPath == "" && s.isVideoFile(path) { videoPath = path } if subtitlePath == "" && s.isSubtitleFile(path) { subtitlePath = path } } if videoPath == "" && subtitlePath == "" { return } if videoPath != "" { s.subtitleVideoPath = videoPath } if subtitlePath != "" { if err := s.loadSubtitleFile(subtitlePath); err != nil { s.setSubtitleStatus(err.Error()) } } s.showSubtitlesView() } func (s *appState) loadSubtitleFile(path string) error { path = strings.TrimSpace(path) if path == "" { return fmt.Errorf("subtitle path is empty") } data, err := os.ReadFile(path) if err != nil { return fmt.Errorf("failed to read subtitles: %w", err) } cues, err := parseSubtitlePayload(path, string(data)) if err != nil { return err } s.subtitleFilePath = path s.subtitleCues = cues s.setSubtitleStatus(fmt.Sprintf("Loaded %d subtitle cues", len(cues))) return nil } func (s *appState) saveSubtitleFile(path string) error { path = strings.TrimSpace(path) if path == "" { return fmt.Errorf("subtitle output path is empty") } if len(s.subtitleCues) == 0 { return fmt.Errorf("no subtitle cues to save") } payload := formatSRT(s.subtitleCues) if err := os.WriteFile(path, []byte(payload), 0644); err != nil { return fmt.Errorf("failed to write subtitles: %w", err) } return nil } func (s *appState) generateSubtitlesFromSpeech() { videoPath := strings.TrimSpace(s.subtitleVideoPath) if videoPath == "" { s.setSubtitleStatus("Set a video file to generate subtitles.") return } if _, err := os.Stat(videoPath); err != nil { s.setSubtitleStatus("Video file not found.") return } modelPath := strings.TrimSpace(s.subtitleModelPath) if modelPath == "" { s.setSubtitleStatus("Set a whisper model path.") return } backendPath := strings.TrimSpace(s.subtitleBackendPath) if backendPath == "" { if detected := detectWhisperBackend(); detected != "" { backendPath = detected s.subtitleBackendPath = detected } } if backendPath == "" { s.setSubtitleStatus("Whisper backend not found. Set the backend path.") return } outputPath := strings.TrimSpace(s.subtitleFilePath) if outputPath == "" { outputPath = defaultSubtitlePath(videoPath) s.subtitleFilePath = outputPath } baseOutput := strings.TrimSuffix(outputPath, filepath.Ext(outputPath)) go func() { tmpWav := filepath.Join(os.TempDir(), fmt.Sprintf("vt-stt-%d.wav", time.Now().UnixNano())) defer os.Remove(tmpWav) s.setSubtitleStatusAsync("Extracting audio for speech-to-text...") if err := runFFmpeg([]string{ "-y", "-i", videoPath, "-vn", "-ac", "1", "-ar", "16000", "-f", "wav", tmpWav, }); err != nil { s.setSubtitleStatusAsync(fmt.Sprintf("Audio extraction failed: %v", err)) return } s.setSubtitleStatusAsync("Running offline speech-to-text...") if err := runWhisper(backendPath, modelPath, tmpWav, baseOutput); err != nil { s.setSubtitleStatusAsync(fmt.Sprintf("Speech-to-text failed: %v", err)) return } finalPath := baseOutput + ".srt" if err := s.loadSubtitleFile(finalPath); err != nil { s.setSubtitleStatusAsync(err.Error()) return } s.setSubtitleStatusAsync(fmt.Sprintf("Generated subtitles: %s", filepath.Base(finalPath))) app := fyne.CurrentApp() if app != nil && app.Driver() != nil { app.Driver().DoFromGoroutine(func() { if s.active == "subtitles" { s.showSubtitlesView() } }, false) } }() } func (s *appState) applySubtitlesToVideo() { videoPath := strings.TrimSpace(s.subtitleVideoPath) if videoPath == "" { s.setSubtitleStatus("Set a video file before creating output.") return } if _, err := os.Stat(videoPath); err != nil { s.setSubtitleStatus("Video file not found.") return } mode := s.subtitleOutputMode if mode == "" { mode = subtitleModeExternal } subPath := strings.TrimSpace(s.subtitleFilePath) if subPath == "" { subPath = defaultSubtitlePath(videoPath) s.subtitleFilePath = subPath } if err := s.saveSubtitleFile(subPath); err != nil { s.setSubtitleStatus(err.Error()) return } if mode == subtitleModeExternal { s.setSubtitleStatus(fmt.Sprintf("Saved subtitles to %s", filepath.Base(subPath))) return } outputPath := strings.TrimSpace(s.subtitleBurnOutput) if outputPath == "" { outputPath = defaultSubtitleOutputPath(videoPath) s.subtitleBurnOutput = outputPath } go func() { s.setSubtitleStatusAsync("Creating output with subtitles...") var args []string switch mode { case subtitleModeEmbed: subCodec := subtitleCodecForOutput(outputPath) args = []string{ "-y", "-i", videoPath, "-i", subPath, "-map", "0", "-map", "1", "-c", "copy", "-c:s", subCodec, outputPath, } case subtitleModeBurn: filterPath := escapeFFmpegFilterPath(subPath) args = []string{ "-y", "-i", videoPath, "-vf", fmt.Sprintf("subtitles=%s", filterPath), "-c:v", "libx264", "-crf", "18", "-preset", "fast", "-c:a", "copy", outputPath, } } if err := runFFmpeg(args); err != nil { s.setSubtitleStatusAsync(fmt.Sprintf("Subtitle output failed: %v", err)) return } s.setSubtitleStatusAsync(fmt.Sprintf("Output created: %s", filepath.Base(outputPath))) }() } func parseSubtitlePayload(path, content string) ([]subtitleCue, error) { ext := strings.ToLower(filepath.Ext(path)) switch ext { case ".vtt": content = stripVTTHeader(content) return parseSRT(content), nil case ".srt": return parseSRT(content), nil case ".ass", ".ssa": return nil, fmt.Errorf("ASS/SSA subtitles are not supported yet") default: return nil, fmt.Errorf("unsupported subtitle format") } } func stripVTTHeader(content string) string { content = strings.ReplaceAll(content, "\r\n", "\n") lines := strings.Split(content, "\n") var kept []string for i, line := range lines { if i == 0 && strings.HasPrefix(strings.TrimSpace(line), "WEBVTT") { continue } if strings.HasPrefix(strings.TrimSpace(line), "NOTE") { continue } kept = append(kept, line) } return strings.Join(kept, "\n") } func parseSRT(content string) []subtitleCue { content = strings.ReplaceAll(content, "\r\n", "\n") scanner := bufio.NewScanner(strings.NewReader(content)) var cues []subtitleCue var inCue bool var start float64 var end float64 var lines []string flush := func() { if inCue && len(lines) > 0 { cues = append(cues, subtitleCue{ Start: start, End: end, Text: strings.Join(lines, "\n"), }) } inCue = false lines = nil } for scanner.Scan() { line := strings.TrimSpace(scanner.Text()) if line == "" { flush() continue } if strings.Contains(line, "-->") { parts := strings.Split(line, "-->") if len(parts) >= 2 { if s, ok := parseSRTTimestamp(strings.TrimSpace(parts[0])); ok { if e, ok := parseSRTTimestamp(strings.TrimSpace(parts[1])); ok { start = s end = e inCue = true lines = nil continue } } } } if !inCue { continue } lines = append(lines, line) } flush() return cues } func parseSRTTimestamp(value string) (float64, bool) { value = strings.TrimSpace(value) if value == "" { return 0, false } value = strings.ReplaceAll(value, ",", ".") parts := strings.Split(value, ":") if len(parts) != 3 { return 0, false } hours, err := strconv.Atoi(parts[0]) if err != nil { return 0, false } minutes, err := strconv.Atoi(parts[1]) if err != nil { return 0, false } secParts := strings.SplitN(parts[2], ".", 2) seconds, err := strconv.Atoi(secParts[0]) if err != nil { return 0, false } ms := 0 if len(secParts) == 2 { msStr := secParts[1] if len(msStr) > 3 { msStr = msStr[:3] } for len(msStr) < 3 { msStr += "0" } ms, err = strconv.Atoi(msStr) if err != nil { return 0, false } } totalMs := ((hours*60+minutes)*60+seconds)*1000 + ms return float64(totalMs) / 1000.0, true } func formatSRTTimestamp(seconds float64) string { if seconds < 0 { seconds = 0 } totalMs := int64(seconds*1000 + 0.5) hours := totalMs / 3600000 minutes := (totalMs % 3600000) / 60000 secs := (totalMs % 60000) / 1000 ms := totalMs % 1000 return fmt.Sprintf("%02d:%02d:%02d,%03d", hours, minutes, secs, ms) } func formatSRT(cues []subtitleCue) string { var b strings.Builder for i, cue := range cues { b.WriteString(fmt.Sprintf("%d\n", i+1)) b.WriteString(fmt.Sprintf("%s --> %s\n", formatSRTTimestamp(cue.Start), formatSRTTimestamp(cue.End))) b.WriteString(strings.TrimSpace(cue.Text)) b.WriteString("\n\n") } return b.String() } func defaultSubtitlePath(videoPath string) string { if videoPath == "" { return "" } dir := filepath.Dir(videoPath) base := strings.TrimSuffix(filepath.Base(videoPath), filepath.Ext(videoPath)) return filepath.Join(dir, base+".srt") } func defaultSubtitleOutputPath(videoPath string) string { if videoPath == "" { return "" } dir := filepath.Dir(videoPath) base := strings.TrimSuffix(filepath.Base(videoPath), filepath.Ext(videoPath)) ext := filepath.Ext(videoPath) if ext == "" { ext = ".mp4" } return filepath.Join(dir, base+"-subtitled"+ext) } func subtitleCodecForOutput(outputPath string) string { ext := strings.ToLower(filepath.Ext(outputPath)) switch ext { case ".mp4", ".m4v", ".mov": return "mov_text" default: return "srt" } } func escapeFFmpegFilterPath(path string) string { escaped := strings.ReplaceAll(path, "\\", "\\\\") escaped = strings.ReplaceAll(escaped, ":", "\\:") escaped = strings.ReplaceAll(escaped, "'", "\\'") return escaped } func detectWhisperBackend() string { candidates := []string{"whisper.cpp", "whisper", "main", "main.exe", "whisper.exe"} for _, candidate := range candidates { if found, err := exec.LookPath(candidate); err == nil { return found } } return "" } func runWhisper(binaryPath, modelPath, inputPath, outputBase string) error { args := []string{ "-m", modelPath, "-f", inputPath, "-of", outputBase, "-osrt", } cmd := exec.Command(binaryPath, args...) utils.ApplyNoWindow(cmd) var stderr bytes.Buffer cmd.Stderr = &stderr if err := cmd.Run(); err != nil { return fmt.Errorf("whisper failed: %w (%s)", err, strings.TrimSpace(stderr.String())) } return nil } func runFFmpeg(args []string) error { cmd := exec.Command(platformConfig.FFmpegPath, args...) utils.ApplyNoWindow(cmd) var stderr bytes.Buffer cmd.Stderr = &stderr if err := cmd.Run(); err != nil { return fmt.Errorf("ffmpeg failed: %w (%s)", err, strings.TrimSpace(stderr.String())) } return nil }