diff options
| author | Sam Anthony <sam@samanthony.xyz> | 2026-02-07 20:54:12 -0500 |
|---|---|---|
| committer | Sam Anthony <sam@samanthony.xyz> | 2026-02-07 20:54:12 -0500 |
| commit | 4e1f21f9d882d392c8636a1ce095f9ec145a74ab (patch) | |
| tree | adddd3a9baf951ce19275c525273128571d969f8 | |
| parent | ed48f10d093b16919217bcf70f02393a78b0e567 (diff) | |
| download | bin-master.zip | |
| -rwxr-xr-x | transcribe | 22 | ||||
| -rwxr-xr-x | transcribeall | 48 |
2 files changed, 70 insertions, 0 deletions
# ===== file: transcribe (new, mode 100755) =====
#!/bin/sh
# Transcribe an audio file using OpenAI/GGML Whisper.cpp.
#
# Usage: transcribe <audio_file>
#
# An input whose name does not end in ".wav" is first converted with ffmpeg
# to a temporary 16 kHz, mono, signed 16-bit PCM wav. The temporary file is a
# hidden file created next to the input and is removed when the script exits,
# whether or not transcription succeeded.
#
# Environment:
#   XDG_DATA_HOME  base directory holding whisper/models/; falls back to
#                  $HOME/.local/share when unset or empty.

set -e

if [ $# -lt 1 ]; then
    echo "Usage: $0 <audio_file>" >&2
    exit 1
fi

model="${XDG_DATA_HOME:-$HOME/.local/share}/whisper/models/ggml-large-v3-turbo.bin"

# Path of the temporary wav; stays empty when no conversion was needed.
wav=""

# Remove the temporary wav, if one was created.
cleanup()
{
    if [ -n "$wav" ]; then
        rm -f -- "$wav"
    fi
}
trap cleanup EXIT
# Turn fatal signals into a normal exit so the EXIT trap still runs.
trap 'exit 1' HUP INT TERM

# Run whisper-cli on the wav file "$1", requesting SRT output (-osrt).
transcribe()
{
    echo "Transcribing '$1'" >&2
    whisper-cli --language English -m "$model" --threads "$(nproc)" -osrt "$1"
}

case "$1" in
    *.wav)
        transcribe "$1"
        ;;
    *)
        # Build the hidden temp name from the file name only, so an input
        # such as "dir/a.mp3" maps to "dir/.a.mp3.wav", not ".dir/a.mp3.wav".
        case "$1" in
            */*) wav="${1%/*}/.${1##*/}.wav" ;;
            *)   wav=".${1}.wav" ;;
        esac
        echo "Converting to wav '$1' -> '$wav'" >&2
        # -nostdin: never read from (or prompt on) stdin.
        # -y: overwrite a stale temp file left by an interrupted run.
        ffmpeg -nostdin -y -i "$1" -ar 16k -ac 1 -c:a pcm_s16le "$wav"
        transcribe "$wav"
        ;;
esac

# ===== file: transcribeall (new, mode 100755) =====
#!/bin/sh
# Transcribe each given file into <output_dir>/<name>.srt by running the
# `transcribe` command and capturing its standard output.
#
# Usage: transcribeall -o <output_dir> <file>...
#
# <name> is the input's file name without its directory and last extension,
# so inputs from different directories that share a name share one output.
#
# While a file is being processed, the marker <output_dir>/.<name>.srt.wip
# exists. If `transcribe` fails, the script stops (set -e) and leaves the
# marker behind; on the next run the marker causes the partial output to be
# discarded and the file to be transcribed again. Files whose output already
# exists (and has no marker) are skipped.

set -e

usage="$0 -o <output_dir> <file>..."

# Parse flags
outdir=""
while getopts "o:" opt; do
    case "$opt" in
        o)
            outdir="${OPTARG%/}"
            # "-o /" must not collapse to the empty string.
            if [ -z "$outdir" ]; then
                outdir="$OPTARG"
            fi
            ;;
        *)
            echo "Usage: $usage" >&2
            exit 1
            ;;
    esac
done
shift $((OPTIND - 1))
if [ -z "$outdir" ]; then
    echo "Usage: $usage" >&2
    exit 1
fi

mkdir -p -- "$outdir"

# Transcribe each file
for src in "$@"; do
    name="${src##*/}"   # strip the directory so output lands in $outdir
    base="${name%.*}"   # strip the last extension, if any
    out="${outdir}/${base}.srt"
    wip="${outdir}/.${base}.srt.wip" # work-in-progress marker

    # Check work-in-progress from previous run
    if [ -e "$wip" ]; then
        echo "Warning: '$out' in-progress; overwriting." >&2
        rm -f -- "$out" "$wip" # remove partial output and restart from scratch
    fi

    # Transcribe if not done already
    if [ ! -e "$out" ]; then
        touch -- "$wip"
        echo "'$src' -> '$out'" >&2
        transcribe "$src" >"$out"
        rm -- "$wip"
    else
        echo "'$out' already exists; skipping." >&2
    fi
done
echo done >&2