aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Sam Anthony <sam@samanthony.xyz> 2026-02-07 20:54:12 -0500
committer: Sam Anthony <sam@samanthony.xyz> 2026-02-07 20:54:12 -0500
commit: 4e1f21f9d882d392c8636a1ce095f9ec145a74ab (patch)
tree: adddd3a9baf951ce19275c525273128571d969f8
parent: ed48f10d093b16919217bcf70f02393a78b0e567 (diff)
download: bin-master.zip
transcription with openai whisper (HEAD, master)
-rwxr-xr-xtranscribe22
-rwxr-xr-xtranscribeall48
2 files changed, 70 insertions, 0 deletions
diff --git a/transcribe b/transcribe
new file mode 100755
index 0000000..289bcd2
--- /dev/null
+++ b/transcribe
@@ -0,0 +1,22 @@
+#!/bin/sh
+# Transcribe an audio file using OpenAI/GGML Whisper.cpp.
+
+set -e
+
+model="${XDG_DATA_HOME}/whisper/models/ggml-large-v3-turbo.bin"
+transcribe()
+{
+ echo "Transcribing '$1'" >&2
+ whisper-cli --language English -m "$model" --threads $(nproc) -osrt "$1"
+}
+
+ext="${1##*.}"
+if [ ! "$ext" = "wav" ]; then
+ wav=".${1}.wav"
+ echo "Converting to wav '$1' -> '$wav'" >&2
+ ffmpeg -i "$1" -ar 16k -ac 1 -c:a pcm_s16le "$wav"
+ transcribe "$wav"
+ rm "$wav"
+else
+ transcribe "$1"
+fi
diff --git a/transcribeall b/transcribeall
new file mode 100755
index 0000000..a2de999
--- /dev/null
+++ b/transcribeall
@@ -0,0 +1,48 @@
+#!/bin/sh
+
+set -e
+
+usage="$0 -o <output_dir> <file>..."
+
+# Parse flags
+outdir=""
+while getopts "o:" opt; do
+ case "$opt" in
+ o)
+ outdir="${OPTARG%/}";;
+ *)
+ echo Usage: $usage >&2
+ exit 1;;
+ esac
+done
+shift $((OPTIND - 1))
+if [ -z "$outdir" ]; then
+ echo Usage: $usage >&2
+ exit 1
+fi
+
+# Transcribe each file
+while [ $# -gt 0 ]; do
+ in="$1"
+ base="${1%.*}"
+ out="${outdir}/${base}.srt"
+ wip="${outdir}/.${base}.srt.wip" # work-in-progress
+ shift
+
+ # Check work-in-progress from previous run
+ if [ -e "$wip" ]; then
+ echo "Warning: '$out' in-progress; overwriting." >&2
+ rm -f "$out" "$wip" # remove partial output and restart from scratch
+ fi
+
+ # Transcribe if not done already
+ if [ ! -e "$out" ]; then
+ touch "$wip"
+ echo "'$in' -> '$out'" >&2
+ transcribe "$in" >"$out"
+ rm "$wip"
+ else
+ echo "'$out' already exists; skipping." >&2
+ fi
+done
+echo done >&2