#!/usr/bin/env bash
# shellcheck disable=2086
# MIT License
# Copyright (c) 2022 Daniils Petrovs
# Copyright (c) 2023 Jennifer Capasso
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Small shell script to automatically download and transcribe live stream VODs.
# This uses yt-dlp, ffmpeg and the C++ version of Whisper: https://github.com/ggerganov/whisper.cpp
# Use `./examples/yt-wsp.sh help` to print help info.
#
# Sample usage:
#
#   git clone https://github.com/ggerganov/whisper.cpp
#   cd whisper.cpp
#   make
#   ./examples/yt-wsp.sh https://www.youtube.com/watch?v=1234567890
#
# Abort on errors, unset variables and failed pipeline stages; -E lets ERR
# traps be inherited by functions and subshells.
set -Eeuo pipefail

# Resolve the absolute location of this script so the defaults below do not
# depend on the caller's working directory. The expansion is quoted so that a
# checkout path containing whitespace still resolves to a single argument.
SCRIPT_PATH="$(realpath -e "${BASH_SOURCE[0]}")"
SCRIPT_DIR="${SCRIPT_PATH%/*}"

################################################################################
# Documentation on downloading models can be found in the whisper.cpp repo:
# https://github.com/ggerganov/whisper.cpp/#usage
#
# note: unless a multilingual model is specified, WHISPER_LANG will be ignored
# and the video will be transcribed as if the audio were in the English language
################################################################################
MODEL_PATH="${MODEL_PATH:-${SCRIPT_DIR}/../models/ggml-base.en.bin}"

################################################################################
# Where to find the whisper.cpp executable. default to the examples directory
# which holds this script in source control
################################################################################
WHISPER_EXECUTABLE="${WHISPER_EXECUTABLE:-${SCRIPT_DIR}/../main}"

# Language of the source audio, passed to whisper with -l. The script always
# runs whisper with --translate, so the subtitles come out in English.
WHISPER_LANG="${WHISPER_LANG:-en}"

# Default to 4 threads (this was most performant on my 2020 M1 MBP)
WHISPER_THREAD_COUNT="${WHISPER_THREAD_COUNT:-4}"
#######################################
# Print a status message to stderr, keeping stdout free of diagnostics.
# Arguments: $1 - message text (optional; an empty line is printed if omitted).
#                 Backslash escapes such as \n and \t are expanded.
# Outputs:   the message followed by a newline, on stderr
#######################################
msg() {
  # printf with %b expands escapes like `echo -e` did, but cannot mistake a
  # message such as "-n" or "-e" for an option and silently drop it.
  printf '%b\n' "${1-}" >&2
}
#######################################
# Recursively delete a working directory.
# Arguments: $1 - path of the directory to remove
# Outputs:   progress or error text on stderr (via msg)
# Returns:   exits the script with status 1 if $1 is missing or is not an
#            existing directory
#######################################
cleanup() {
  # ${1-} keeps a call without arguments from tripping `set -u`; it falls
  # through to the explicit error below instead.
  local -r clean_me="${1-}"

  if [[ ! -d "${clean_me}" ]]; then
    msg "'${clean_me}' does not appear to be a directory!"
    exit 1
  fi

  msg "Cleaning up..."
  # `--` stops option parsing so a path beginning with '-' is still treated
  # as a path.
  rm -rf -- "${clean_me}"
}
#######################################
# Write the usage banner to stdout.
# Arguments: none
# Outputs:   nine lines of help text on stdout
#######################################
print_help() {
  local -r rule="################################################################################"

  # One printf call emits every line; each argument becomes its own line.
  printf '%s\n' \
    "${rule}" \
    "Usage: ./examples/yt-wsp.sh <video_url>" \
    "# See configurable env variables in the script; there are many!" \
    "# This script will produce an MP4 muxed file in the working directory; it will" \
    "# be named for the title and id of the video." \
    "# passing in https://youtu.be/VYJtb2YXae8 produces a file named" \
    "# 'Why_we_all_need_subtitles_now-VYJtb2YXae8-res.mp4'" \
    "# Requirements: ffmpeg yt-dlp whisper.cpp" \
    "${rule}"
}
#######################################
# Verify that every external tool the script invokes can be found.
# Globals:   WHISPER_EXECUTABLE (read)
# Arguments: none
# Outputs:   an explanation on stderr (via msg) for the first missing tool
# Returns:   exits the script with status 1 if ffmpeg, yt-dlp or the whisper
#            executable cannot be resolved by `command -v`
#######################################
check_requirements() {
  if ! command -v ffmpeg &>/dev/null; then
    msg "ffmpeg is required: https://ffmpeg.org"
    exit 1
  fi

  if ! command -v yt-dlp &>/dev/null; then
    msg "yt-dlp is required: https://github.com/yt-dlp/yt-dlp"
    exit 1
  fi

  if ! command -v "${WHISPER_EXECUTABLE}" &>/dev/null; then
    # Diagnostics go to stderr like every other status message in this script.
    msg "The C++ implementation of Whisper is required: https://github.com/ggerganov/whisper.cpp"
    msg "Sample usage:"
    msg ""
    msg " git clone https://github.com/ggerganov/whisper.cpp"
    msg " cd whisper.cpp"
    msg " make"
    msg " ./examples/yt-wsp.sh https://www.youtube.com/watch?v=1234567890"
    msg ""
    exit 1
  fi
}
# A video URL (or a help request) is mandatory; show usage when it is missing.
if [[ "${#}" -lt 1 ]]; then
  print_help
  exit 1
fi

# Accept the common spellings of a help request. The previous test,
# "${1##-*}" == "help", stripped the entire argument whenever it began with
# '-', so "-help" and "--help" were never recognised.
case "${1}" in
  help | -h | -help | --help)
    print_help
    exit 0
    ;;
esac

# Fail early, before any download starts, when a required tool is missing.
check_requirements
################################################################################
# create a temporary directory to work in
# set the temp_dir and temp_filename variables
################################################################################
# The template is quoted so a SCRIPT_DIR containing whitespace still works.
temp_dir="$(mktemp -d "${SCRIPT_DIR}/tmp.XXXXXX")"
temp_filename="${temp_dir}/yt-dlp-filename"

# Remove the working directory (downloaded video, resampled audio, subtitle
# file) when the script exits, whether it succeeded or failed. Set
# KEEP_TEMP_DIR to any non-empty value to keep the intermediates for debugging.
if [[ -z "${KEEP_TEMP_DIR:-}" ]]; then
  trap 'cleanup "${temp_dir}"' EXIT
fi

################################################################################
# for now we only take one argument
# TODO: a for loop
################################################################################
source_url="${1}"
title_name=""

msg "Downloading VOD..."

################################################################################
# Download the video, put the dynamic output filename into a variable.
# Optionally add --cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER]
# for videos only available to logged-in users.
################################################################################
yt-dlp \
  -f "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best" \
  -o "${temp_dir}/%(title)s-%(id)s.vod.mp4" \
  --print-to-file "%(filename)s" "${temp_filename}" \
  --no-simulate \
  --no-write-auto-subs \
  --restrict-filenames \
  --embed-thumbnail \
  --embed-chapters \
  --xattrs \
  "${source_url}"

# yt-dlp recorded the output path in temp_filename. Read the first line
# verbatim and strip the directory and the fixed ".vod.mp4" suffix with
# parameter expansion; unlike `xargs basename`, this does not split the path
# on whitespace. The `||` clause tolerates a file without a trailing newline.
downloaded_path=""
IFS= read -r downloaded_path < "${temp_filename}" || [[ -n "${downloaded_path}" ]]
title_name="${downloaded_path##*/}"
title_name="${title_name%.vod.mp4}"

if [[ -z "${title_name}" ]]; then
  msg "Could not determine the downloaded file name from '${temp_filename}'"
  exit 1
fi

msg "Extracting audio and resampling..."

# Drop the video stream (-vn) and write 16 kHz, mono, 16-bit PCM WAV.
ffmpeg -i "${temp_dir}/${title_name}.vod.mp4" \
  -hide_banner \
  -vn \
  -loglevel error \
  -ar 16000 \
  -ac 1 \
  -c:a pcm_s16le \
  -y \
  "${temp_dir}/${title_name}.vod-resampled.wav"

msg "Transcribing to subtitle file..."
msg "Whisper specified at: '${WHISPER_EXECUTABLE}'"

# -osrt requests SRT output; the mux step below expects it at
# "<input wav>.srt" inside temp_dir.
"${WHISPER_EXECUTABLE}" \
  -m "${MODEL_PATH}" \
  -l "${WHISPER_LANG}" \
  -f "${temp_dir}/${title_name}.vod-resampled.wav" \
  -t "${WHISPER_THREAD_COUNT}" \
  -osrt \
  --translate

msg "Embedding subtitle track..."

# Copy the existing streams unchanged and add the SRT as a mov_text subtitle
# track; the result is written to the current working directory.
ffmpeg -i "${temp_dir}/${title_name}.vod.mp4" \
  -hide_banner \
  -loglevel error \
  -i "${temp_dir}/${title_name}.vod-resampled.wav.srt" \
  -c copy \
  -c:s mov_text \
  -y "${title_name}-res.mp4"

msg "Done! Your finished file is ready: ${title_name}-res.mp4"