From dba02ae5ea3b2bd56c0f6f71b79b0f9bfc8b626c Mon Sep 17 00:00:00 2001 From: Eric Meehan Date: Tue, 25 Mar 2025 21:57:00 -0400 Subject: [PATCH] OpenAI functions and clearer parameters --- .env_example | 7 +++++-- app.py | 58 ++++++++++++++++++++++++++++++---------------------- 2 files changed, 38 insertions(+), 27 deletions(-) diff --git a/.env_example b/.env_example index 525dfcb..221e367 100644 --- a/.env_example +++ b/.env_example @@ -1,2 +1,5 @@ -OPENAI_API_KEY= -OPENAI_BASE_URL= +OPENAI_API_KEY= +OPENAI_BASE_URL= +OPENAI_RESPONSES_INSTRUCTIONS= +SEGMENT_DURATION= +TMP_AUDIO_PATH= diff --git a/app.py b/app.py index 9f9a317..5400e8b 100644 --- a/app.py +++ b/app.py @@ -1,40 +1,48 @@ import argparse -import asyncio import os from dotenv import load_dotenv from moviepy import VideoFileClip from openai import OpenAI from pydub import AudioSegment -from pydub.playback import play -DEFAULT_RESPONSES_INSTRUCTIONS = "You will be provided a video transcription for which you are to generate a blog post in Markdown format summarizing the video's contents." -TMP_AUDIO_PATH = "/tmp/video-summary-bot-tmp-audio.wav" +PROMPT = "You will be provided a video transcription for which you are to generate a blog post in Markdown format summarizing the video contents." 
-async def main(): - parser = argparse.ArgumentParser(description="Use AI models to summarize videos") - parser.add_argument('--video-file', type=str, help="Path to the video to be summarized") - parser.add_argument('--segment-duration', type=int, help="Lenght of audio segments") - args = parser.parse_args() - load_dotenv() - VideoFileClip(args.video_file).audio.write_audiofile(TMP_AUDIO_PATH) - audio = AudioSegment.from_wav(TMP_AUDIO_PATH) - segments = [audio[i:i + args.segment_duration] for i in range(0, len(audio), args.segment_duration)] - # TODO: Test OpenAI - client = OpenAI() - transcription = ' '.join([ - await client.audio.transcriptions.create( - model=args.transcription_model, +def main(args): + openai_client = OpenAI() + return summarize_transcription( + openai_client, + transcribe_audio( + openai_client, + get_audio_from_video(args.video_file_path) + ) + ) + +def get_audio_from_video(video_file_path): + tmp_audio_path = os.getenv('TMP_AUDIO_PATH', '/tmp/video_summary_bot_tmp_audio.wav') + VideoFileClip(video_file_path).audio.write_audiofile(tmp_audio_path) + return AudioSegment.from_wav(tmp_audio_path) + +def transcribe_audio(openai_client, audio): + segment_duration = int(os.getenv('SEGMENT_DURATION', 30000)) + transcription_model = os.getenv('OPENAI_TRANSCRIPTION_MODEL', 'whisper-1') + return ' '.join([ + openai_client.audio.transcriptions.create( + model=transcription_model, file=each - ).text for each in segments + ).text for each in [audio[i:i + segment_duration] for i in range(0, len(audio), segment_duration)] ]) - summary = client.responses.create( - model=args.responses_model, - instructions=DEFAULT_RESPONSES_INSTRUCTIONS, + +def summarize_transcription(openai_client, transcription): + responses_model = os.getenv('OPENAI_RESPONSES_MODEL', 'gpt-4o-mini') + return openai_client.responses.create( + model=responses_model, + instructions=PROMPT, input=transcription ) - return summary if __name__ == '__main__': - summary = main() - print(summary) + 
load_dotenv() + parser = argparse.ArgumentParser(description="Use AI models to summarize videos") + parser.add_argument('--video-file-path', type=str, help="Path to the video to be summarized") + print(main(parser.parse_args()))