From c64a9dd7dbaa1f1bb4ab6378a54fb9953b9d0718 Mon Sep 17 00:00:00 2001
From: Eric Meehan
Date: Tue, 25 Mar 2025 16:38:28 -0400
Subject: [PATCH] Initial commit

---
 .env_example     |  2 ++
 .gitignore       |  2 ++
 README.md        |  0
 app.py           | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt |  4 ++++
 5 files changed, 66 insertions(+)
 create mode 100644 .env_example
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 app.py
 create mode 100644 requirements.txt

diff --git a/.env_example b/.env_example
new file mode 100644
index 0000000..525dfcb
--- /dev/null
+++ b/.env_example
@@ -0,0 +1,2 @@
+OPENAI_API_KEY=
+OPENAI_BASE_URL=
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6175add
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+venv/*
+.env
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..9f9a317
--- /dev/null
+++ b/app.py
@@ -0,0 +1,58 @@
+"""Summarize a video: extract audio, transcribe it in segments, ask an LLM for a blog post."""
+import argparse
+import asyncio
+import io
+
+from dotenv import load_dotenv
+from moviepy import VideoFileClip
+from openai import AsyncOpenAI
+from pydub import AudioSegment
+
+DEFAULT_RESPONSES_INSTRUCTIONS = "You will be provided a video transcription for which you are to generate a blog post in Markdown format summarizing the video's contents."
+
+TMP_AUDIO_PATH = "/tmp/video-summary-bot-tmp-audio.wav"
+
+
+async def main():
+    """Return a Markdown blog-post summary of the video given on the command line."""
+    parser = argparse.ArgumentParser(description="Use AI models to summarize videos")
+    parser.add_argument('--video-file', type=str, required=True,
+                        help="Path to the video to be summarized")
+    parser.add_argument('--segment-duration', type=int, default=60000,
+                        help="Length of audio segments in milliseconds")
+    parser.add_argument('--transcription-model', type=str, default='whisper-1',
+                        help="Model used to transcribe each audio segment")
+    parser.add_argument('--responses-model', type=str, default='gpt-4o-mini',
+                        help="Model used to summarize the transcription")
+    args = parser.parse_args()
+    load_dotenv()
+    # Extract the audio track to a temporary WAV file, then slice it into
+    # fixed-length segments (pydub slices are indexed in milliseconds).
+    VideoFileClip(args.video_file).audio.write_audiofile(TMP_AUDIO_PATH)
+    audio = AudioSegment.from_wav(TMP_AUDIO_PATH)
+    segments = [audio[i:i + args.segment_duration]
+                for i in range(0, len(audio), args.segment_duration)]
+    client = AsyncOpenAI()
+    # The transcription endpoint needs a named file-like object, so export
+    # each pydub segment to an in-memory WAV buffer before uploading.
+    transcriptions = []
+    for segment in segments:
+        buffer = io.BytesIO()
+        segment.export(buffer, format='wav')
+        buffer.name = 'segment.wav'
+        response = await client.audio.transcriptions.create(
+            model=args.transcription_model,
+            file=buffer,
+        )
+        transcriptions.append(response.text)
+    transcription = ' '.join(transcriptions)
+    summary = await client.responses.create(
+        model=args.responses_model,
+        instructions=DEFAULT_RESPONSES_INSTRUCTIONS,
+        input=transcription,
+    )
+    return summary.output_text
+
+
+if __name__ == '__main__':
+    print(asyncio.run(main()))
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..2d5fb82
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+moviepy
+openai
+pydub
+python-dotenv