video-summary-bot/app.py
2025-03-25 16:38:28 -04:00

41 lines
1.5 KiB
Python

import argparse
import asyncio
import io
import os

from dotenv import load_dotenv
from moviepy import VideoFileClip
from openai import OpenAI
from pydub import AudioSegment
from pydub.playback import play
# Instruction prompt sent to the OpenAI Responses API together with the transcript.
DEFAULT_RESPONSES_INSTRUCTIONS = "You will be provided a video transcription for which you are to generate a blog post in Markdown format summarizing the video's contents."
# Scratch path where the video's audio track is written before transcription.
TMP_AUDIO_PATH = "/tmp/video-summary-bot-tmp-audio.wav"
async def main():
    """Transcribe a video's audio in segments and summarize it via OpenAI.

    Parses CLI arguments, extracts the video's audio track to a temporary
    WAV file, slices it into fixed-length segments, transcribes each segment,
    and asks the Responses API to turn the joined transcript into a Markdown
    blog post.

    Returns:
        The OpenAI Responses API response object for the summary request.
    """
    parser = argparse.ArgumentParser(description="Use AI models to summarize videos")
    parser.add_argument('--video-file', type=str, required=True,
                        help="Path to the video to be summarized")
    # pydub slices by milliseconds; default to 10-minute segments.
    parser.add_argument('--segment-duration', type=int, default=600_000,
                        help="Length of audio segments in milliseconds")
    # These two were read from `args` below but never registered with the
    # parser, which raised AttributeError at runtime.
    parser.add_argument('--transcription-model', type=str, default='whisper-1',
                        help="OpenAI model used for audio transcription")
    parser.add_argument('--responses-model', type=str, default='gpt-4o-mini',
                        help="OpenAI model used to generate the summary")
    args = parser.parse_args()

    load_dotenv()  # pick up OPENAI_API_KEY (and friends) from a .env file

    # Extract the audio track to a scratch WAV file, then load it with pydub.
    VideoFileClip(args.video_file).audio.write_audiofile(TMP_AUDIO_PATH)
    audio = AudioSegment.from_wav(TMP_AUDIO_PATH)

    segments = [audio[i:i + args.segment_duration]
                for i in range(0, len(audio), args.segment_duration)]

    client = OpenAI()

    # The sync OpenAI client is not awaitable (the original `await ...create().text`
    # awaited an attribute access and crashed). Also, the API needs a file-like
    # object, so each pydub segment is exported to an in-memory WAV first.
    transcribed_parts = []
    for index, segment in enumerate(segments):
        buffer = io.BytesIO()
        segment.export(buffer, format="wav")
        buffer.name = f"segment-{index}.wav"  # the API infers format from the name
        buffer.seek(0)
        result = client.audio.transcriptions.create(
            model=args.transcription_model,
            file=buffer,
        )
        transcribed_parts.append(result.text)
    transcription = ' '.join(transcribed_parts)

    summary = client.responses.create(
        model=args.responses_model,
        instructions=DEFAULT_RESPONSES_INSTRUCTIONS,
        input=transcription,
    )
    return summary
if __name__ == '__main__':
    # main() is a coroutine: calling it directly only creates a coroutine
    # object (the original printed its repr and never ran it). Drive it with
    # asyncio.run so the summary is actually produced.
    summary = asyncio.run(main())
    print(summary)