"""Summarize a video as a Markdown blog post using OpenAI models.

Pipeline: extract the audio track from the video, split it into fixed-length
segments, transcribe each segment, then ask a text model to turn the combined
transcript into a blog post.
"""

import argparse
import asyncio
import io
import os

from dotenv import load_dotenv
from moviepy import VideoFileClip
from openai import OpenAI
from pydub import AudioSegment
from pydub.playback import play

DEFAULT_RESPONSES_INSTRUCTIONS = (
    "You will be provided a video transcription for which you are to "
    "generate a blog post in Markdown format summarizing the video's contents."
)
TMP_AUDIO_PATH = "/tmp/video-summary-bot-tmp-audio.wav"

# NOTE(review): the defaults below were chosen during review because the
# original script read these options without ever defining them; adjust to
# whatever models the project actually targets.
DEFAULT_TRANSCRIPTION_MODEL = "whisper-1"
DEFAULT_RESPONSES_MODEL = "gpt-4o-mini"
# pydub slices AudioSegment objects in milliseconds. One minute of WAV audio
# is presumably well under the transcription upload limit -- TODO confirm.
DEFAULT_SEGMENT_DURATION_MS = 60_000


def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the summarizer."""
    parser = argparse.ArgumentParser(description="Use AI models to summarize videos")
    parser.add_argument(
        '--video-file',
        type=str,
        required=True,
        help="Path to the video to be summarized",
    )
    parser.add_argument(
        '--segment-duration',
        type=int,
        default=DEFAULT_SEGMENT_DURATION_MS,
        help="Length of audio segments in milliseconds (default: %(default)s)",
    )
    parser.add_argument(
        '--transcription-model',
        type=str,
        default=DEFAULT_TRANSCRIPTION_MODEL,
        help="Model used to transcribe audio (default: %(default)s)",
    )
    parser.add_argument(
        '--responses-model',
        type=str,
        default=DEFAULT_RESPONSES_MODEL,
        help="Model used to write the summary (default: %(default)s)",
    )
    return parser


def _segment_to_wav_bytes(segment) -> bytes:
    """Encode one pydub ``AudioSegment`` as in-memory WAV bytes."""
    buffer = io.BytesIO()
    segment.export(buffer, format="wav")
    return buffer.getvalue()


async def main():
    """Run the video-to-blog-post pipeline from command-line arguments.

    Returns:
        The object returned by ``client.responses.create`` for the summary
        request.

    Raises:
        SystemExit: If the arguments are invalid or the video has no audio
            track (reported through ``argparse``).
    """
    parser = _build_parser()
    args = parser.parse_args()
    if args.segment_duration <= 0:
        # range() rejects a zero step and a negative step yields no segments.
        parser.error("--segment-duration must be a positive number of milliseconds")

    load_dotenv()

    # Close the clip when done so the underlying readers are released.
    with VideoFileClip(args.video_file) as clip:
        if clip.audio is None:
            parser.error(f"{args.video_file} has no audio track to transcribe")
        clip.audio.write_audiofile(TMP_AUDIO_PATH)

    audio = AudioSegment.from_wav(TMP_AUDIO_PATH)
    segments = [
        audio[start:start + args.segment_duration]
        for start in range(0, len(audio), args.segment_duration)
    ]

    # TODO: Test OpenAI
    client = OpenAI()

    # The client is synchronous, so each blocking call runs in a worker
    # thread. Segments are sent one at a time to keep the transcript in order.
    texts = []
    for index, segment in enumerate(segments):
        result = await asyncio.to_thread(
            client.audio.transcriptions.create,
            model=args.transcription_model,
            # The API needs encoded audio plus a filename, not an AudioSegment.
            file=(f"segment-{index}.wav", _segment_to_wav_bytes(segment)),
        )
        texts.append(result.text)
    transcription = ' '.join(texts)

    summary = await asyncio.to_thread(
        client.responses.create,
        model=args.responses_model,
        instructions=DEFAULT_RESPONSES_INSTRUCTIONS,
        input=transcription,
    )

    return summary


if __name__ == '__main__':
    # main() is a coroutine function; it must be driven by an event loop.
    summary = asyncio.run(main())
    # Print the generated Markdown rather than the response object's repr.
    print(summary.output_text)