import io
import os

import requests
from dotenv import load_dotenv
from moviepy import VideoFileClip
from openai import OpenAI
from pydub import AudioSegment

# Instruction given to the responses model when OPENAI_RESPONSES_PROMPT is unset.
DEFAULT_PROMPT = (
    "You will be provided a video transcription for which you are to generate "
    "a blog post in Markdown format summarizing the video contents."
)


def main(args=None):
    """Download a video, transcribe its audio, and summarize the transcription.

    Args:
        args: Unused; kept (with a default) for backward compatibility with
            callers that pass parsed CLI arguments.

    Returns:
        The OpenAI Responses API response containing the Markdown summary.
    """
    openai_client = OpenAI()
    # BUG FIX: the original chained these calls, but get_video_from_url()
    # returned None and get_audio_from_video() accepted no argument, so the
    # pipeline never actually connected.
    video_path = get_video_from_url()
    audio = get_audio_from_video(video_path)
    transcription = transcribe_audio(openai_client, audio)
    return summarize_transcription(openai_client, transcription)


def get_video_from_url():
    """Download the video at INPUT_VIDEO_URL to a local temporary file.

    Returns:
        The path of the downloaded video file.

    Raises:
        requests.HTTPError: If the download request fails.
    """
    video_file_url = os.getenv('INPUT_VIDEO_URL')
    video_file_path = os.getenv('TMP_VIDEO_PATH', '/tmp/video_summary_bot_tmp_video.mp4')
    # BUG FIX: the original issued the GET twice (the first response was bound
    # to an unused variable) and never streamed, holding the whole video in
    # memory. Fetch once with stream=True and fail loudly on HTTP errors.
    with requests.get(video_file_url, stream=True) as response:
        response.raise_for_status()
        with open(video_file_path, 'wb') as f:
            # 8 KiB chunks instead of 255 bytes: far fewer iterations/syscalls.
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
    # BUG FIX: the original returned nothing, leaving callers with None.
    return video_file_path


def get_audio_from_video(video_file_path=None):
    """Extract the audio track of a video into a WAV file and load it.

    Args:
        video_file_path: Path to the source video. Defaults to the
            TMP_VIDEO_PATH environment variable (the original behavior).

    Returns:
        An AudioSegment containing the video's audio.
    """
    tmp_audio_path = os.getenv('TMP_AUDIO_PATH', '/tmp/video_summary_bot_tmp_audio.wav')
    if video_file_path is None:
        video_file_path = os.getenv('TMP_VIDEO_PATH')
    VideoFileClip(video_file_path).audio.write_audiofile(tmp_audio_path)
    return AudioSegment.from_wav(tmp_audio_path)


def transcribe_audio(openai_client, audio):
    """Transcribe audio in fixed-length segments and join the segment texts.

    Args:
        openai_client: An OpenAI client instance.
        audio: An AudioSegment; slicing is in milliseconds.

    Returns:
        The full transcription as a single space-joined string.
    """
    # BUG FIX: the original line ended with a stray comma, which made
    # segment_duration a 1-tuple and broke the slice arithmetic below.
    segment_duration = int(os.getenv('SEGMENT_DURATION', 30000))
    transcription_model = os.getenv('OPENAI_TRANSCRIPTION_MODEL', 'whisper-1')
    texts = []
    for start in range(0, len(audio), segment_duration):
        # BUG FIX: the API needs file-like audio data, not a raw AudioSegment.
        # Export each segment to an in-memory WAV buffer; the .name attribute
        # lets the SDK infer the file format.
        buffer = io.BytesIO()
        audio[start:start + segment_duration].export(buffer, format='wav')
        buffer.name = 'segment.wav'
        buffer.seek(0)
        texts.append(
            openai_client.audio.transcriptions.create(
                model=transcription_model,
                file=buffer,
            ).text
        )
    return ' '.join(texts)


def summarize_transcription(openai_client, transcription):
    """Generate a Markdown blog-post summary of a transcription.

    Args:
        openai_client: An OpenAI client instance.
        transcription: The transcribed text to summarize.

    Returns:
        The OpenAI Responses API response object.
    """
    prompt = os.getenv('OPENAI_RESPONSES_PROMPT', DEFAULT_PROMPT)
    # BUG FIX: 'whisper-1' is a speech-to-text model and cannot serve the
    # Responses API; default to a text-capable model instead.
    responses_model = os.getenv('OPENAI_RESPONSES_MODEL', 'gpt-4o-mini')
    # BUG FIX: the original referenced an undefined name `client`.
    return openai_client.responses.create(
        model=responses_model,
        instructions=prompt,
        input=transcription,
    )


if __name__ == '__main__':
    load_dotenv()
    # BUG FIX: the original called parser.parse_args() on an undefined
    # `parser`; no CLI arguments are used, so invoke main() directly.
    main()