diff --git a/.env_example b/.env_example
index 25e7d50..470d39a 100644
--- a/.env_example
+++ b/.env_example
@@ -1,6 +1,8 @@
-OPENAI_API_KEY =
-OPENAI_BASE_URL =
-OPENAI_RESPONSES_PROMPT =
-SEGMENT_DURATION =
-TMP_AUDIO_PATH =
-TMP_VIDEO_PATH =
+INPUT_VIDEO_URL=
+OUTPUT_PATH=
+OPENAI_API_KEY=
+OPENAI_BASE_URL=
+OPENAI_TRANSCRIPTION_MODEL=
+OPENAI_CHAT_SYSTEM_PROMPT=
+OPENAI_CHAT_MODEL=
+OPENAI_CHAT_N=
diff --git a/.gitignore b/.gitignore
index 6175add..bd4b8c9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
+__pycache__/*
+tmp/*
 venv/*
 .env
diff --git a/app.py b/app.py
index 90a4dce..4d45bf1 100644
--- a/app.py
+++ b/app.py
@@ -1,15 +1,26 @@
 import os
 import requests
-from dotenv import load_dotenv
 from moviepy import VideoFileClip
 from openai import OpenAI
 from pydub import AudioSegment
 
-DEFAULT_PROMPT = "You will be provided a video transcription for which you are to generate a blog post in Markdown format summarizing the video contents."
+DEFAULT_PROMPT = "The user will provide a video transcription for which you are to generate a blog post in Markdown format summarizing the video contents. Please output only the blog post content."
+VIDEO_URL = os.getenv('INPUT_VIDEO_URL', None)
+OUTPUT_PATH = os.getenv('OUTPUT_PATH', 'tmp')
+AUDIO_SEGMENT_DURATION = 30000  # milliseconds; pydub slices AudioSegments by ms
+OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', None)
+OPENAI_BASE_URL = os.getenv('OPENAI_BASE_URL', 'https://api.openai.com/v1')
+OPENAI_TRANSCRIPTION_MODEL = os.getenv('OPENAI_TRANSCRIPTION_MODEL', 'whisper-1')
+OPENAI_CHAT_SYSTEM_PROMPT = os.getenv('OPENAI_CHAT_SYSTEM_PROMPT', DEFAULT_PROMPT)
+OPENAI_CHAT_MODEL = os.getenv('OPENAI_CHAT_MODEL', 'gpt-4o-mini')
+OPENAI_CHAT_N = int(os.getenv('OPENAI_CHAT_N', '3'))
 
-def main(args):
-    openai_client = OpenAI()
+def main():
+    openai_client = OpenAI(
+        base_url=OPENAI_BASE_URL,
+        api_key=OPENAI_API_KEY
+    )
     return summarize_transcription(
         openai_client,
         transcribe_audio(
             openai_client,
@@ -21,39 +32,51 @@ def main(args):
     )
 
 def get_video_from_url():
-    video_file_url = os.getenv('INPUT_VIDEO_URL')
-    video_file_path = os.getenv('TMP_VIDEO_PATH' , '/tmp/video_summary_bot_tmp_video.mp4')
-    request = requests.get(video_file_url)
-    with open(video_file_path, 'wb') as f:
-        for chunk in requests.get(video_file_url).iter_content(chunk_size=255):
+    filename = VIDEO_URL.split('/')[-1]
+    with open(f"{OUTPUT_PATH}/{filename}", 'wb') as f:
+        for chunk in requests.get(VIDEO_URL).iter_content(chunk_size=255):
             if chunk:
                 f.write(chunk)
+    return filename
 
-def get_audio_from_video():
-    tmp_audio_path = os.getenv('TMP_AUDIO_PATH', '/tmp/video_summary_bot_tmp_audio.wav')
-    video_file_path = os.getenv('TMP_VIDEO_PATH')
-    VideoFileClip(video_file_path).audio.write_audiofile(tmp_audio_path)
-    return AudioSegment.from_wav(tmp_audio_path)
+def get_audio_from_video(video_filename):
+    VideoFileClip(f"{OUTPUT_PATH}/{video_filename}").audio.write_audiofile(f"{OUTPUT_PATH}/{video_filename}.wav")
+    audio = AudioSegment.from_wav(f"{OUTPUT_PATH}/{video_filename}.wav")
+    segments = []
+    for i in range(0, len(audio), AUDIO_SEGMENT_DURATION):
+        segment = audio[i:i + AUDIO_SEGMENT_DURATION]
+        path = f"{OUTPUT_PATH}/audio_segment_{i // AUDIO_SEGMENT_DURATION}.wav"
+        segments.append(path)
+        segment.export(path, format='wav')
+    return segments
 
-def transcribe_audio(openai_client, audio):
-    segment_duration = int(os.getenv('SEGMENT_DURATION', 30000)),
-    transcription_model = os.getenv('OPENAI_TRANSCRIPTION_MODEL', 'whisper-1')
+def transcribe_audio(openai_client, audio_segments):
     return ' '.join([
         openai_client.audio.transcriptions.create(
-            model=transcription_model,
-            file=each
-        ).text for each in [audio[i:i + segment_duration] for i in range(0, len(audio), segment_duration)]
+            model=OPENAI_TRANSCRIPTION_MODEL,
+            file=open(each, 'rb')
+        ).text for each in audio_segments
     ])
 
 def summarize_transcription(openai_client, transcription):
-    prompt = os.getenv('OPENAI_RESPONSES_PROMPT', DEFAULT_PROMPT)
-    responses_model = os.getenv('OPENAI_RESPONSES_MODEL', 'whisper-1')
-    return client.responses.create(
-        model=responses_model,
-        instructions=prompt,
-        input=transcription
-    )
+    return openai_client.chat.completions.create(
+        model=OPENAI_CHAT_MODEL,
+        n=OPENAI_CHAT_N,
+        messages=[
+            {"role": "developer", "content": OPENAI_CHAT_SYSTEM_PROMPT},
+            {"role": "user", "content": transcription}
+        ]
+    ).choices
+
+def setup():
+    from dotenv import load_dotenv
+    load_dotenv()
+
+def cleanup():
+    os.rmdir(OUTPUT_PATH)
 
 if __name__ == '__main__':
-    load_dotenv()
-    main(parser.parse_args())
+    setup()
+    for each in main():
+        print("========")
+        print(each.message.content)
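
Usage: copy .env_example to .env, set at least INPUT_VIDEO_URL and OPENAI_API_KEY, and run `python app.py`; the script downloads the video into OUTPUT_PATH, transcribes it in 30-second chunks, and prints OPENAI_CHAT_N candidate blog posts separated by "========" lines.

One caveat on the new cleanup(): it is never called from __main__, and os.rmdir() only removes empty directories, so it would fail on an OUTPUT_PATH full of downloaded media. A minimal sketch of a more thorough teardown, assuming the OUTPUT_PATH constant defined above (this is a suggestion, not part of the diff):

    import shutil

    def cleanup():
        # shutil.rmtree deletes the directory tree recursively;
        # ignore_errors=True makes this a no-op if the directory was never created.
        shutil.rmtree(OUTPUT_PATH, ignore_errors=True)

Invoked after the print loop in __main__, this would keep repeated runs from accumulating video, wav, and segment files on disk.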