diff --git a/.env_example b/.env_example index 7447381..470d39a 100644 --- a/.env_example +++ b/.env_example @@ -1,5 +1,7 @@ -OPENAI_API_KEY = -OPENAI_BASE_URL = -OPENAI_RESPONSES_PROMPT = -SEGMENT_DURATION = -TMP_AUDIO_PATH = +INPUT_VIDEO_URL= +OPENAI_API_KEY= +OPENAI_BASE_URL= +OPENAI_TRANSCRIPTION_MODEL= +OPENAI_CHAT_SYSTEM_PROMPT= +OPENAI_CHAT_MODEL= +OPENAI_CHAT_N= diff --git a/.gitea/workflows/execute.yaml b/.gitea/workflows/execute.yaml new file mode 100644 index 0000000..6cecf12 --- /dev/null +++ b/.gitea/workflows/execute.yaml @@ -0,0 +1,29 @@ +name: execute + +on: + workflow_dispatch: + inputs: + video-url: + description: "URL for the video to be analyzed" + required: true + +jobs: + Python: + runs-on: ubuntu-latest + env: + OPENAI_API_KEY: "nokey" + OPENAI_BASE_URL: "http://192.168.1.168/v1" + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Run applicaiton + run: python app.py diff --git a/.gitignore b/.gitignore index 6175add..bd4b8c9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ +__pycache__/* +tmp/* venv/* .env diff --git a/app.py b/app.py index 4676a2d..ee5d2ed 100644 --- a/app.py +++ b/app.py @@ -1,49 +1,79 @@ -import argparse import os +import requests -from dotenv import load_dotenv from moviepy import VideoFileClip from openai import OpenAI from pydub import AudioSegment -DEFAULT_PROMPT = "You will be provided a video transcription for which you are to generate a blog post in Markdown format summarizing the video contents." +from prompt import DEFAULT_PROMPT -def main(args): - openai_client = OpenAI() +VIDEO_URL = os.getenv('INPUT_VIDEO_URL', None) +OUTPUT_PATH = os.getenv('OUTPUT_PATH', 'tmp') +AUDIO_SEGMENT_DURATION = 30000 +OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', None) +OPENAI_BASE_URL = os.getenv('OPENAI_BASE_URL', 'https://api.openai.com/v1') +OPENAI_TRANSCRIPTION_MODEL = os.getenv('OPENAI_TRANSCRIPTION_MODEL', 'whisper-1') +OPENAI_CHAT_SYSTEM_PROMPT = os.getenv('OPENAI_CHAT_SYSTEM_PROMPT', DEFAULT_PROMPT) +OPENAI_CHAT_MODEL = os.getenv('OPENAI_CHAT_MODEL', 'whisper-1') +OPENAI_CHAT_N = int(os.getenv('OPENAI_CHAT_N', '3')) + +def main(): + openai_client = OpenAI( + base_url = OPENAI_BASE_URL, + api_key = OPENAI_API_KEY + ) return summarize_transcription( openai_client, transcribe_audio( openai_client, - get_audio_from_video(args.video_file_path) + get_audio_from_video( + get_video_from_url() + ) ) ) -def get_audio_from_video(video_file_path): - tmp_audio_path = os.getenv('TMP_AUDIO_PATH', '/tmp/video_summary_bot_tmp_audio.wav') - VideoFileClip(video_file_path).audio.write_audiofile(tmp_audio_path) - return AudioSegment.from_wav(tmp_audio_path) +def get_video_from_url(): + filename = VIDEO_URL.split('/')[-1] + with open(f"{OUTPUT_PATH}/{filename}", 'wb') as f: + for chunk in requests.get(VIDEO_URL).iter_content(chunk_size=255): + if chunk: + f.write(chunk) + return filename -def transcribe_audio(openai_client, audio): - segment_duration = int(os.getenv('SEGMENT_DURATION', 30000)), - transcription_model = os.getenv('OPENAI_TRANSCRIPTION_MODEL', 'whisper-1') +def get_audio_from_video(video_filename): + VideoFileClip(f"{OUTPUT_PATH}/{video_filename}").audio.write_audiofile(f"{OUTPUT_PATH}/{video_filename}.wav") + audio = AudioSegment.from_wav(f"{OUTPUT_PATH}/{video_filename}.wav") + segments = [] + for i in range(0, len(audio), AUDIO_SEGMENT_DURATION): + segment = audio[i:i + AUDIO_SEGMENT_DURATION] + path = f"{OUTPUT_PATH}/audio_segment_{i // AUDIO_SEGMENT_DURATION}.wav" + segments.append(path) + segment.export(path, format='wav') + return segments + +def transcribe_audio(openai_client, audio_segments): return ' '.join([ openai_client.audio.transcriptions.create( - model=transcription_model, - file=each - ).text for each in [audio[i:i + segment_duration] for i in range(0, len(audio), segment_duration)] + model=OPENAI_TRANSCRIPTION_MODEL, + file=open(each, 'rb') + ).text for each in audio_segments ]) def summarize_transcription(openai_client, transcription): - prompt = os.getenv('OPENAI_RESPONSES_PROMPT', DEFAULT_PROMPT) - responses_model = os.getenv('OPENAI_RESPONSES_MODEL', 'whisper-1') - return client.responses.create( - model=responses_model, - instructions=prompt, - input=transcription - ) + return openai_client.completions.create( + model=OPENAI_CHAT_MODEL, + prompt=OPENAI_CHAT_SYSTEM_PROMPT.format(transcription) + ).choices + +def setup(): + from dotenv import load_dotenv + load_dotenv() + +def cleanup(): + os.rmdir(OUTPUT_PATH) if __name__ == '__main__': - load_dotenv() - parser = argparse.ArgumentParser(description="Use AI models to summarize videos") - parser.add_argument('--video-file-path', type=str, help="Path to the video to be summarized") - main(parser.parse_args()) + setup() + for each in main(): + print("========") + print(each.text) diff --git a/prompt.py b/prompt.py new file mode 100644 index 0000000..be9dc5f --- /dev/null +++ b/prompt.py @@ -0,0 +1,19 @@ +DEFAULT_PROMPT = """ +You are a professional blog writer and SEO expert. You will be given the transcript of a live stream for which you are to generate +a blog post. + +Instructions: + - The blog post title should be SEO optimized. + - The blog post should be properly and beautifully formatted using markdown. + - Each blog post should have around 5 sections with 3 sub-sections each. + - Each sub section should have about 3 paragraphs. + - Sub-section headings should be clearly marked. + - Ensure that the content flows logically from one section to another, maintaining coherence and readability. + - In the final section, provide a forward-looking perspective on the topic and a conclusion. + - Make the blog post sound as human and as engaging as possible, add real world examples and make it as informative as possible. + - Please ensure proper and standard markdown formatting always. + +Transcription: {} + +Blog Post: +""" diff --git a/requirements.txt b/requirements.txt index 2d5fb82..30f1552 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,5 @@ moviepy +openai pydub +python-dotenv +requests