Execute application with Gitea Actions #4

Merged
eric merged 6 commits from Broadcast/video-summary-bot#3 into main 2025-03-28 15:15:14 +00:00
3 changed files with 61 additions and 35 deletions
Showing only changes of commit 42bf3f0e98

.env.example (13 changes)

@@ -1,6 +1,7 @@
-OPENAI_API_KEY =
-OPENAI_BASE_URL =
-OPENAI_RESPONSES_PROMPT =
-SEGMENT_DURATION =
-TMP_AUDIO_PATH =
-TMP_VIDEO_PATH =
+INPUT_VIDEO_URL=
+OPENAI_API_KEY=
+OPENAI_BASE_URL=
+OPENAI_TRANSCRIPTION_MODEL=
+OPENAI_CHAT_SYSTEM_PROMPT=
+OPENAI_CHAT_MODEL=
+OPENAI_CHAT_N=
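Since the application reads these through python-dotenv, it is worth noting how they resolve at runtime: load_dotenv() fills os.environ from a local .env file but, by default, does not override variables that are already set, so values injected by the Actions runner take precedence over the file. A minimal sketch, assuming python-dotenv's defaults:

# Sketch of variable resolution, assuming python-dotenv's default
# override=False behavior: existing environment variables win over .env.
import os
from dotenv import load_dotenv

load_dotenv()  # reads ./.env if present; keeps values already in os.environ

VIDEO_URL = os.getenv('INPUT_VIDEO_URL')              # None when unset
OPENAI_CHAT_N = int(os.getenv('OPENAI_CHAT_N', '3'))  # getenv returns strings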

.gitignore vendored (2 changes)

@@ -1,2 +1,4 @@
+__pycache__/*
+tmp/*
 venv/*
 .env

app.py (81 changes)

@@ -1,15 +1,26 @@
 import os
 import requests
-from dotenv import load_dotenv
 from moviepy import VideoFileClip
 from openai import OpenAI
 from pydub import AudioSegment
 
-DEFAULT_PROMPT = "You will be provided a video transcription for which you are to generate a blog post in Markdown format summarizing the video contents."
+DEFAULT_PROMPT = "The user will provide a video transcription for which you are to generate a blog post in Markdown format summarizing the video contents. Please only output the blog post content."
+VIDEO_URL = os.getenv('INPUT_VIDEO_URL', None)
+OUTPUT_PATH = os.getenv('OUTPUT_PATH', 'tmp')
+AUDIO_SEGMENT_DURATION = 30000
+OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', None)
+OPENAI_BASE_URL = os.getenv('OPENAI_BASE_URL', 'https://api.openai.com/v1')
+OPENAI_TRANSCRIPTION_MODEL = os.getenv('OPENAI_TRANSCRIPTION_MODEL', 'whisper-1')
+OPENAI_CHAT_SYSTEM_PROMPT = os.getenv('OPENAI_CHAT_SYSTEM_PROMPT', DEFAULT_PROMPT)
+OPENAI_CHAT_MODEL = os.getenv('OPENAI_CHAT_MODEL', 'gpt-4o-mini')
+OPENAI_CHAT_N = int(os.getenv('OPENAI_CHAT_N', '3'))
 
 
-def main(args):
-    openai_client = OpenAI()
+def main():
+    openai_client = OpenAI(
+        base_url=OPENAI_BASE_URL,
+        api_key=OPENAI_API_KEY
+    )
     return summarize_transcription(
         openai_client,
         transcribe_audio(
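A side note on the client construction in this hunk: assuming openai-python v1.x, passing base_url and api_key explicitly is equivalent to letting the SDK fall back to the OPENAI_BASE_URL and OPENAI_API_KEY environment variables, which load_dotenv() has already populated by the time main() runs. A minimal sketch:

# Sketch: the two constructions match here, assuming openai-python v1.x,
# whose client reads OPENAI_API_KEY and OPENAI_BASE_URL from the
# environment when the kwargs are omitted.
import os
from openai import OpenAI

explicit = OpenAI(
    base_url=os.getenv('OPENAI_BASE_URL', 'https://api.openai.com/v1'),
    api_key=os.getenv('OPENAI_API_KEY')
)
implicit = OpenAI()  # same credentials, resolved from os.environ

Keeping the explicit form does make the dependency on the two variables visible at the call site, which is a reasonable choice for a CI-driven job.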
@@ -21,39 +32,51 @@ def main(args):
     )
 
 def get_video_from_url():
-    video_file_url = os.getenv('INPUT_VIDEO_URL')
-    video_file_path = os.getenv('TMP_VIDEO_PATH' , '/tmp/video_summary_bot_tmp_video.mp4')
-    request = requests.get(video_file_url)
-    with open(video_file_path, 'wb') as f:
-        for chunk in requests.get(video_file_url).iter_content(chunk_size=255):
+    filename = VIDEO_URL.split('/')[-1]
+    with open(f"{OUTPUT_PATH}/{filename}", 'wb') as f:
+        for chunk in requests.get(VIDEO_URL).iter_content(chunk_size=255):
             if chunk:
                 f.write(chunk)
+    return filename
 
-def get_audio_from_video():
-    tmp_audio_path = os.getenv('TMP_AUDIO_PATH', '/tmp/video_summary_bot_tmp_audio.wav')
-    video_file_path = os.getenv('TMP_VIDEO_PATH')
-    VideoFileClip(video_file_path).audio.write_audiofile(tmp_audio_path)
-    return AudioSegment.from_wav(tmp_audio_path)
+def get_audio_from_video(video_filename):
+    VideoFileClip(f"{OUTPUT_PATH}/{video_filename}").audio.write_audiofile(f"{OUTPUT_PATH}/{video_filename}.wav")
+    audio = AudioSegment.from_wav(f"{OUTPUT_PATH}/{video_filename}.wav")
+    segments = []
+    for i in range(0, len(audio), AUDIO_SEGMENT_DURATION):
+        segment = audio[i:i + AUDIO_SEGMENT_DURATION]
+        path = f"{OUTPUT_PATH}/audio_segment_{i // AUDIO_SEGMENT_DURATION}.wav"
+        segments.append(path)
+        segment.export(path, format='wav')
+    return segments
 
-def transcribe_audio(openai_client, audio):
-    segment_duration = int(os.getenv('SEGMENT_DURATION', 30000)),
-    transcription_model = os.getenv('OPENAI_TRANSCRIPTION_MODEL', 'whisper-1')
+def transcribe_audio(openai_client, audio_segments):
     return ' '.join([
         openai_client.audio.transcriptions.create(
-            model=transcription_model,
-            file=each
-        ).text for each in [audio[i:i + segment_duration] for i in range(0, len(audio), segment_duration)]
+            model=OPENAI_TRANSCRIPTION_MODEL,
+            file=open(each, 'rb')
+        ).text for each in audio_segments
     ])
 
 def summarize_transcription(openai_client, transcription):
-    prompt = os.getenv('OPENAI_RESPONSES_PROMPT', DEFAULT_PROMPT)
-    responses_model = os.getenv('OPENAI_RESPONSES_MODEL', 'whisper-1')
-    return client.responses.create(
-        model=responses_model,
-        instructions=prompt,
-        input=transcription
-    )
+    return openai_client.chat.completions.create(
+        model=OPENAI_CHAT_MODEL,
+        n=OPENAI_CHAT_N,
+        messages=[
+            {"role": "developer", "content": OPENAI_CHAT_SYSTEM_PROMPT},
+            {"role": "user", "content": transcription}
+        ]
+    ).choices
+
+def setup():
+    from dotenv import load_dotenv
+    load_dotenv()
+
+def cleanup():
+    os.rmdir(OUTPUT_PATH)
 
 if __name__ == '__main__':
-    load_dotenv()
-    main(parser.parse_args())
+    setup()
+    for each in main():
+        print("========")
+        print(each.message.content)
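One caveat on the new cleanup() helper, which is defined but not yet called anywhere: os.rmdir removes only empty directories, so it would raise OSError once OUTPUT_PATH contains the downloaded video and exported audio segments. A hedged sketch of a recursive variant, assuming the tmp/ layout used above:

# Hypothetical recursive cleanup, assuming OUTPUT_PATH mirrors app.py's
# default of 'tmp'. shutil.rmtree removes the directory tree, where
# os.rmdir fails on any non-empty directory.
import os
import shutil

OUTPUT_PATH = os.getenv('OUTPUT_PATH', 'tmp')

def cleanup():
    shutil.rmtree(OUTPUT_PATH, ignore_errors=True)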