41 lines
1.5 KiB
Python
41 lines
1.5 KiB
Python
import argparse
import asyncio
import io
import os

from dotenv import load_dotenv
from moviepy import VideoFileClip
from openai import AsyncOpenAI, OpenAI
from pydub import AudioSegment
from pydub.playback import play
|
|
|
|
# Prompt passed as the `instructions` argument of the Responses API request
# made in main(); the transcription itself is sent separately as `input`.
DEFAULT_RESPONSES_INSTRUCTIONS = (
    "You will be provided a video transcription for which you are to "
    "generate a blog post in Markdown format summarizing the video's "
    "contents."
)
|
|
# Scratch file: main() writes the video's extracted audio track to this path
# and then loads it back with pydub.
# NOTE(review): hard-coded "/tmp" location with a fixed file name -- two runs
# at the same time would overwrite each other's audio; confirm that is
# acceptable for how this script is used.
TMP_AUDIO_PATH = "/tmp/video-summary-bot-tmp-audio.wav"
|
|
|
|
def _segment_to_wav_upload(segment, index):
    """Encode one pydub segment as an in-memory WAV upload.

    Args:
        segment: The ``AudioSegment`` slice to encode.
        index: Position of the segment, used only to build a file name.

    Returns:
        A ``(filename, bytes)`` tuple. The OpenAI SDK accepts this form for
        file uploads; a pydub ``AudioSegment`` cannot be uploaded directly.
    """
    buffer = io.BytesIO()
    segment.export(buffer, format="wav")
    # The ".wav" name gives the upload a recognisable audio extension.
    return (f"segment-{index}.wav", buffer.getvalue())


async def main():
    """Summarize a video as a Markdown blog post using OpenAI models.

    Parses the command line, extracts the video's audio track to
    ``TMP_AUDIO_PATH``, splits it into fixed-length segments, transcribes
    each segment in order, and sends the joined transcription to the
    Responses API together with ``DEFAULT_RESPONSES_INSTRUCTIONS``.

    Returns:
        The response object returned by ``client.responses.create``.

    Raises:
        SystemExit: On invalid command-line arguments, or when the video has
            no audio track (reported through ``argparse``).
    """
    parser = argparse.ArgumentParser(description="Use AI models to summarize videos")
    parser.add_argument(
        '--video-file',
        type=str,
        required=True,
        help="Path to the video to be summarized",
    )
    parser.add_argument(
        '--segment-duration',
        type=int,
        default=60_000,
        help="Length of audio segments in milliseconds (default: %(default)s)",
    )
    parser.add_argument(
        '--transcription-model',
        type=str,
        default="whisper-1",
        help="Model used to transcribe the audio (default: %(default)s)",
    )
    parser.add_argument(
        '--responses-model',
        type=str,
        default="gpt-4o-mini",
        help="Model used to write the summary (default: %(default)s)",
    )
    args = parser.parse_args()

    # range() below needs a strictly positive step; fail with a clear message
    # instead of a ValueError (0) or a silently empty transcription (< 0).
    if args.segment_duration <= 0:
        parser.error("--segment-duration must be a positive number of milliseconds")

    # Load .env before the client is built so it can supply the API key.
    load_dotenv()

    # The context manager closes the clip's reader once audio is written.
    with VideoFileClip(args.video_file) as clip:
        if clip.audio is None:
            parser.error(f"{args.video_file} has no audio track to transcribe")
        clip.audio.write_audiofile(TMP_AUDIO_PATH)

    try:
        audio = AudioSegment.from_wav(TMP_AUDIO_PATH)
    finally:
        # The audio is held in memory from here on; do not leave the scratch
        # file behind. Best-effort: a missing file is not an error.
        try:
            os.remove(TMP_AUDIO_PATH)
        except FileNotFoundError:
            pass

    # pydub slices by milliseconds, so each slice is one segment-duration long
    # (the final one may be shorter).
    segments = [
        audio[start:start + args.segment_duration]
        for start in range(0, len(audio), args.segment_duration)
    ]

    # The async client is required for the awaits below; awaiting the result
    # of the synchronous client raises TypeError.
    client = AsyncOpenAI()

    # Transcribe sequentially to keep segment order and avoid bursting the API.
    transcripts = []
    for index, segment in enumerate(segments):
        result = await client.audio.transcriptions.create(
            model=args.transcription_model,
            file=_segment_to_wav_upload(segment, index),
        )
        transcripts.append(result.text)
    transcription = ' '.join(transcripts)

    summary = await client.responses.create(
        model=args.responses_model,
        instructions=DEFAULT_RESPONSES_INSTRUCTIONS,
        input=transcription,
    )
    return summary
|
|
|
|
if __name__ == '__main__':
    # main() is a coroutine function: calling it only creates a coroutine
    # object, so it has to be driven by an event loop to actually run.
    summary = asyncio.run(main())
    # Print the generated Markdown text rather than the response object's repr.
    print(summary.output_text)
|