Problem Statement:
I am encountering an error while attempting to transcribe audio from a YouTube video using the OpenAI API in Python. My goal is to save the audio to an S3 bucket and then pass the S3 URL to the OpenAI API for transcription. However, I'm facing difficulties in achieving this and need assistance in resolving the error.
Details:
I have developed a Flask application where users can provide a link to a YouTube video along with start and end timestamps. The application should download the video, extract the specified segment, transcribe the audio using the OpenAI API, and return the transcription. To achieve this, I am utilizing MoviePy for video processing and OpenAI API for transcription.
Issue Encountered:
During the process of passing the audio data to the OpenAI API, I encountered the following error:
python
An error occurred during processing: expected str, bytes or os.PathLike object, not BytesIO
This error arises when attempting to pass the audio data directly to the OpenAI API. Below is my code:
from flask import Flask, jsonify, request
from pytube import YouTube
import boto3
from moviepy.editor import *
from openai import OpenAI
from moviepy.video.tools.subtitles import SubtitlesClip
import random
import io
import requests
import tempfile
app = Flask(__name__)
BUCKET_NAME = "clipwave"
client = OpenAI(api_key="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")
colors = ["green", "yellow", "red", "white"]
fonts = ["Impact", "Comic-Sans-MS-Bold"]
def upload_to_s3(filename, video_stream):
s3_client = boto3.client("s3")
with requests.get(video_stream.url, stream=True) as response:
if response.status_code == 200:
s3_client.upload_fileobj(response.raw, BUCKET_NAME, filename)
def get_subs(clip, key_name):
audio = clip.audio
audio_bytes = io.BytesIO()
audio.write_audiofile(audio_bytes, codec='pcm_s16le') # Write audio to BytesIO object
try:
# Get the bytes content from BytesIO
audio_data = audio_bytes.getvalue()
# Upload audio data directly to S3
upload_audio_to_s3(audio_data, BUCKET_NAME, f"audio_{key_name}")
# Transcribe audio using OpenAI's API
transcript = client.audio.transcriptions.create(
model="whisper-1",
file=audio_data, # Pass the retrieved bytes data
response_format="verbose_json",
timestamp_granularities=["word"]
)
text = transcript.text
timestamps = transcript.words
return {'text': text, 'timestamps': timestamps}
except Exception as e:
print("An error occurred during processing:", e)
raise
def upload_audio_to_s3(audio_bytes, bucket_name, key_name):
s3 = boto3.client('s3')
s3.put_object(Body=audio_bytes, Bucket=bucket_name, Key=key_name)
def get_video_url(filename):
s3_client = boto3.client("s3")
s3_object = s3_client.get_object(Bucket=BUCKET_NAME, Key=filename)
if 'Location' in s3_object:
video_url = s3_object["Location"]
return video_url
else:
region = s3_client.meta.region_name
bucket_url = f"https://{BUCKET_NAME}.s3.{region}.amazonaws.com/"
video_url = bucket_url + filename
return video_url
def generate_subtitles_clip(subs, delay=0.05):
text = subs['text']
timestamps = subs['timestamps']
clips = []
for word_info in timestamps:
start_time = word_info['start'] + delay
end_time = word_info['end'] + delay
word = word_info['word']
clips.append(((start_time, end_time), word.upper()))
font = random.choice(fonts)
color = random.choice(colors)
return SubtitlesClip(clips, lambda txt: TextClip(txt, fontsize=100, color=color, method='caption', stroke_color="black", stroke_width=6, font=font))
def upload_video_to_s3(video_bytes, bucket_name, key_name):
s3 = boto3.client('s3')
s3.put_object(Body=video_bytes, Bucket=bucket_name, Key=key_name)
@app.route('/make-short', methods=['GET'])
def make_short():
s3 = boto3.client('s3')
link = request.args.get('link')
start = request.args.get('start')
end = request.args.get('end')
if link:
try:
youtube_object = YouTube(link)
video_stream = youtube_object.streams.get_highest_resolution()
filename = video_stream.default_filename.replace(' ', '')
upload_to_s3(filename, video_stream)
video_url = get_video_url(filename)
video = VideoFileClip(video_url).subclip(start, end).fx(vfx.fadeout, 1)
# Aspect ratio and cropping logic remains unchanged
# Generate subtitles and create SubtitlesClip
subs_result = get_subs(video, f"subs_{filename}")
subs_clip = generate_subtitles_clip(subs_result)
# Overlay subtitles on the video and write the final video file to a temporary file
final_video = CompositeVideoClip([video.set_duration(subs_clip.duration), subs_clip.set_position(((1920/2 - 1080/2), 1200))])
temp_video_path = tempfile.NamedTemporaryFile(suffix='.mp4').name
final_video.write_videofile(temp_video_path, codec="libx264")
# Upload final video to S3 and clean up uploaded files
with open(temp_video_path, 'rb') as temp_video_file:
video_bytesio = io.BytesIO(temp_video_file.read())
upload_video_to_s3(video_bytesio.getvalue(), BUCKET_NAME, f"{filename}_short")
s3.delete_object(Bucket=BUCKET_NAME, Key=filename)
s3.delete_object(Bucket=BUCKET_NAME, Key=f"subs_{filename}")
url = get_video_url(f"{filename}_short")
return jsonify({"message": "Video uploaded to S3 successfully!", "url": url})
except Exception as e:
print("An error occurred:", e)
return jsonify({"message": "Error downloading or uploading video"}), 500
else:
return jsonify({"message": "Missing 'link' parameter"}), 400
if __name__ == "__main__":
app.run(port=3000, debug=True)
Request for Assistance:
I am seeking guidance on how to properly save the audio to an S3 bucket and then pass the S3 URL to the OpenAI API for transcription. What steps should I take to ensure a smooth process and avoid the encountered error?
Your insights and suggestions would be greatly appreciated. Thank you!
What I've Tried:
I attempted to pass the audio data directly to the OpenAI API, but encountered the error mentioned above.
I also tried writing the audio to a temporary file and passing the file path to the OpenAI API, but encountered similar errors.