How to send a multipart/form-data request with Vapor's client

78 views Asked by At

I am trying to send a request to OpenAI's speech-to-text API from my Vapor app. The API accepts multipart/form-data requests, but I couldn't figure out how to send one with Vapor's client API. For JSON requests, one can send a request fairly easily:

    // JSON request: assemble the headers first, then post `reqData` as the request content.
    let jsonHeaders = HTTPHeaders([
        ("Content-Type", "application/json"),
        ("Authorization", "Bearer \(memoKey)")
    ])
    let resp = try await client.post(
        "https://api.openai.com/v1/chat/completions",
        headers: jsonHeaders,
        content: reqData
    )

For multipart form data I've tried the following, but the API responds with a "Could not parse multipart form" error:

/// Form payload for the transcription request: a model identifier plus the audio bytes.
struct SpeechToTextRequest: Content {
    /// Model identifier sent with the request; defaults to `"whisper-1"`.
    var model: String = "whisper-1"

    /// Audio bytes to upload.
    var file: Data
}

/// Posts `audio` to OpenAI's transcription endpoint as form data and prints the response.
///
/// NOTE(review): this is the attempt that the API answers with
/// "Could not parse multipart form"; see the review notes on the lines below.
///
/// - Parameters:
///   - client: Client used to send the request.
///   - audio: Audio bytes, wrapped in `SpeechToTextRequest` as its `file` field.
/// - Throws: Errors from sending the request or from the form encoder.
func makeSpeechToTextRequest(
    client: Client,
    audio: Data
) async throws {
    let result = try await client.post(
        "https://api.openai.com/v1/audio/transcriptions",
        headers: [
            // NOTE(review): the header carries no `boundary` parameter, so the receiver
            // is not told how the parts are delimited — likely part of the parse failure.
            "Content-Type": "multipart/form-data",
            "Authorization": "Bearer \(memoKey)"
        ],
        beforeSend: { req in
            let encoder = FormDataEncoder()
            let encoded = try encoder.encode(
                SpeechToTextRequest(file: audio),
                // NOTE(review): the boundary passed to the encoder is an empty string —
                // presumably this leaves the parts without a usable delimiter; confirm.
                boundary: ""
            )

            // The encoded form goes through a string before becoming the body buffer.
            // NOTE(review): arbitrary audio bytes may not survive a string round-trip — verify.
            req.body = ByteBuffer(string: encoded)
        }
    )

    print(result)
}

For reference, here is the equivalent curl command for the request:

# Reference request: upload the audio file and the model name as multipart form fields.
curl -X POST \
  https://api.openai.com/v1/audio/transcriptions \
  -H "Authorization: Bearer $OPENAI_API_KEY" \
  -H 'Content-Type: multipart/form-data' \
  -F file=@/path/to/file/openai.mp3 \
  -F model=whisper-1
1

There is 1 answer

0
mustafa On BEST ANSWER

I ended up writing a small function that builds the multipart form data by hand, because I couldn't find any documentation for Vapor's MultipartKit library.

Here is how the request is constructed:

/// Transcribes `audio` through OpenAI's `audio/transcriptions` endpoint.
///
/// The multipart body is built by `createMultipartFormData(from:)` inside `beforeSend`,
/// which also supplies the content type (including its boundary) for the request.
///
/// - Parameters:
///   - client: Client used to send the request.
///   - audio: Audio bytes, uploaded as `speech.mp3` with type `audio/mp3`.
/// - Returns: The response body decoded as `SpeechToTextResponse`.
/// - Throws: Errors from sending the request or from decoding the response.
func makeSpeechToTextRequest(
    client: Client,
    audio: Data
) async throws -> SpeechToTextResponse {
    let response = try await client.post(
        "https://api.openai.com/v1/audio/transcriptions",
        headers: ["Authorization": "Bearer \(memoKey)"],
        beforeSend: { request in
            // The audio file plus the plain-text options sent alongside it.
            let formFields: [MultipartField] = [
                .file(fileName: "speech.mp3", fileType: "audio/mp3", fileData: audio),
                .string(name: "model", value: "whisper-1"),
                .string(name: "response_format", value: "verbose_json"),
                .string(name: "timestamp_granularities[]", value: "word")
            ]
            let form = createMultipartFormData(from: formFields)

            // Body and Content-Type must come from the same call so they share one boundary.
            request.body = form.0
            request.headers.contentType = form.1
        }
    )

    let transcription = try response.content.decode(SpeechToTextResponse.self)
    return transcription
}

And here is the helper function:

/// One part of a multipart form body.
private enum MultipartField {
    /// A plain-text form field with the given name and value.
    case string(
        name: String,
        value: String
    )

    /// A file upload: the file name, the content type to declare, and the raw bytes.
    case file(
        fileName: String,
        fileType: String,
        fileData: Data
    )
}

/// Serializes `fields` into a `multipart/form-data` request body.
///
/// Each field becomes one part introduced by a `--<boundary>` line, and the body ends with
/// the closing `--<boundary>--` delimiter. A fresh UUID string is used as the boundary on
/// every call. File parts are always written under the form field name `file`.
///
/// Field names and file names are written inside quoted `Content-Disposition` parameters,
/// so any `"`, CR or LF they contain is percent-encoded (`%22`, `%0D`, `%0A`). Left as-is,
/// such a character would end the quoted value or the header line early and corrupt the part.
///
/// - Parameter fields: The parts to write, in order.
/// - Returns: The encoded body and the media type carrying the matching `boundary` parameter.
private func createMultipartFormData(from fields: [MultipartField]) -> (ByteBuffer, HTTPMediaType) {
    // Works on Unicode scalars so that a "\r\n" pair (a single `Character`) is still
    // escaped as two separate bytes.
    func escapedParameter(_ raw: String) -> String {
        var escaped = ""
        for scalar in raw.unicodeScalars {
            switch scalar {
            case "\"": escaped += "%22"
            case "\r": escaped += "%0D"
            case "\n": escaped += "%0A"
            default: escaped.unicodeScalars.append(scalar)
            }
        }
        return escaped
    }

    let boundary = UUID().uuidString
    var buffer = ByteBuffer()

    for field in fields {
        // Every part starts with the same delimiter line, whatever its kind.
        buffer.writeString("--\(boundary)\r\n")

        switch field {
        case let .file(fileName, fileType, fileData):
            buffer.writeString("Content-Disposition: form-data; name=\"file\"; filename=\"\(escapedParameter(fileName))\"\r\n")
            buffer.writeString("Content-Type: \(fileType)\r\n\r\n")
            buffer.writeData(fileData)
            buffer.writeString("\r\n")
        case let .string(name, value):
            buffer.writeString("Content-Disposition: form-data; name=\"\(escapedParameter(name))\"\r\n\r\n")
            buffer.writeString("\(value)\r\n")
        }
    }

    // Closing delimiter: marks the end of the whole multipart body.
    buffer.writeString("--\(boundary)--\r\n")

    let mediaType = HTTPMediaType(
        type: "multipart",
        subType: "form-data",
        parameters: ["boundary": boundary]
    )

    return (buffer, mediaType)
}