Python Spectrograms for song identification

41 views Asked by At

The goal of this program is to create a database of spectrograms from audio files, and then identify a specific song based on a provided audio fragment. The current code identifies the song correctly only if the provided snippet starts exactly at the beginning of the database song. If the snippet starts anywhere else in the song, it is not identified correctly.

generaDB.py
import os
import numpy as np
import soundfile as sf
from scipy.signal import spectrogram
from utilidades import generate_database

def generate_spectrogram(audio_file, target_length):
    """Return the spectrogram of an audio file with exactly ``target_length`` columns.

    The file is read with ``soundfile``, downmixed to a single channel when it
    has several, and transformed with ``scipy.signal.spectrogram`` using that
    function's default window settings.

    Args:
        audio_file: Path of the audio file to read.
        target_length: Number of time columns the result must have. Shorter
            spectrograms are zero-padded on the right; longer ones are
            truncated to their first ``target_length`` columns.

    Returns:
        A 2-D array (frequency bins x ``target_length``) with the raw,
        unnormalized spectrogram values.
    """
    audio_data, sampling_frequency = sf.read(audio_file)

    # soundfile returns multi-channel audio as a (frames, channels) array,
    # while ``spectrogram`` transforms along the last axis by default. Without
    # a downmix the transform would run across the channels instead of across
    # time and the result would not be a 2-D time-frequency matrix.
    if audio_data.ndim > 1:
        audio_data = audio_data.mean(axis=1)

    _, _, Sxx = spectrogram(audio_data, fs=sampling_frequency)

    n_columns = Sxx.shape[1]
    if n_columns < target_length:
        # Zero-pad only the time axis, and only on the right-hand side.
        Sxx = np.pad(Sxx, pad_width=((0, 0), (0, target_length - n_columns)))
    elif n_columns > target_length:
        Sxx = Sxx[:, :target_length]

    return Sxx

if __name__ == "__main__":
    # Forward slashes keep the path usable across operating systems.
    generate_database("Canciones/Canciones")

identifica.py
import os
import numpy as np
from utilidades import identify_song

def load_database():
    """Load the spectrogram database and its song metadata from the working directory.

    Reads the ``spectrogram_db`` array from ``EspectrogramaBD.npz`` and the
    song information from ``info.out.npy``.

    Returns:
        A ``(spectrogram_db, song_info)`` tuple on success, or ``(None, None)``
        when either file is missing or cannot be read. In the failure case a
        message describing the problem is printed.
    """
    try:
        # np.load keeps the .npz archive open until it is closed; the context
        # manager releases the file handle as soon as the array has been read.
        with np.load("EspectrogramaBD.npz") as data:
            spectrogram_db = data['spectrogram_db']
        # NOTE(security): allow_pickle=True unpickles arbitrary objects, so
        # this file must only ever come from a trusted source.
        song_info = np.load("info.out.npy", allow_pickle=True)
    except FileNotFoundError:
        print("Error: No se encontraron los archivos de la base de datos.")
        return None, None
    except Exception as e:
        # Top-level boundary: report any other load problem instead of crashing.
        print(f"Error al cargar la base de datos: {e}")
        return None, None

    return spectrogram_db, song_info

def main():
    """Interactively identify a WAV file against the stored spectrogram database.

    Loads the database, keeps asking for a path until an existing one is
    entered, runs ``identify_song`` on it and prints the outcome. Returns
    early, after printing an error, when the database cannot be loaded.
    """
    spectrogram_db, song_info = load_database()

    if spectrogram_db is None or song_info is None:
        print("Error al cargar la base de datos.")
        return
    print("Base de datos cargada exitosamente.")

    while True:
        audio_file = input("Ingrese la ruta del archivo WAV: ")
        if os.path.exists(audio_file):
            break
        print("Error: El archivo no existe. Por favor, verifique la ruta.")

    # NOTE(review): presumably this has to equal the target_length used when
    # the database was generated - confirm against the generator script.
    target_length = 500

    identification_result = identify_song(audio_file, target_length, spectrogram_db, song_info)

    # identify_song reports every outcome (match, no match, error) as a
    # non-empty string, so truthiness alone cannot tell success from failure;
    # only its success message starts with this prefix.
    if identification_result and identification_result.startswith("Canción identificada"):
        print(f"La canción fue identificada como: {identification_result}")
    else:
        # Show the diagnostic text as-is rather than labelling it as a match.
        print(identification_result or "No se pudo identificar la canción.")

# Run the interactive identification only when this file is executed as a script.
if __name__ == "__main__":
    main()
utilidades.py
# utilidades.py
import os
import numpy as np
import soundfile as sf
from scipy.signal import spectrogram

def generate_spectrogram(audio_file, target_length):
    """Return a normalized spectrogram of an audio file with ``target_length`` columns.

    The file is read with ``soundfile``, downmixed to a single channel when it
    has several, and transformed with ``scipy.signal.spectrogram`` using that
    function's default window settings. The result is padded or truncated to
    the requested width and then standardized.

    Args:
        audio_file: Path of the audio file to read.
        target_length: Number of time columns the result must have. Shorter
            spectrograms are zero-padded on the right; longer ones are
            truncated to their first ``target_length`` columns.

    Returns:
        A 2-D array (frequency bins x ``target_length``) with the global mean
        subtracted and divided by the global standard deviation. The statistics
        are computed after padding/truncation. When the standard deviation is
        zero, only the mean is subtracted.
    """
    audio_data, sampling_frequency = sf.read(audio_file)

    # soundfile returns multi-channel audio as a (frames, channels) array,
    # while ``spectrogram`` transforms along the last axis by default. Without
    # a downmix the transform would run across the channels instead of across
    # time and the result would not be a 2-D time-frequency matrix.
    if audio_data.ndim > 1:
        audio_data = audio_data.mean(axis=1)

    _, _, Sxx = spectrogram(audio_data, fs=sampling_frequency)

    n_columns = Sxx.shape[1]
    if n_columns < target_length:
        # Zero-pad only the time axis, and only on the right-hand side.
        Sxx = np.pad(Sxx, pad_width=((0, 0), (0, target_length - n_columns)))
    elif n_columns > target_length:
        Sxx = Sxx[:, :target_length]

    centered = Sxx - np.mean(Sxx)
    spread = np.std(Sxx)
    # A constant spectrogram (e.g. digital silence) has zero deviation;
    # dividing by it would fill the result with NaN and make every later
    # distance comparison fail silently.
    if spread == 0:
        return centered
    return centered / spread

def generate_database(folder_path, target_length=500):
    """Build the spectrogram database from every WAV file under a folder.

    Walks ``folder_path`` recursively, computes one spectrogram per file whose
    name ends in ``.wav`` (case-insensitive) and writes two files to the
    current working directory: ``EspectrogramaBD.npz`` with the stacked
    spectrograms under the key ``spectrogram_db``, and the song metadata
    saved through ``np.save("info.out", ...)``.

    Args:
        folder_path: Root directory that is searched for WAV files.
        target_length: Column count passed to ``generate_spectrogram`` for
            every file.
    """
    wav_paths = [
        os.path.join(current_dir, name)
        for current_dir, _subdirs, names in os.walk(folder_path)
        for name in names
        if name.lower().endswith(".wav")
    ]

    spectrogram_db = []
    song_info = []
    for wav_path in wav_paths:
        spec = generate_spectrogram(wav_path, target_length)
        spectrogram_db.append(spec)
        # "duration" holds the spectrogram's column count, not seconds.
        song_info.append({"file": wav_path, "duration": spec.shape[1]})

    np.savez("EspectrogramaBD.npz", spectrogram_db=spectrogram_db)
    np.save("info.out", song_info)

def identify_song(audio_file, target_length, spectrogram_db, song_info):
    """Find the database song whose spectrogram best matches an audio snippet.

    The snippet's spectrogram (``target_length`` columns wide) is slid across
    every database spectrogram one column at a time, and the window with the
    smallest Frobenius-norm difference wins. The previous version cut each
    database entry down to ``target_length`` columns and indexed the snippet
    with the database offset, so only offset 0 was ever compared.

    NOTE(review): the sliding search can only find non-zero offsets when the
    database entries are wider than ``target_length``. Entries of exactly that
    width are compared at offset 0 only, as before. Entries narrower than
    ``target_length`` are skipped.

    Args:
        audio_file: Path of the audio snippet to identify.
        target_length: Width, in spectrogram columns, of the comparison window.
        spectrogram_db: Iterable of 2-D database spectrograms.
        song_info: Sequence indexed like ``spectrogram_db`` whose items expose
            the song path under the ``"file"`` key.

    Returns:
        Always a non-empty message string: the match description when the best
        difference is below the identification threshold, a "not identified"
        message with the best difference otherwise, or an error message when
        reading the snippet raises ``FileNotFoundError``.
    """
    try:
        input_spectrogram = generate_spectrogram(audio_file, target_length)
    except FileNotFoundError:
        return "Error: El archivo de audio no existe."

    best_match = None
    best_match_difference = float('inf')
    best_match_start = 0
    best_match_end = target_length

    identification_threshold = 2.0  # Adjust as needed

    for idx, db_spectrogram in enumerate(spectrogram_db):
        # Keep the full database entry: truncating it here is what used to
        # restrict the search to the beginning of each song.
        last_start = db_spectrogram.shape[1] - target_length

        for start_col in range(last_start + 1):
            end_col = start_col + target_length

            # Only the database window moves; the snippet is always compared
            # in full against the window starting at start_col.
            window = db_spectrogram[:, start_col:end_col]
            difference = np.linalg.norm(window - input_spectrogram)

            if difference < best_match_difference:
                best_match_difference = difference
                best_match = idx
                best_match_start = start_col
                best_match_end = end_col

    if best_match is not None and best_match_difference < identification_threshold:
        identified_song = song_info[best_match]["file"]
        return f"Canción identificada: {identified_song}, Diferencia mínima: {best_match_difference}, Inicio: {best_match_start}, Fin: {best_match_end}"
    else:
        return f"No se pudo identificar la canción. Diferencia mínima: {best_match_difference}"

I've tried everything I know, but I can't find out why the code does not identify the song when the snippet does not start at the beginning. I'm expecting the code to identify the song even if the snippet does not start at second 0 (the beginning). I'm nearly 100% sure that the problem is in utilidades.py, in the function identify_song. If anyone could help, I would greatly appreciate it.

0

There are 0 answers