Complete rewrite with pyloudnorm

According to https://ffmpeg.org/pipermail/ffmpeg-user/2024-March/057775.html, ffmpeg's loudnorm is a low-quality filter. Some research turned up several alternatives. One of these is "pyloudnorm", a native Python module implementing EBU R 128 loudness normalization, which is now used in place of the old ffmpeg filter. In the future, additional rewrites to use the official FLAC and Opus encoders might be desirable for better compression.
2024-03-24 12:57:59 +01:00 · 2024-03-24 12:57:59 +01:00 · 10cf2f455d
commit 10cf2f455d
parent 78ee74ed9d
2 changed files with 76 additions and 125 deletions
--- a/music-normalize/main.py
+++ b/music-normalize/main.py
@ -1,8 +1,9 @@
 #!/usr/bin/env python3
-# ffmpeg wrapper
+# multithreading
 import multiprocessing
-from os.path import isdir, isfile
+
 # audio format conversions
 import ffmpy
 # argument parsing
@ -10,144 +11,104 @@ import argparse
 # multiprocessing stuff
 from multiprocessing import Pool
 from multiprocessing import cpu_count
 # executing some commands
 import subprocess
 # parsing json output of loudnorm
 import json
 # file/directory handling
 import os
 # most recent starttime for program
 import time
 # randomness
 from random import randint
 # typing hints
 from typing import Any, Optional
-"""
+import tempfile
 # working with sound files
 import soundfile
 # loudness normalization
 import pyloudnorm
 """
 Normalize loudness of all music files in a given directory and its subdirectories.
 """
 musicfile_extensions = (".flac", ".wav", ".mp3", ".m4a", ".aac", ".opus")
-def loudness_info(inputfile) -> dict[str, str]:
+def loudnorm(inputfile: str, outputfile: str):
    """
-    Measure loudness of the given input file
+    Normalize audio to EBU R 128 standard using pyloudnorm
    Parameters:
-        inputfile
+        inputfile (str): Path to input file. Format must be supported by python-soundfile module
-
+        outputfile (str): Path to output file
    Output:
        loudness (dict[str, str]): decoded json dictionary containing all loudness information
    """
    data, rate = soundfile.read(file=inputfile)
-    print("Measuring loudness of ", os.path.basename(inputfile))
+    # measure loudness
    meter = pyloudnorm.Meter(rate=rate)
    loudness = meter.integrated_loudness(data=data)
-    ff = ffmpy.FFmpeg(
+    # normalize audio
-        inputs={inputfile: None},
+    file_normalized = pyloudnorm.normalize.loudness(
-        outputs={"/dev/null": "-pass 1 -filter:a loudnorm=print_format=json -f null"},
+        data=data, input_loudness=loudness, target_loudness=-30.0
        global_options=("-y"),
    )
-    proc = subprocess.Popen(
+    # write normalized audio to file
-        ff.cmd, shell=True, stderr=subprocess.STDOUT, stdout=subprocess.PIPE
+    soundfile.write(file=outputfile, data=file_normalized, samplerate=rate)
    )
    # NOTE get loudness info from subprocess
    # rstrip: remove trailing newline
    # decode: convert from binary string to utf8
    # splitlines: list of lines (only 12 last ones, length of the output json)
    # join: reassembles the list of lines and separates with "\n"
    loudness_json: str = "\n".join(
        proc.stdout.read().rstrip().decode("utf8").splitlines()[-12:]
    )
    # decode json to dict
    loudness: dict[str, str] = json.loads(loudness_json)
    return loudness
-def convert(
+def ffmpeg_to_wav(inputfile: str, outputfile: str):
    inputfile: str,
    outputfile: str,
    codec: str,
    compression: int,
    loudness: dict[str, str],
    bitrate: str = "0k",
 ) -> Optional[list[Any]]:
    """
-    Convert the input file to the desired format
+    Convert a file into .wav for further processing
    Parameters:
-        inputfile (str)
+        inputfile (str): Path to input file
-        outputfile (str)
+        outputfile (str): Path to output file
        loudness (dict[str, str])
    Output:
        dynamically normalised files (list)
    """
-    print("Working on ", os.path.basename(inputfile))
+    # convert to wav in temporary directory
-
+    with tempfile.TemporaryDirectory() as tempdir:
-    # NOTE including covers into ogg/opus containers currently doesn't work
+        # temporary input file
-    # https://trac.ffmpeg.org/ticket/4448
+        temp_input: str = os.path.join(
-    inputcmd = {inputfile: None}
+            tempdir, os.path.splitext(os.path.basename(inputfile))[0] + ".wav"
    # NOTE bitrate is set to 0k when converting to flac. This does not have any effect however and is simply ignored
    outputcmd = {
        outputfile: "-pass 2"
        " "
        "-filter:a"
        " "
        "loudnorm=I=-30.0:"
        "LRA=10.0:"
        "measured_I={input_i}:"
        "measured_LRA={input_lra}:"
        "measured_tp={input_tp}:measured_thresh={input_thresh}:"
        "print_format=json"
        " "
        "-c:a {codec}"
        " "
        "-b:a {bitrate}"
        " "
        "-compression_level {compression}".format(
            input_i=loudness["input_i"],
            input_lra=loudness["input_lra"],
            input_tp=loudness["input_tp"],
            input_thresh=loudness["input_thresh"],
            codec=codec,
            bitrate=bitrate,
            compression=compression,
        )
    }
-    ff = ffmpy.FFmpeg(
+        # temporary output file
-        inputs=inputcmd,
+        temp_output: str = os.path.join(
-        outputs=outputcmd,
+            tempdir,
-        global_options=("-y"),
+            "normalized",
-    )
+            os.path.splitext(os.path.basename(inputfile))[0] + ".wav",
        )
        os.mkdir(os.path.join(tempdir, "normalized"))
-    proc = subprocess.Popen(
+        # convert audio to wav
-        ff.cmd, shell=True, stderr=subprocess.STDOUT, stdout=subprocess.PIPE
+        ff = ffmpy.FFmpeg(
-    )
+            inputs={inputfile: None}, outputs={temp_input: None}, global_options=("-y")
        )
-    # NOTE get loudness info from subprocess
+        subprocess.run(ff.cmd, shell=True, capture_output=True)
    # rstrip: remove trailing newline
    # decode: convert from binary string to utf8
    # splitlines: list of lines (only 12 last ones, length of the output json)
    # join: reassembles the list of lines and separates with "\n"
    loudness_json: str = "\n".join(
        proc.stdout.read().rstrip().decode("utf8").splitlines()[-12:]
    )
-    # decode json to dict
+        # normalize loudness
-    loudness_new: dict[str, str] = json.loads(loudness_json)
+        loudnorm(inputfile=temp_input, outputfile=temp_output)
-    if loudness_new["normalization_type"] != "linear":
+
-        nonlinear: list[Any] = [inputfile, loudness_new]
+        # convert audio back to lossy format
-        return nonlinear
+        outputcmd = {
            outputfile: "-c:a libopus" " " "-b:a 192k" " " "-compression_level 10"
        }
        ff = ffmpy.FFmpeg(
            inputs={temp_output: None}, outputs=outputcmd, global_options=("-y")
        )
        subprocess.run(ff.cmd, shell=True, capture_output=True)
 def main(inputfile: str) -> Optional[list[Any]]:
@ -182,36 +143,30 @@ def main(inputfile: str) -> Optional[list[Any]]:
    match infile_extension:
        case ".flac" | ".wav":
            print("Working on", inputfile)
            outputfile: str = os.path.join(outputfolder, infile_noextension + ".flac")
-            codec: str = "flac"
+            # direct conversion start
-            compression: int = 12  # best compression
+            loudnorm(inputfile=inputfile, outputfile=outputfile)
-            bitrate: str = "0k"
+            print("Completed", inputfile)
        case ".mp3" | ".m4a" | ".aac" | ".opus":
            print("Working on", inputfile)
            outputfile: str = os.path.join(outputfolder, infile_noextension + ".opus")
-            codec: str = "libopus"
+            # conversion is started within the ffmpeg_to_wav function
-            compression: int = 10  # best compression
+            ffmpeg_to_wav(inputfile=inputfile, outputfile=outputfile)
-            bitrate: str = "192k"
+            print("Completed", inputfile)
        case _:
-            print(inputfile, "does not use a known extension. Conversion skipped")
+            print(
                inputfile,
                "does not use a known extension. This error shouldn't be happening actually",
            )
            return
    loudness: dict[str, str] = loudness_info(inputfile=inputfile)
    nonlinear: Optional[list[Any]] = convert(
        inputfile=inputfile,
        outputfile=outputfile,
        codec=codec,
        compression=compression,
        loudness=loudness,
        bitrate=bitrate,
    )
    return nonlinear
 if __name__ == "__main__":
    """
    Handle arguments and other details for interactive usage
    """
    # start time of program
    starttime = time.time()
@ -281,9 +236,3 @@ if __name__ == "__main__":
    # write this run's time into file
    with open(timefile, "w") as file:
        file.write(str(starttime))
    print("Dynamically normalized music:")
    for i in nonlinear_all:
        # NOTE ignore empty and "None" values
        if i:
            print(i)
--- a/music-normalize/requirements.txt
+++ b/music-normalize/requirements.txt
@ -1 +1,3 @@
 ffmpy
 soundfile
 pyloudnorm