Complete rewrite with pyloudnorm

Based on
    https://ffmpeg.org/pipermail/ffmpeg-user/2024-March/057775.html
    loudnorm is a low-quality filter.
    Some research turned up a few alternatives.
    One of these is "pyloudnorm", a native Python module implementing
    EBU R 128 loudness normalization.
    This is what I have now implemented in place of the old ffmpeg
    loudnorm filter.

    In the future, additional rewrites to use the official FLAC and Opus
    encoders might be desirable for better compression.
This commit is contained in:
exu 2024-03-24 12:57:59 +01:00
parent 78ee74ed9d
commit 10cf2f455d
2 changed files with 76 additions and 125 deletions

View File

@ -1,8 +1,9 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# ffmpeg wrapper # multithreading
import multiprocessing import multiprocessing
from os.path import isdir, isfile
# audio format conversions
import ffmpy import ffmpy
# argument parsing # argument parsing
@ -10,144 +11,104 @@ import argparse
# multiprocessing stuff # multiprocessing stuff
from multiprocessing import Pool from multiprocessing import Pool
from multiprocessing import cpu_count
# executing some commands # executing some commands
import subprocess import subprocess
# parsing json output of loudnorm
import json
# file/directory handling # file/directory handling
import os import os
# most recent starttime for program # most recent starttime for program
import time import time
# randomness
from random import randint from random import randint
# typing hints
from typing import Any, Optional from typing import Any, Optional
""" import tempfile
# working with sound files
import soundfile
# loudness normalization
import pyloudnorm
"""
Normalize loudness of all music files in a given directory and its subdirectories.
""" """
musicfile_extensions = (".flac", ".wav", ".mp3", ".m4a", ".aac", ".opus") musicfile_extensions = (".flac", ".wav", ".mp3", ".m4a", ".aac", ".opus")
def loudness_info(inputfile) -> dict[str, str]: def loudnorm(inputfile: str, outputfile: str):
""" """
Measure loudness of the given input file Normalize audio to EBU R 128 standard using pyloudnorm
Parameters: Parameters:
inputfile inputfile (str): Path to input file. Format must be supported by python-soundfile module
outputfile (str): Path to output file
"""
data, rate = soundfile.read(file=inputfile)
Output: # measure loudness
loudness (dict[str, str]): decoded json dictionary containing all loudness information meter = pyloudnorm.Meter(rate=rate)
loudness = meter.integrated_loudness(data=data)
# normalize audio
file_normalized = pyloudnorm.normalize.loudness(
data=data, input_loudness=loudness, target_loudness=-30.0
)
# write normalized audio to file
soundfile.write(file=outputfile, data=file_normalized, samplerate=rate)
def ffmpeg_to_wav(inputfile: str, outputfile: str):
"""
Convert a file into .wav for further processing
Parameters:
inputfile (str): Path to input file
outputfile (str): Path to output file
""" """
print("Measuring loudness of ", os.path.basename(inputfile)) # convert to wav in temporary directory
with tempfile.TemporaryDirectory() as tempdir:
# temporary input file
temp_input: str = os.path.join(
tempdir, os.path.splitext(os.path.basename(inputfile))[0] + ".wav"
)
# temporary output file
temp_output: str = os.path.join(
tempdir,
"normalized",
os.path.splitext(os.path.basename(inputfile))[0] + ".wav",
)
os.mkdir(os.path.join(tempdir, "normalized"))
# convert audio to wav
ff = ffmpy.FFmpeg( ff = ffmpy.FFmpeg(
inputs={inputfile: None}, inputs={inputfile: None}, outputs={temp_input: None}, global_options=("-y")
outputs={"/dev/null": "-pass 1 -filter:a loudnorm=print_format=json -f null"},
global_options=("-y"),
) )
proc = subprocess.Popen( subprocess.run(ff.cmd, shell=True, capture_output=True)
ff.cmd, shell=True, stderr=subprocess.STDOUT, stdout=subprocess.PIPE
)
# NOTE get loudness info from subprocess # normalize loudness
# rstrip: remove trailing newline loudnorm(inputfile=temp_input, outputfile=temp_output)
# decode: convert from binary string to utf8
# splitlines: list of lines (only 12 last ones, length of the output json)
# join: reassembles the list of lines and separates with "\n"
loudness_json: str = "\n".join(
proc.stdout.read().rstrip().decode("utf8").splitlines()[-12:]
)
# decode json to dict
loudness: dict[str, str] = json.loads(loudness_json)
return loudness
# convert audio back to lossy format
def convert(
inputfile: str,
outputfile: str,
codec: str,
compression: int,
loudness: dict[str, str],
bitrate: str = "0k",
) -> Optional[list[Any]]:
"""
Convert the input file to the desired format
Parameters:
inputfile (str)
outputfile (str)
loudness (dict[str, str])
Output:
dynamically normalised files (list)
"""
print("Working on ", os.path.basename(inputfile))
# NOTE including covers into ogg/opus containers currently doesn't work
# https://trac.ffmpeg.org/ticket/4448
inputcmd = {inputfile: None}
# NOTE bitrate is set to 0k when converting to flac. This does not have any effect however and is simply ignored
outputcmd = { outputcmd = {
outputfile: "-pass 2" outputfile: "-c:a libopus" " " "-b:a 192k" " " "-compression_level 10"
" "
"-filter:a"
" "
"loudnorm=I=-30.0:"
"LRA=10.0:"
"measured_I={input_i}:"
"measured_LRA={input_lra}:"
"measured_tp={input_tp}:measured_thresh={input_thresh}:"
"print_format=json"
" "
"-c:a {codec}"
" "
"-b:a {bitrate}"
" "
"-compression_level {compression}".format(
input_i=loudness["input_i"],
input_lra=loudness["input_lra"],
input_tp=loudness["input_tp"],
input_thresh=loudness["input_thresh"],
codec=codec,
bitrate=bitrate,
compression=compression,
)
} }
ff = ffmpy.FFmpeg( ff = ffmpy.FFmpeg(
inputs=inputcmd, inputs={temp_output: None}, outputs=outputcmd, global_options=("-y")
outputs=outputcmd,
global_options=("-y"),
) )
proc = subprocess.Popen( subprocess.run(ff.cmd, shell=True, capture_output=True)
ff.cmd, shell=True, stderr=subprocess.STDOUT, stdout=subprocess.PIPE
)
# NOTE get loudness info from subprocess
# rstrip: remove trailing newline
# decode: convert from binary string to utf8
# splitlines: list of lines (only 12 last ones, length of the output json)
# join: reassembles the list of lines and separates with "\n"
loudness_json: str = "\n".join(
proc.stdout.read().rstrip().decode("utf8").splitlines()[-12:]
)
# decode json to dict
loudness_new: dict[str, str] = json.loads(loudness_json)
if loudness_new["normalization_type"] != "linear":
nonlinear: list[Any] = [inputfile, loudness_new]
return nonlinear
def main(inputfile: str) -> Optional[list[Any]]: def main(inputfile: str) -> Optional[list[Any]]:
@ -182,36 +143,30 @@ def main(inputfile: str) -> Optional[list[Any]]:
match infile_extension: match infile_extension:
case ".flac" | ".wav": case ".flac" | ".wav":
print("Working on", inputfile)
outputfile: str = os.path.join(outputfolder, infile_noextension + ".flac") outputfile: str = os.path.join(outputfolder, infile_noextension + ".flac")
codec: str = "flac" # direct conversion start
compression: int = 12 # best compression loudnorm(inputfile=inputfile, outputfile=outputfile)
bitrate: str = "0k" print("Completed", inputfile)
case ".mp3" | ".m4a" | ".aac" | ".opus": case ".mp3" | ".m4a" | ".aac" | ".opus":
print("Working on", inputfile)
outputfile: str = os.path.join(outputfolder, infile_noextension + ".opus") outputfile: str = os.path.join(outputfolder, infile_noextension + ".opus")
codec: str = "libopus" # conversion is started within the ffmpeg_to_wav function
compression: int = 10 # best compression ffmpeg_to_wav(inputfile=inputfile, outputfile=outputfile)
bitrate: str = "192k" print("Completed", inputfile)
case _: case _:
print(inputfile, "does not use a known extension. Conversion skipped") print(
return inputfile,
"does not use a known extension. This error shouldn't be happening actually",
loudness: dict[str, str] = loudness_info(inputfile=inputfile)
nonlinear: Optional[list[Any]] = convert(
inputfile=inputfile,
outputfile=outputfile,
codec=codec,
compression=compression,
loudness=loudness,
bitrate=bitrate,
) )
return
return nonlinear
if __name__ == "__main__": if __name__ == "__main__":
""" """
Handle arguments and other details for interactive usage Handle arguments and other details for interactive usage
""" """
# start time of program # start time of program
starttime = time.time() starttime = time.time()
@ -281,9 +236,3 @@ if __name__ == "__main__":
# write this run's time into file # write this run's time into file
with open(timefile, "w") as file: with open(timefile, "w") as file:
file.write(str(starttime)) file.write(str(starttime))
print("Dynamically normalized music:")
for i in nonlinear_all:
# NOTE ignore empty and "None" values
if i:
print(i)

View File

@ -1 +1,3 @@
ffmpy ffmpy
soundfile
pyloudnorm