Skip to content

Commit

Permalink
Add flags to control alignment steps (#110)
Browse files Browse the repository at this point in the history
* local and global alignment flags

* Adjust pybind wrapper and test

---------

Co-authored-by: georgezachos <[email protected]>
  • Loading branch information
terpste and georgezachos authored Mar 19, 2024
1 parent 0846dab commit b2b2a64
Show file tree
Hide file tree
Showing 9 changed files with 77 additions and 22 deletions.
2 changes: 1 addition & 1 deletion python/visqol_lib_py.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ PYBIND11_MODULE(visqol_lib_py, m) {
.def(pybind11::init<>())
.def("Init",
pybind11::overload_cast<const Visqol::FilePath&, bool, bool, int,
bool>(&Visqol::VisqolManager::Init))
bool, bool, bool>(&Visqol::VisqolManager::Init))
.def("Run", pybind11::overload_cast<const Visqol::FilePath&,
const Visqol::FilePath&>(
&Visqol::VisqolManager::Run));
Expand Down
2 changes: 1 addition & 1 deletion python/visqol_lib_py_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def _calculate_visqol(reference_file, degraded_file):
ref_path = visqol_lib_py.FilePath(os.path.join(files_dir, reference_file))
deg_path = visqol_lib_py.FilePath(os.path.join(files_dir, degraded_file))
manager = visqol_lib_py.VisqolManager()
manager.Init(model_path, True, False, 60, True)
manager.Init(model_path, True, False, 60, True, False, False)
similarity_result = manager.Run(ref_path, deg_path)
return similarity_result

Expand Down
10 changes: 9 additions & 1 deletion src/commandline_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ ABSL_FLAG(int, search_window_radius, 60,
"search to discover patch matches. For a given reference frame, it "
"will look at 2*search_window_radius + 1 patches to find the most "
"optimal match.");
ABSL_FLAG(bool, disable_global_alignment, false, "Disables global alignment");
ABSL_FLAG(bool, disable_realignment, false, "Disables realignment");

namespace Visqol {
ABSL_CONST_INIT const char kDefaultAudioModelFile[] =
Expand Down Expand Up @@ -113,6 +115,8 @@ absl::StatusOr<CommandLineArgs> VisqolCommandLineParser::Parse(int argc,
bool use_lattice_model;
bool use_unscaled_mapping;
int search_window;
bool disable_global_alignment;
bool disable_realignment;

batch_input = FilePath(absl::GetFlag(FLAGS_batch_input_csv));
if (!batch_input.Path().empty()) {
Expand All @@ -132,6 +136,8 @@ absl::StatusOr<CommandLineArgs> VisqolCommandLineParser::Parse(int argc,
verbose = absl::GetFlag(FLAGS_verbose);
search_window = absl::GetFlag(FLAGS_search_window_radius);
debug_output = FilePath(absl::GetFlag(FLAGS_output_debug));
disable_global_alignment = absl::GetFlag(FLAGS_disable_global_alignment);
disable_realignment = absl::GetFlag(FLAGS_disable_realignment);

similarity_to_quality_model =
FilePath(absl::GetFlag(FLAGS_similarity_to_quality_model));
Expand Down Expand Up @@ -175,7 +181,9 @@ absl::StatusOr<CommandLineArgs> VisqolCommandLineParser::Parse(int argc,
.use_speech_mode = use_speech,
.use_unscaled_speech_mos_mapping = use_unscaled_mapping,
.search_window_radius = search_window,
.use_lattice_model = use_lattice_model};
.use_lattice_model = use_lattice_model,
.disable_global_alignment = disable_global_alignment,
.disable_realignment = disable_realignment};
}

std::vector<ReferenceDegradedPathPair>
Expand Down
10 changes: 10 additions & 0 deletions src/include/commandline_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,16 @@ struct CommandLineArgs {
* If true, use a lattice model to map similarity to MOS.
*/
bool use_lattice_model = true;

/**
* If true, disables global alignment.
*
* Defaults to false (alignment enabled) so that a CommandLineArgs built
* without naming this field never carries an indeterminate value.
*/
bool disable_global_alignment = false;

/**
* If true, disables patch-wise realignment.
*
* Defaults to false (realignment enabled), for the same reason as above.
*/
bool disable_realignment = false;
};

/**
Expand Down
3 changes: 2 additions & 1 deletion src/include/visqol.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ class Visqol {
* score.
* @param search_window This parameter is used to determine how far the
* algorithm will search in order to find the most optimal match.
* @param disable_realignment If true, skip the refined patch realignment.
*
* @return If the comparison was successful, return the similarity result and
* associated debug info. Else, return an error status.
Expand All @@ -67,7 +68,7 @@ class Visqol {
const ImagePatchCreator* patch_creator,
const ComparisonPatchesSelector* comparison_patches_selector,
const SimilarityToQualityMapper* sim_to_qual_mapper,
const int search_window) const;
const int search_window, const bool disable_realignment) const;

/**
* Produces a set of FVNSIM scores, which represent the similarity between
Expand Down
22 changes: 20 additions & 2 deletions src/include/visqol_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,16 @@ class VisqolManager {
* a given reference patch.
* @param use_lattice_model If true, use a lattice model for mapping
* similarity to quality.
* @param disable_global_alignment If true, skip the global alignment step.
* @param disable_realignment If true, skip the refined patch realignment.
*
* @return An 'OK' status if initialised successfully, else an error status.
*/
absl::Status Init(const FilePath& similarity_to_quality_mapper_model,
bool use_speech_mode, bool use_unscaled_speech,
int search_window, bool use_lattice_model = true);
int search_window, bool use_lattice_model = true,
bool disable_global_alignment = false,
bool disable_realignment = false);

/**
* Initializes an instance for use with the given similarity to quality
Expand All @@ -114,12 +118,16 @@ class VisqolManager {
* a given reference patch.
* @param use_lattice_model If true, use a lattice model for mapping
* similarity to quality.
* @param disable_global_alignment If true, skip the global alignment step.
* @param disable_realignment If true, skip the refined patch realignment.
*
* @return An 'OK' status if initialised successfully, else an error status.
*/
absl::Status Init(absl::string_view similarity_to_quality_mapper_model_string,
bool use_speech_mode, bool use_unscaled_speech,
int search_window, bool use_lattice_model = true);
int search_window, bool use_lattice_model = true,
bool disable_global_alignment = false,
bool disable_realignment = false);

/**
* Perform a comparison on a single reference/degraded audio file pair.
Expand Down Expand Up @@ -174,6 +182,16 @@ class VisqolManager {
*/
int search_window_ = 60;

/**
* True if the global alignment step should be skipped. When skipped, the
* degraded signal is used as-is and the reported alignment lag is 0.
*/
bool disable_global_alignment_ = false;

/**
* True if per-patch realignment is disabled. Forwarded to
* Visqol::CalculateSimilarity on each run.
*/
bool disable_realignment_ = false;

/**
* Used for creating the patches from both the reference and degraded signals
* for comparison.
Expand Down
3 changes: 2 additions & 1 deletion src/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ int main(int argc, char** argv) {
auto init_status = visqol.Init(
cmd_args.similarity_to_quality_mapper_model, cmd_args.use_speech_mode,
cmd_args.use_unscaled_speech_mos_mapping, cmd_args.search_window_radius,
cmd_args.use_lattice_model);
cmd_args.use_lattice_model, cmd_args.disable_global_alignment,
cmd_args.disable_realignment);
if (!init_status.ok()) {
ABSL_RAW_LOG(ERROR, "%s", init_status.ToString().c_str());
return -1;
Expand Down
21 changes: 13 additions & 8 deletions src/visqol.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ absl::StatusOr<SimilarityResult> Visqol::CalculateSimilarity(
const ImagePatchCreator* patch_creator,
const ComparisonPatchesSelector* comparison_patches_selector,
const SimilarityToQualityMapper* sim_to_qual_mapper,
const int search_window) const {
const int search_window,
const bool disable_realignment) const {
/////////////////// Stage 1: Preprocessing ///////////////////
deg_signal =
MiscAudio::ScaleToMatchSoundPressureLevel(ref_signal, deg_signal);
Expand Down Expand Up @@ -89,14 +90,18 @@ absl::StatusOr<SimilarityResult> Visqol::CalculateSimilarity(

// Realign the patches in time domain subsignals that start at the coarse
// patch times.
auto realign_result =
comparison_patches_selector->FinelyAlignAndRecreatePatches(
sim_match_info, ref_signal, deg_signal, spect_builder, window);
if (!realign_result.ok()) {
return realign_result.status();
}
if (disable_realignment) {
sim_match_info = most_sim_patch_result.value();
} else {
auto realign_result =
comparison_patches_selector->FinelyAlignAndRecreatePatches(
sim_match_info, ref_signal, deg_signal, spect_builder, window);
if (!realign_result.ok()) {
return realign_result.status();
}

sim_match_info = realign_result.value();
sim_match_info = realign_result.value();
}

AMatrix<double> fvnsim = CalcPerPatchMeanFreqBandMeans(sim_match_info);
AMatrix<double> fvnsim10 = CalcPerPatchFreqBandQuantile(sim_match_info, 0.10);
Expand Down
26 changes: 19 additions & 7 deletions src/visqol_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,14 @@ const double VisqolManager::kDurationMismatchTolerance = 1.0;

absl::Status VisqolManager::Init(
const FilePath& similarity_to_quality_mapper_model, bool use_speech_mode,
bool use_unscaled_speech, int search_window, bool use_lattice_model) {
bool use_unscaled_speech, int search_window, bool use_lattice_model,
bool disable_global_alignment, bool disable_realignment) {
use_speech_mode_ = use_speech_mode;
use_unscaled_speech_mos_mapping_ = use_unscaled_speech;
search_window_ = search_window;
use_lattice_model_ = use_lattice_model;
disable_global_alignment_ = disable_global_alignment;
disable_realignment_ = disable_realignment;

InitPatchCreator();
InitPatchSelector();
Expand All @@ -75,10 +78,12 @@ absl::Status VisqolManager::Init(
absl::Status VisqolManager::Init(
absl::string_view similarity_to_quality_mapper_model_string,
bool use_speech_mode, bool use_unscaled_speech, int search_window,
bool use_lattice_model) {
bool use_lattice_model, bool disable_global_alignment,
bool disable_realignment) {
return Init(FilePath(similarity_to_quality_mapper_model_string),
use_speech_mode, use_unscaled_speech, search_window,
use_lattice_model);
use_lattice_model, disable_global_alignment,
disable_realignment);
}

void VisqolManager::InitPatchCreator() {
Expand Down Expand Up @@ -154,9 +159,16 @@ absl::StatusOr<SimilarityResultMsg> VisqolManager::Run(

VISQOL_RETURN_IF_ERROR(ValidateInputAudio(ref_signal, deg_signal));

// Adjust for codec initial padding.
auto alignment_result = Alignment::GloballyAlign(ref_signal, deg_signal);
deg_signal = std::get<0>(alignment_result);
std::tuple<AudioSignal, double> alignment_result;
if (!disable_global_alignment_) {
// Adjust for codec initial padding.
alignment_result = Alignment::GloballyAlign(ref_signal, deg_signal);
deg_signal = std::get<0>(alignment_result);
}
else {
// If no alignment is performed, lag should be set to 0
alignment_result = std::make_tuple(deg_signal, 0.0);
}

const AnalysisWindow window{ref_signal.sample_rate, kOverlap};

Expand All @@ -168,7 +180,7 @@ absl::StatusOr<SimilarityResultMsg> VisqolManager::Run(
sim_result, visqol.CalculateSimilarity(
ref_signal, deg_signal, spectrogram_builder_.get(),
window, patch_creator_.get(), patch_selector_.get(),
sim_to_qual_.get(), search_window_));
sim_to_qual_.get(), search_window_, disable_realignment_));
SimilarityResultMsg sim_result_msg = PopulateSimResultMsg(sim_result);
sim_result_msg.set_alignment_lag_s(std::get<1>(alignment_result));
return sim_result_msg;
Expand Down

0 comments on commit b2b2a64

Please sign in to comment.