Skip to content

Commit

Permalink
Merge pull request #4 from rhasspy/no-fma
Browse files Browse the repository at this point in the history
Remove -mfma from compilation
  • Loading branch information
synesthesiam authored Sep 28, 2023
2 parents 0af1f7b + 54c6bac commit 0b3bc23
Show file tree
Hide file tree
Showing 8 changed files with 63 additions and 63 deletions.
28 changes: 13 additions & 15 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
_SOURCE_DIR = _DIR / "webrtc-audio-processing"
_WEBRTC_DIR = _SOURCE_DIR / "webrtc-audio-processing-1"

__version__ = "1.2.2"
__version__ = "1.2.3"

# webrtc/
# rtc_base/
Expand Down Expand Up @@ -410,15 +410,26 @@
machine_cflags += [
"-DWEBRTC_ARCH_X86_FAMILY",
"-msse2",
"-mfma",
]

if have_avx2:
# Advanced Vector Extensions 2 (AVX2)
machine_cflags += [
"-DWEBRTC_ENABLE_AVX2",
"-mfma",
"-mavx2",
]
webrtc_audio_processing_sources += [
"aec3/adaptive_fir_filter_avx2.cc",
"aec3/adaptive_fir_filter_erl_avx2.cc",
"aec3/fft_data_avx2.cc",
"aec3/matched_filter_avx2.cc",
"aec3/vector_math_avx2.cc",
]
common_audio_sources += [
"fir_filter_avx2.cc",
"resampler/sinc_resampler_avx2.cc",
]

if machine in ("x86_64", "amd64"):
# 64-bit x86_64
Expand All @@ -438,19 +449,6 @@
"resampler/sinc_resampler_sse.cc",
"third_party/ooura/fft_size_128/ooura_fft_sse2.cc",
]

# These have to be included, even if they aren't used
webrtc_audio_processing_sources += [
"aec3/adaptive_fir_filter_avx2.cc",
"aec3/adaptive_fir_filter_erl_avx2.cc",
"aec3/fft_data_avx2.cc",
"aec3/matched_filter_avx2.cc",
"aec3/vector_math_avx2.cc",
]
common_audio_sources += [
"fir_filter_avx2.cc",
"resampler/sinc_resampler_avx2.cc",
]
elif machine in ("armv7", "armv7l"):
# 32-bit ARM
machine_cflags += [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,17 @@ FIRFilter* CreateFirFilter(const float* coefficients,
// If we know the minimum architecture at compile time, avoid CPU detection.
#if defined(WEBRTC_ARCH_X86_FAMILY)
// x86 CPU detection required.
if (GetCPUInfo(kAVX2)) {
filter =
new FIRFilterAVX2(coefficients, coefficients_length, max_input_length);
} else if (GetCPUInfo(kSSE2)) {
filter =
new FIRFilterSSE2(coefficients, coefficients_length, max_input_length);
} else {
filter = new FIRFilterC(coefficients, coefficients_length);
}
// if (GetCPUInfo(kAVX2)) {
// filter =
// new FIRFilterAVX2(coefficients, coefficients_length, max_input_length);
// } else if (GetCPUInfo(kSSE2)) {
// filter =
// new FIRFilterSSE2(coefficients, coefficients_length, max_input_length);
// } else {
// filter = new FIRFilterC(coefficients, coefficients_length);
// }

filter = new FIRFilterC(coefficients, coefficients_length);
#elif defined(WEBRTC_HAS_NEON)
filter =
new FIRFilterNEON(coefficients, coefficients_length, max_input_length);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,15 +125,15 @@ const size_t SincResampler::kKernelSize;
void SincResampler::InitializeCPUSpecificFeatures() {
#if defined(WEBRTC_HAS_NEON)
convolve_proc_ = Convolve_NEON;
#elif defined(WEBRTC_ARCH_X86_FAMILY)
// Using AVX2 instead of SSE2 when AVX2 supported.
if (GetCPUInfo(kAVX2))
convolve_proc_ = Convolve_AVX2;
else if (GetCPUInfo(kSSE2))
convolve_proc_ = Convolve_SSE;
else
convolve_proc_ = Convolve_C;
#else
// #elif defined(WEBRTC_ARCH_X86_FAMILY)
// // Using AVX2 instead of SSE2 when AVX2 supported.
// if (GetCPUInfo(kAVX2))
// convolve_proc_ = Convolve_AVX2;
// else if (GetCPUInfo(kSSE2))
// convolve_proc_ = Convolve_SSE;
// else
// convolve_proc_ = Convolve_C;
// #else
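// Fall back to the portable C implementation.
// NOTE(review): with the `#elif`/`#else` above commented out, this assignment appears to fall inside the WEBRTC_HAS_NEON branch — confirm non-NEON builds still set convolve_proc_.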
convolve_proc_ = Convolve_C;
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -556,9 +556,9 @@ void AdaptiveFirFilter::Filter(const RenderBuffer& render_buffer,
case Aec3Optimization::kSse2:
aec3::ApplyFilter_Sse2(render_buffer, current_size_partitions_, H_, S);
break;
case Aec3Optimization::kAvx2:
aec3::ApplyFilter_Avx2(render_buffer, current_size_partitions_, H_, S);
break;
// case Aec3Optimization::kAvx2:
// aec3::ApplyFilter_Avx2(render_buffer, current_size_partitions_, H_, S);
// break;
#endif
#if defined(WEBRTC_HAS_NEON)
case Aec3Optimization::kNeon:
Expand Down Expand Up @@ -600,9 +600,9 @@ void AdaptiveFirFilter::ComputeFrequencyResponse(
case Aec3Optimization::kSse2:
aec3::ComputeFrequencyResponse_Sse2(current_size_partitions_, H_, H2);
break;
case Aec3Optimization::kAvx2:
aec3::ComputeFrequencyResponse_Avx2(current_size_partitions_, H_, H2);
break;
// case Aec3Optimization::kAvx2:
// aec3::ComputeFrequencyResponse_Avx2(current_size_partitions_, H_, H2);
// break;
#endif
#if defined(WEBRTC_HAS_NEON)
case Aec3Optimization::kNeon:
Expand All @@ -626,10 +626,10 @@ void AdaptiveFirFilter::AdaptAndUpdateSize(const RenderBuffer& render_buffer,
aec3::AdaptPartitions_Sse2(render_buffer, G, current_size_partitions_,
&H_);
break;
case Aec3Optimization::kAvx2:
aec3::AdaptPartitions_Avx2(render_buffer, G, current_size_partitions_,
&H_);
break;
// case Aec3Optimization::kAvx2:
// aec3::AdaptPartitions_Avx2(render_buffer, G, current_size_partitions_,
// &H_);
// break;
#endif
#if defined(WEBRTC_HAS_NEON)
case Aec3Optimization::kNeon:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,9 @@ void ComputeErl(const Aec3Optimization& optimization,
case Aec3Optimization::kSse2:
aec3::ErlComputer_SSE2(H2, erl);
break;
case Aec3Optimization::kAvx2:
aec3::ErlComputer_AVX2(H2, erl);
break;
// case Aec3Optimization::kAvx2:
// aec3::ErlComputer_AVX2(H2, erl);
// break;
#endif
#if defined(WEBRTC_HAS_NEON)
case Aec3Optimization::kNeon:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@ struct FftData {
power_spectrum[kFftLengthBy2] = re[kFftLengthBy2] * re[kFftLengthBy2] +
im[kFftLengthBy2] * im[kFftLengthBy2];
} break;
case Aec3Optimization::kAvx2:
SpectrumAVX2(power_spectrum);
break;
// case Aec3Optimization::kAvx2:
// SpectrumAVX2(power_spectrum);
// break;
#endif
default:
std::transform(re.begin(), re.end(), im.begin(), power_spectrum.begin(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -364,11 +364,11 @@ void MatchedFilter::Update(const DownsampledRenderBuffer& render_buffer,
smoothing_, render_buffer.buffer, y,
filters_[n], &filters_updated, &error_sum);
break;
case Aec3Optimization::kAvx2:
aec3::MatchedFilterCore_AVX2(x_start_index, x2_sum_threshold,
smoothing_, render_buffer.buffer, y,
filters_[n], &filters_updated, &error_sum);
break;
// case Aec3Optimization::kAvx2:
// aec3::MatchedFilterCore_AVX2(x_start_index, x2_sum_threshold,
// smoothing_, render_buffer.buffer, y,
// filters_[n], &filters_updated, &error_sum);
// break;
#endif
#if defined(WEBRTC_HAS_NEON)
case Aec3Optimization::kNeon:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ class VectorMath {
x[j] = sqrtf(x[j]);
}
} break;
case Aec3Optimization::kAvx2:
SqrtAVX2(x);
break;
// case Aec3Optimization::kAvx2:
// SqrtAVX2(x);
// break;
#endif
#if defined(WEBRTC_HAS_NEON)
case Aec3Optimization::kNeon: {
Expand Down Expand Up @@ -140,9 +140,9 @@ class VectorMath {
z[j] = x[j] * y[j];
}
} break;
case Aec3Optimization::kAvx2:
MultiplyAVX2(x, y, z);
break;
// case Aec3Optimization::kAvx2:
// MultiplyAVX2(x, y, z);
// break;
#endif
#if defined(WEBRTC_HAS_NEON)
case Aec3Optimization::kNeon: {
Expand Down Expand Up @@ -190,9 +190,9 @@ class VectorMath {
z[j] += x[j];
}
} break;
case Aec3Optimization::kAvx2:
AccumulateAVX2(x, z);
break;
// case Aec3Optimization::kAvx2:
// AccumulateAVX2(x, z);
// break;
#endif
#if defined(WEBRTC_HAS_NEON)
case Aec3Optimization::kNeon: {
Expand Down

0 comments on commit 0b3bc23

Please sign in to comment.