30#ifndef APF_CONVOLVER_H
31#define APF_CONVOLVER_H
65 assert(block_size > 0);
66 assert(filter_size > 0);
67 return (filter_size + block_size - 1) / block_size;
83 assert(this->size() == rhs.size());
91 std::copy(rhs.begin(), rhs.end(), this->begin());
109 Filter(
size_t block_size_,
size_t partitions_)
112 assert(this->partitions() > 0);
116 template<
typename In>
117 Filter(
size_t block_size_, In first, In last,
size_t partitions_ = 0);
120 size_t block_size()
const {
return this->front().size() / 2; }
121 size_t partition_size()
const {
return this->front().size(); }
122 size_t partitions()
const {
return this->size(); }
129 template<
typename In>
132 size_t block_size()
const {
return _block_size; }
133 size_t partition_size()
const {
return _partition_size; }
135 template<
typename In>
145 using plan_ptr = std::unique_ptr<scoped_plan>;
153 _sort_coefficients(first);
159 void _sort_coefficients(
float* first)
const;
161 const size_t _block_size;
162 const size_t _partition_size;
165TransformBase::TransformBase(
size_t block_size_)
166 : _block_size(block_size_)
167 , _partition_size(2 * _block_size)
169 if (_block_size % 8 != 0)
171 throw std::logic_error(
"Convolver: block size must be a multiple of 8!");
182TransformBase::plan_ptr
186 , array, array, FFTW_R2HC, FFTW_PATIENT));
200 for (
auto& partition: filter)
219 assert(std::distance(partition.begin(), partition.end())
220 ==
static_cast<fft_node::difference_type
>(_partition_size));
222 using difference_type =
typename std::iterator_traits<In>::difference_type;
223 auto chunk = std::min(
224 static_cast<difference_type
>(_block_size), std::distance(first, last));
229 partition.
zero =
true;
234 std::copy(first, first + chunk, partition.begin());
235 std::fill(partition.begin() + chunk, partition.end(), 0.0f);
236 _fft(partition.data());
237 partition.
zero =
false;
239 return first + chunk;
246TransformBase::_sort_coefficients(
float* data)
const
256 buffer[4] = data[_block_size];
257 buffer[5] = data[_partition_size - 1];
258 buffer[6] = data[_partition_size - 2];
259 buffer[7] = data[_partition_size - 3];
261 for (
size_t i = 0; i < (_partition_size / 8-1); i++)
263 for (
size_t ii = 0; ii < 4; ii++)
265 buffer[base+ii] = data[base/2+ii];
268 for (
size_t ii = 0; ii < 4; ii++)
270 buffer[base+4+ii] = data[_partition_size-base/2-ii];
276 std::copy(buffer.begin(), buffer.end(), data);
286 fft_node planning_space(this->partition_size());
297 assert(std::distance(first, last) > 0);
298 assert(this->partitions() > 0);
310 Input(
size_t block_size_,
size_t partitions_)
313 ,
spectra(partitions_ + 1, this->partition_size())
315 assert(partitions_ > 0);
320 template<
typename In>
323 size_t partitions()
const {
return spectra.size() - 1; }
339 std::advance(last,
static_cast<std::ptrdiff_t
>(this->block_size()));
344 auto& current = this->
spectra.front();
345 auto& next = this->
spectra.back();
347 auto offset =
static_cast<fft_node::difference_type
>(this->block_size());
360 std::fill(current.begin() + offset, current.end(), 0.0f);
368 std::fill(current.begin(), current.begin() + offset, 0.0f);
372 std::copy(first, last, current.begin() + offset);
373 current.zero =
false;
375 std::copy(first, last, next.begin());
385 _fft(current.data());
393 float*
convolve(
float weight = 1.0f);
395 size_t block_size()
const {
return _input.block_size(); }
396 size_t partitions()
const {
return _filter_ptrs.size(); }
408 void _multiply_spectra();
409 void _multiply_partition_cpp(
const float* signal,
const float* filter);
411 void _multiply_partition_simd(
const float* signal,
const float* filter);
414 void _unsort_coefficients();
420 const size_t _partition_size;
426OutputBase::OutputBase(
const Input& input)
427 : _empty_partition(0)
429 , _filter_ptrs(input.partitions(), &_empty_partition)
431 , _partition_size(input.partition_size())
432 , _output_buffer(_partition_size)
433 , _ifft_plan(
fftw<float>::plan_r2r_1d, int(_partition_size)
434 , _output_buffer.data()
435 , _output_buffer.data(), FFTW_HC2R, FFTW_PATIENT)
437 assert(_filter_ptrs.size() > 0);
452 auto offset =
static_cast<fft_node::difference_type
>(_input.block_size());
455 auto second_half = make_begin_and_end(
456 _output_buffer.begin() + offset, _output_buffer.end());
458 assert(std::distance(second_half.begin(), second_half.end()) == offset);
460 if (_output_buffer.
zero)
470 const auto norm = weight / float(_partition_size);
471 for (
auto& x: second_half)
476 return &second_half[0];
480OutputBase::_multiply_partition_cpp(
const float* signal,
const float* filter)
484 auto d1s = _output_buffer[0] + signal[0] * filter[0];
485 auto d2s = _output_buffer[4] + signal[4] * filter[4];
487 for (
size_t nn = 0; nn < _partition_size; nn += 8)
490 _output_buffer[nn+0] += signal[nn+0] * filter[nn + 0] -
491 signal[nn+4] * filter[nn + 4];
492 _output_buffer[nn+1] += signal[nn+1] * filter[nn + 1] -
493 signal[nn+5] * filter[nn + 5];
494 _output_buffer[nn+2] += signal[nn+2] * filter[nn + 2] -
495 signal[nn+6] * filter[nn + 6];
496 _output_buffer[nn+3] += signal[nn+3] * filter[nn + 3] -
497 signal[nn+7] * filter[nn + 7];
500 _output_buffer[nn+4] += signal[nn+0] * filter[nn + 4] +
501 signal[nn+4] * filter[nn + 0];
502 _output_buffer[nn+5] += signal[nn+1] * filter[nn + 5] +
503 signal[nn+5] * filter[nn + 1];
504 _output_buffer[nn+6] += signal[nn+2] * filter[nn + 6] +
505 signal[nn+6] * filter[nn + 2];
506 _output_buffer[nn+7] += signal[nn+3] * filter[nn + 7] +
507 signal[nn+7] * filter[nn + 3];
511 _output_buffer[0] = d1s;
512 _output_buffer[4] = d2s;
517OutputBase::_multiply_partition_simd(
const float* signal,
const float* filter)
522 auto dc = _output_buffer[0] + signal[0] * filter[0];
523 auto ny = _output_buffer[4] + signal[4] * filter[4];
525 for(
size_t i = 0; i < _partition_size; i += 8)
528 __m128 sigr = _mm_load_ps(signal + i);
529 __m128 sigi = _mm_load_ps(signal + i + 4);
530 __m128 filtr = _mm_load_ps(filter + i);
531 __m128 filti = _mm_load_ps(filter + i + 4);
534 __m128 res1 = _mm_sub_ps(_mm_mul_ps(sigr, filtr), _mm_mul_ps(sigi, filti));
537 __m128 res2 = _mm_add_ps(_mm_mul_ps(sigr, filti), _mm_mul_ps(sigi, filtr));
540 __m128 acc1 = _mm_load_ps(&_output_buffer[i]);
541 __m128 acc2 = _mm_load_ps(&_output_buffer[i + 4]);
544 acc1 = _mm_add_ps(acc1, res1);
545 acc2 = _mm_add_ps(acc2, res2);
548 _mm_store_ps(&_output_buffer[i], acc1);
549 _mm_store_ps(&_output_buffer[i + 4], acc2);
552 _output_buffer[0] = dc;
553 _output_buffer[4] = ny;
559OutputBase::_multiply_spectra()
562 std::fill(_output_buffer.begin(), _output_buffer.end(), 0.0f);
563 _output_buffer.
zero =
true;
565 assert(_filter_ptrs.size() == _input.partitions());
567 auto input = _input.
spectra.begin();
569 for (
const auto* filter: _filter_ptrs)
571 assert(filter !=
nullptr);
573 if (input->zero || filter->zero)
580 _multiply_partition_simd(input->data(), filter->data());
582 _multiply_partition_cpp(input->data(), filter->data());
584 _output_buffer.
zero =
false;
591OutputBase::_unsort_coefficients()
593 fixed_vector<float> buffer(_partition_size);
597 buffer[0] = _output_buffer[0];
598 buffer[1] = _output_buffer[1];
599 buffer[2] = _output_buffer[2];
600 buffer[3] = _output_buffer[3];
601 buffer[_input.block_size()] = _output_buffer[4];
602 buffer[_partition_size-1] = _output_buffer[5];
603 buffer[_partition_size-2] = _output_buffer[6];
604 buffer[_partition_size-3] = _output_buffer[7];
606 for (
size_t i=0; i < (_partition_size / 8-1); i++)
608 for (
size_t ii = 0; ii < 4; ii++)
610 buffer[base/2+ii] = _output_buffer[base+ii];
613 for (
size_t ii = 0; ii < 4; ii++)
615 buffer[_partition_size-base/2-ii] = _output_buffer[base+4+ii];
621 std::copy(buffer.begin(), buffer.end(), _output_buffer.begin());
627 _unsort_coefficients();
628 fftw<float>::execute(_ifft_plan);
661 auto partition = filter.begin();
664 if (partition != filter.end())
666 _filter_ptrs.front() = &*partition++;
669 for (
size_t i = 0; i < _queues.size(); ++i)
672 = (partition == filter.end()) ? &_empty_partition : &*partition++;
685 if (_queues.empty())
return true;
690 auto first = _queues.rbegin()->begin();
691 auto last = _queues.rbegin()->end();
702 auto target = _filter_ptrs.begin();
706 for (
auto& queue: _queues)
709 if (queue.front()) *target = queue.front();
711 std::copy(queue.begin() + 1, queue.end(), queue.begin());
712 *queue.rbegin() =
nullptr;
725 template<
typename In>
729 _filter.reset(
new Filter(input.block_size(), first, last
730 , input.partitions()));
732 _set_filter(*_filter);
745 void _set_filter(
const Filter& filter)
747 auto from = filter.begin();
749 for (
auto& to: _filter_ptrs)
752 to = (from == filter.end()) ? &_empty_partition : &*from++;
758 std::unique_ptr<Filter> _filter;
764 Convolver(
size_t block_size_,
size_t partitions_)
765 :
Input(block_size_, partitions_)
774 template<
typename In>
775 StaticConvolver(
size_t block_size_, In first, In last,
size_t partitions_ = 0)
776 :
Input(block_size_, partitions_ ? partitions_
777 :
min_partitions(block_size_,
size_t(std::distance(first, last))))
780 assert(std::distance(first, last) > 0);
784 :
Input(filter.block_size()
785 , partitions_ ? partitions_ : filter.partitions())
791template<
typename BinaryFunction>
795 auto it1 = in1.begin();
796 auto it2 = in2.begin();
798 for (
auto& result: out)
800 if (it1 == in1.end() || it1->zero)
802 if (it2 == in2.end() || it2->zero)
808 assert(it2->size() == result.size());
809 std::transform(it2->begin(), it2->end(), result.begin()
810 , std::bind(f, 0, std::placeholders::_1));
816 if (it2 == in2.end() || it2->zero)
818 assert(it1->size() == result.size());
819 std::transform(it1->begin(), it1->end(), result.begin()
820 , std::bind(f, std::placeholders::_1, 0));
825 assert(it1->size() == it2->size());
826 assert(it1->size() == result.size());
827 std::transform(it1->begin(), it1->end(), it2->begin(), result.begin()
832 if (it1 != in1.end()) ++it1;
833 if (it2 != in2.end()) ++it2;
Base class for Output and StaticOutput.
float * convolve(float weight=1.0f)
Fast convolution of one audio block.
Convolution engine (output part).
void rotate_queues()
Update filter queues.
bool queues_empty() const
Check if there are still valid partitions in the queues.
void set_filter(const Filter &filter)
Set a new filter.
Convolver output stage with static filter.
StaticOutput(const Input &input, In first, In last)
Constructor from time domain samples.
StaticOutput(const Input &input, const Filter &filter)
Constructor from existing frequency domain filter coefficients.
Derived from std::list, but without re-sizing.
Derived from std::vector, but without memory re-allocations.
index_iterator< T > make_index_iterator(T start)
Helper function to create an index_iterator.
Several more or less useful iterators and some macros.
Mathematical constants and helper functions.
void transform_nested(const Filter &in1, const Filter &in2, Filter &out, BinaryFunction f)
Apply std::transform to a container of fft_nodes.
static size_t min_partitions(size_t block_size, size_t filter_size)
Calculate necessary number of partitions for a given filter length.
bool has_only_zeros(I first, I last)
Check if there are only zeros in a range.
Audio Processing Framework.
Combination of Input and Output.
Container holding a number of FFT blocks.
Filter(size_t block_size_, size_t partitions_)
Constructor; create empty filter.
Combination of Input and StaticOutput.
Two blocks of time-domain or FFT (half-complex) data.
bool zero
To avoid unnecessary FFTs and filling buffers with zeros.
Traits class to select float/double/long double versions of FFTW functions.
Identity function object. Function call returns a const reference to input.