- Edited
I am trying to do real-time speech enhancement, first i want to input the speech and do windowing with 50% overlap, fft, then ifft, add up three segments and output the middle part of the segment (the part with complete information), to check if i get the output from the speaker similar to the input from the mic. My feedthrough is okay but after some fft calculations I kept hearing beeping sounds when i run the program, anyone knows how to solve this?
thanks in advance
The following is my code, and the illustration of the concept is in the diagram
#include <Bela.h>
#include <cmath>
#include <vector>
#include <complex>
#include <libraries/Fft/Fft.h>
#include <libraries/Scope/Scope.h>
// feedthrough + hanning
unsigned int fftsize = 128;
unsigned int hopsize = fftsize / 2;
std::vector<float> window;
std::vector<float> inputBuffer(2 * fftsize, 0.0f); // Initialize the buffer size to 256 and value to 0
std::vector<float> outputBuffer(2 * fftsize, 0.0f); // Adjusted to hold the entire processed range
std::vector<float> fftfirst_real(fftsize, 0.0f); // Buffer to store real part of FFT output
std::vector<float> fftfirst_img(fftsize, 0.0f); // Buffer to store imaginary part of FFT output
unsigned int bufferIndex = 0; // Index to keep track of buffer filling
bool bufferFull = false; // Flag to check if buffer is full
Fft fft; // FFT processor
// Hanning window function
void hanningWindow(std::vector<float>& window, unsigned int size) {
for (unsigned int i = 0; i < size; ++i) { //++i is pre-increment order
window[i] = 0.5 * (1 - cos(2 * M_PI * i / (size - 1)));
}
}
bool setup(BelaContext *context, void *userData) {
window.resize(fftsize); // Properly resize the window vector before calling the hanning function
hanningWindow(window, fftsize); // Calling the Hanning window function to initialize the window vector to value of fftsize
if (fft.setup(fftsize) != 0) { // Ensure the FFT setup is correct
rt_printf("FFT setup failed\n");
return false;
}
rt_printf("Number of input channels: %d\n", context->audioInChannels);
rt_printf("Number of output channels: %d\n", context->audioOutChannels);
return true;
}
void process_segment(const std::vector<float>& inputBuffer, unsigned int startIndex) { // process_segment function can read from inputBuffer but cannot modify it
std::vector<float> windowedBuffer(fftsize, 0.0f);
// Apply window and perform FFT
for (unsigned int i = 0; i < fftsize; ++i) {
windowedBuffer[i] = inputBuffer[startIndex + i] * window[i];
}
fft.fft(windowedBuffer);
// Store FFT output in real and imaginary parts
for (unsigned int i = 0; i < fftsize; ++i) {
fftfirst_real[i] = fft.fdr(i);
fftfirst_img[i] = fft.fdi(i);
}
// Perform IFFT
fft.ifft(fftfirst_real, fftfirst_img);
}
void render(BelaContext *context, void *userData) {
for (unsigned int n = 0; n < context->audioFrames; n++) {
// Read audio input from channel 0 (microphone)
float in = audioRead(context, n, 0); // in is a local var within the render function
inputBuffer[bufferIndex] = in; // Store the input in the doubled-sized buffer (i call it inputBuffer here)
bufferIndex++;
// Check if buffer is full
if (bufferIndex >= 2 * fftsize) {
bufferFull = true;
bufferIndex = fftsize; // Reset the buffer index for the next set of samples
}
}
// Check if the buffer is full and process the samples
if (bufferFull) {
// Reset the buffer full flag
bufferFull = false;
std::fill(outputBuffer.begin(), outputBuffer.end(), 0.0f);
// Process three overlapping segments
process_segment(inputBuffer, 0); // Process first segment
for (unsigned int i = 0; i < fftsize; ++i) {
outputBuffer[i] += fft.td(i);
}
process_segment(inputBuffer, hopsize); // Process second segment
for (unsigned int i = 0; i < fftsize; ++i) {
outputBuffer[hopsize + i] += fft.td(i);
}
process_segment(inputBuffer, fftsize); // Process third segment
for (unsigned int i = 0; i < fftsize; ++i) {
outputBuffer[2 * hopsize + i] += fft.td(i);
}
// Write the processed output buffer to the audio output
for (unsigned int n = 0; n < context->audioFrames; n++) {
if (n >= hopsize && n < 3 * hopsize) {
audioWrite(context, n, 0, outputBuffer[n]);
}
}
}
}
void cleanup(BelaContext *context, void *userData) {
fft.cleanup(); // Properly clean up the FFT resources
}