Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ add_custom_target(build_rust_ffi

add_library(livekit SHARED
src/audio_frame.cpp
src/audio_processing_module.cpp
src/audio_source.cpp
src/audio_stream.cpp
src/data_stream.cpp
Expand Down
169 changes: 169 additions & 0 deletions include/livekit/audio_processing_module.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
/*
* Copyright 2025 LiveKit
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cstdint>

#include "livekit/audio_frame.h"
#include "livekit/ffi_handle.h"

namespace livekit {

/**
* @brief WebRTC Audio Processing Module (APM) for real-time audio enhancement.
*
* AudioProcessingModule exposes WebRTC's built-in audio processing capabilities
* including echo cancellation, noise suppression, automatic gain control, and
* high-pass filtering.
*
* This class is designed for scenarios where you need explicit control over
* audio processing, separate from the built-in processing in AudioSource.
*
* Typical usage pattern for echo cancellation:
* 1. Create an APM with desired features enabled
* 2. Call processReverseStream() with speaker/playback audio (reference signal)
* 3. Call processStream() with microphone audio (near-end signal)
* 4. The processed microphone audio will have echo removed
*
* Note: Audio frames must be exactly 10ms in duration.
*/
class AudioProcessingModule {
public:
/**
* @brief Configuration options for the Audio Processing Module.
*
* Every flag defaults to false, so a default-constructed Options enables no
* processing feature. Set the flags you need before passing the struct to
* the AudioProcessingModule constructor.
*/
struct Options {
/// Enable acoustic echo cancellation (AEC3).
/// Removes acoustic echo in two-way communication scenarios.
bool echo_cancellation = false;

/// Enable noise suppression.
/// Reduces background noise from non-speech sources.
bool noise_suppression = false;

/// Enable high-pass filter.
/// Removes low-frequency noise below ~80 Hz (DC offset, rumble).
bool high_pass_filter = false;

/// Enable automatic gain control (AGC).
/// Auto-adjusts microphone gain to maintain consistent audio levels.
bool auto_gain_control = false;

/// Default constructor; leaves every flag at its in-class default (false).
Options() = default;
};

/**
* @brief Create a new Audio Processing Module with default options (all
* disabled).
*
* Equivalent to constructing with a default-constructed Options.
*
* @throws std::runtime_error if the APM could not be created.
*/
AudioProcessingModule();

/**
* @brief Create a new Audio Processing Module with the specified options.
*
* The four option flags are forwarded to the FFI layer in a "new APM"
* request, and the handle returned in the response is stored in this object.
*
* @param options Configuration for which processing features to enable.
* @throws std::runtime_error if the FFI response does not describe a new APM
* or if the returned handle is not valid.
*/
explicit AudioProcessingModule(const Options &options);

/// Defaulted destructor; any cleanup is whatever FfiHandle's own destructor
/// performs.
virtual ~AudioProcessingModule() = default;

// Non-copyable
AudioProcessingModule(const AudioProcessingModule &) = delete;
AudioProcessingModule &operator=(const AudioProcessingModule &) = delete;

// Movable (defaulted, i.e. member-wise move of the FFI handle).
// NOTE(review): a moved-from instance presumably holds an invalid handle, in
// which case the processing methods below throw — confirm against FfiHandle.
AudioProcessingModule(AudioProcessingModule &&) noexcept = default;
AudioProcessingModule &operator=(AudioProcessingModule &&) noexcept = default;

/**
* @brief Process the forward (near-end/microphone) audio stream.
*
* This method processes audio captured from the local microphone. It applies
* the enabled processing features (noise suppression, gain control, etc.)
* and removes echo based on the reference signal provided via
* processReverseStream().
*
* The audio data is modified in-place.
*
* @param frame The audio frame to process (modified in-place).
*
* @throws std::runtime_error if the APM handle is not valid, if the FFI layer
* returns an unexpected response, or if processing fails.
*
* @note The frame must contain exactly 10ms of audio.
* @note A frame whose data buffer is empty is ignored: the call returns
* without sending anything to the FFI layer.
*/
void processStream(AudioFrame &frame);

/**
* @brief Process the reverse (far-end/speaker) audio stream.
*
* This method provides the reference signal for echo cancellation. Call this
* with the audio that is being played through the speakers, so the APM can
* learn the acoustic characteristics and remove the echo from the microphone
* signal.
*
* The audio data is modified in-place.
*
* @param frame The audio frame to process (modified in-place).
*
* @throws std::runtime_error if the APM handle is not valid, if the FFI layer
* returns an unexpected response, or if processing fails.
*
* @note The frame must contain exactly 10ms of audio.
* @note A frame whose data buffer is empty is ignored: the call returns
* without sending anything to the FFI layer.
*/
void processReverseStream(AudioFrame &frame);

/**
* @brief Set the estimated delay between the reverse and forward streams.
*
* This must be called if and only if echo processing is enabled.
*
* Sets the delay in ms between processReverseStream() receiving a far-end
* frame and processStream() receiving a near-end frame containing the
* corresponding echo. On the client-side this can be expressed as:
*
* delay = (t_render - t_analyze) + (t_process - t_capture)
*
* where:
* - t_analyze is the time a frame is passed to processReverseStream() and
* t_render is the time the first sample of the same frame is rendered by
* the audio hardware.
* - t_capture is the time the first sample of a frame is captured by the
* audio hardware and t_process is the time the same frame is passed to
* processStream().
*
* @param delay_ms Delay in milliseconds. The value is forwarded to the FFI
* layer as-is; no range check is performed on this side.
*
* @throws std::runtime_error if the APM handle is not valid, if the FFI layer
* returns an unexpected response, or if setting the delay fails.
*/
void setStreamDelayMs(int delay_ms);

private:
/// Check if the APM handle is valid (used internally).
bool valid() const noexcept { return handle_.valid(); }

/// Get the underlying FFI handle ID (used internally).
/// Returns handle_.get() converted to a 64-bit unsigned integer.
std::uint64_t ffi_handle_id() const noexcept {
return static_cast<std::uint64_t>(handle_.get());
}

/// Handle identifying the APM instance on the FFI side; assigned by the
/// constructor from the "new APM" response.
FfiHandle handle_;
};

} // namespace livekit
1 change: 1 addition & 0 deletions include/livekit/livekit.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#pragma once

#include "audio_frame.h"
#include "audio_processing_module.h"
#include "audio_source.h"
#include "audio_stream.h"
#include "build.h"
Expand Down
142 changes: 142 additions & 0 deletions src/audio_processing_module.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/*
* Copyright 2025 LiveKit
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "livekit/audio_processing_module.h"

#include <stdexcept>

#include "audio_frame.pb.h"
#include "ffi.pb.h"
#include "ffi_client.h"

namespace livekit {

// Default construction: delegate to the Options overload with a
// value-initialized Options, i.e. every feature flag left at false.
AudioProcessingModule::AudioProcessingModule()
    : AudioProcessingModule(Options()) {}

/**
 * Creates the APM on the FFI side and adopts the handle it returns.
 *
 * The four feature flags in @p options are copied into a "new APM" request,
 * the request is sent through FfiClient, and the handle id found in the
 * response is wrapped in an FfiHandle stored in handle_.
 *
 * @param options Feature toggles forwarded to the FFI request.
 * @throws std::runtime_error if the response has no new_apm payload, or if
 *         the handle built from the response is not valid.
 */
AudioProcessingModule::AudioProcessingModule(const Options &options) {
  proto::FfiRequest req;
  auto *msg = req.mutable_new_apm();
  msg->set_echo_canceller_enabled(options.echo_cancellation);
  msg->set_noise_suppression_enabled(options.noise_suppression);
  msg->set_high_pass_filter_enabled(options.high_pass_filter);
  msg->set_gain_controller_enabled(options.auto_gain_control);

  const proto::FfiResponse resp = FfiClient::instance().sendRequest(req);

  if (!resp.has_new_apm()) {
    throw std::runtime_error(
        "AudioProcessingModule: failed to create APM - no response");
  }

  // Use the std::-qualified name: <cstdint> only guarantees std::uintptr_t,
  // and the rest of this file already qualifies its fixed-width types.
  const auto &apm_info = resp.new_apm().apm();
  handle_ = FfiHandle(static_cast<std::uintptr_t>(apm_info.handle().id()));

  if (!handle_.valid()) {
    throw std::runtime_error(
        "AudioProcessingModule: failed to create APM - invalid handle");
  }
}

/**
 * Sends the near-end frame to the FFI layer for processing.
 *
 * The request carries the address of the frame's sample buffer, its length in
 * bytes (element count times sizeof(std::int16_t)), the sample rate and the
 * channel count. An empty buffer short-circuits before any request is built.
 *
 * @param frame Frame whose buffer address is handed to the FFI layer.
 * @throws std::runtime_error on an invalid handle, a response without an
 *         apm_process_stream payload, or an error reported in that payload.
 */
void AudioProcessingModule::processStream(AudioFrame &frame) {
  if (!handle_.valid()) {
    throw std::runtime_error("AudioProcessingModule: invalid handle");
  }

  auto &&samples = frame.data();
  if (samples.empty()) {
    return;
  }

  const auto byte_count =
      static_cast<std::uint32_t>(samples.size() * sizeof(std::int16_t));

  proto::FfiRequest request;
  auto *apm_req = request.mutable_apm_process_stream();
  apm_req->set_apm_handle(static_cast<std::uint64_t>(handle_.get()));
  // The buffer is passed by address, not copied into the message.
  apm_req->set_data_ptr(reinterpret_cast<std::uint64_t>(samples.data()));
  apm_req->set_size(byte_count);
  apm_req->set_sample_rate(static_cast<std::uint32_t>(frame.sample_rate()));
  apm_req->set_num_channels(static_cast<std::uint32_t>(frame.num_channels()));

  const proto::FfiResponse response =
      FfiClient::instance().sendRequest(request);

  if (!response.has_apm_process_stream()) {
    throw std::runtime_error(
        "AudioProcessingModule::processStream: unexpected response");
  }

  const auto &outcome = response.apm_process_stream();
  if (outcome.has_error()) {
    throw std::runtime_error("AudioProcessingModule::processStream: " +
                             outcome.error());
  }
}

/**
 * Sends the far-end (reference) frame to the FFI layer.
 *
 * Mirrors processStream() but fills the apm_process_reverse_stream request:
 * buffer address, byte length (element count times sizeof(std::int16_t)),
 * sample rate and channel count. An empty buffer returns immediately.
 *
 * @param frame Frame whose buffer address is handed to the FFI layer.
 * @throws std::runtime_error on an invalid handle, a response without an
 *         apm_process_reverse_stream payload, or an error reported in it.
 */
void AudioProcessingModule::processReverseStream(AudioFrame &frame) {
  if (!handle_.valid()) {
    throw std::runtime_error("AudioProcessingModule: invalid handle");
  }

  auto &&pcm = frame.data();
  if (pcm.empty()) {
    return;
  }

  const auto size_in_bytes =
      static_cast<std::uint32_t>(pcm.size() * sizeof(std::int16_t));
  const auto buffer_address = reinterpret_cast<std::uint64_t>(pcm.data());

  proto::FfiRequest request;
  auto *reverse_req = request.mutable_apm_process_reverse_stream();
  reverse_req->set_apm_handle(static_cast<std::uint64_t>(handle_.get()));
  reverse_req->set_data_ptr(buffer_address);
  reverse_req->set_size(size_in_bytes);
  reverse_req->set_sample_rate(
      static_cast<std::uint32_t>(frame.sample_rate()));
  reverse_req->set_num_channels(
      static_cast<std::uint32_t>(frame.num_channels()));

  const proto::FfiResponse response =
      FfiClient::instance().sendRequest(request);

  if (!response.has_apm_process_reverse_stream()) {
    throw std::runtime_error(
        "AudioProcessingModule::processReverseStream: unexpected response");
  }

  const auto &outcome = response.apm_process_reverse_stream();
  if (outcome.has_error()) {
    throw std::runtime_error("AudioProcessingModule::processReverseStream: " +
                             outcome.error());
  }
}

void AudioProcessingModule::setStreamDelayMs(int delay_ms) {
if (!handle_.valid()) {
throw std::runtime_error("AudioProcessingModule: invalid handle");
}

proto::FfiRequest req;
auto *msg = req.mutable_apm_set_stream_delay();
msg->set_apm_handle(static_cast<std::uint64_t>(handle_.get()));
msg->set_delay_ms(delay_ms);

proto::FfiResponse resp = FfiClient::instance().sendRequest(req);

if (!resp.has_apm_set_stream_delay()) {
throw std::runtime_error(
"AudioProcessingModule::setStreamDelayMs: unexpected response");
}

const auto &result = resp.apm_set_stream_delay();
if (result.has_error()) {
throw std::runtime_error("AudioProcessingModule::setStreamDelayMs: " +
result.error());
}
}

} // namespace livekit
6 changes: 6 additions & 0 deletions src/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ if(INTEGRATION_TEST_SOURCES)
${LIVEKIT_ROOT_DIR}/src
)

# Define LIVEKIT_ROOT_DIR for tests to find data files
target_compile_definitions(livekit_integration_tests
PRIVATE
LIVEKIT_ROOT_DIR="${LIVEKIT_ROOT_DIR}"
)

# Copy shared libraries to test executable directory
if(WIN32)
add_custom_command(TARGET livekit_integration_tests POST_BUILD
Expand Down
Loading
Loading