diff --git a/daemon/Makefile b/daemon/Makefile index ffaa13a6d..9940ba92c 100644 --- a/daemon/Makefile +++ b/daemon/Makefile @@ -82,7 +82,7 @@ SRCS= main.c kernel.c poller.c aux.c control_tcp.c call.c control_udp.c redis.c crypto.c rtp.c call_interfaces.strhash.c dtls.c log.c cli.c graphite.c ice.c \ media_socket.c homer.c recording.c statistics.c cdr.c ssrc.c iptables.c tcp_listener.c \ codec.c load.c dtmf.c timerthread.c media_player.c jitter_buffer.c t38.c websocket.c \ - mqtt.c janus.strhash.c + mqtt.c janus.strhash.c audio_player.c LIBSRCS= loglib.c auxlib.c rtplib.c str.c socket.c streambuf.c ssllib.c dtmflib.c mix_buffer.c ifeq ($(with_transcoding),yes) LIBSRCS+= codeclib.strhash.c resample.c diff --git a/daemon/audio_player.c b/daemon/audio_player.c new file mode 100644 index 000000000..f3f79926e --- /dev/null +++ b/daemon/audio_player.c @@ -0,0 +1,204 @@ +#ifdef WITH_TRANSCODING + +#include "audio_player.h" +#include "call.h" +#include "media_player.h" +#include "mix_buffer.h" +#include "codec.h" + + +struct audio_player { + struct media_player *mp; + struct mix_buffer mb; + struct timeval last_run; + + unsigned int ptime_us; + unsigned int ptime; // in samples + + unsigned long long pts; +}; + + +// timer callback installed as mp->run_func: reads one frame (ap->ptime samples) from the mix buffer and passes it to media_player_add_packet(); always returns false. call is locked in R and mp is locked +static bool audio_player_run(struct media_player *mp) { + if (!mp || !mp->media) + return false; + + struct audio_player *ap = mp->media->audio_player; + if (!ap || !ap->ptime_us) + return false; + + ap->last_run = rtpe_now; // equals mp->next_run + + unsigned int size; + void *buf = mix_buffer_read_fast(&ap->mb, ap->ptime, &size); + if (!buf) { + buf = g_alloca(size); + mix_buffer_read_slow(&ap->mb, buf, ap->ptime); + } + + media_player_add_packet(mp, buf, size, ap->ptime_us, ap->pts); + ap->pts += ap->ptime; + + return false; +} + +// creates the audio player of media m, or resets an existing one when the payload types or the ptime have changed. a size_ms of zero selects rtpe_config.audio_buffer_length. returns true if the player is ready (or was left unchanged); returns false if dst_pt is NULL, no buffer size is available, or setup fails (the latter frees the player). call locked in W +bool audio_player_setup(struct call_media *m, const struct rtp_payload_type *dst_pt, + unsigned int size_ms, unsigned int delay_ms) +{ + if (!dst_pt) + return false; + 
unsigned int bufsize_ms = size_ms; + if (!bufsize_ms) + bufsize_ms = rtpe_config.audio_buffer_length; + if (!bufsize_ms) + return false; + + unsigned int clockrate = fraction_mult(dst_pt->clock_rate, &dst_pt->codec_def->default_clockrate_fact); + + unsigned int ptime_ms = m->ptime; + if (!ptime_ms) + ptime_ms = 20; + unsigned int ptime_us = ptime_ms * 1000; + unsigned int ptime_smp = ptime_ms * clockrate / 1000; // in samples + + // TODO: shortcut this to avoid the detour of avframe -> avpacket -> avframe (all in s16) + // TODO: determine dest sample format from created encoder + struct rtp_payload_type src_pt = { + .payload_type = -1, + .encoding = STR_CONST_INIT("PCM-S16LE"), // XXX support flp + .channels = dst_pt->channels, + .clock_rate = clockrate, + .ptime = ptime_ms, + }; + + struct audio_player *ap; + struct media_player *mp = NULL; + + // check if objects exist and parameters are still the same; a mismatch in either the payload types or the ptime forces a reset + + if ((ap = m->audio_player) && (mp = ap->mp)) { + if (!media_player_pt_match(mp, &src_pt, dst_pt)) + { /* do reset below */ } + else if (ap->ptime != ptime_smp || ap->ptime_us != ptime_us) + { /* do reset below */ } + else // everything matched + return true; + + ilogs(transcoding, LOG_DEBUG, "Resetting audio player for new parameters"); + } + else + ilogs(transcoding, LOG_DEBUG, "Creating new audio player"); + + // create ap and mp objects, or reset them if needed + + if (ap) { + mix_buffer_destroy(&ap->mb); + ZERO(ap->mb); + } + else + ap = m->audio_player = g_slice_alloc0(sizeof(*m->audio_player)); + + if (mp) + media_player_stop(mp); + else + mp = ap->mp = media_player_new(m->monologue); + if (!mp) + goto error; + + // set everything up + + src_pt.codec_def = codec_find_by_av(AV_CODEC_ID_PCM_S16LE); // XXX shortcut this? 
+ + mp->run_func = audio_player_run; + + ap->ptime_us = ptime_us; + ap->ptime = ptime_smp; + + if (media_player_setup(mp, &src_pt, dst_pt)) + goto error; + + bufsize_ms = MAX(bufsize_ms, ptime_ms * 2); // make sure the buf size is at least 2 frames + + mix_buffer_init(&ap->mb, AV_SAMPLE_FMT_S16, clockrate, dst_pt->channels, bufsize_ms, delay_ms); + + return true; + +error: + audio_player_free(m); + return false; +} + + +// attaches the media to the media player and, unless a run is already scheduled, schedules the first run one ptime from now. call locked in W +void audio_player_start(struct call_media *m) { + struct audio_player *ap; + + if (!m || !(ap = m->audio_player)) + return; + + struct media_player *mp = ap->mp; + if (!mp) + return; + + media_player_set_media(mp, m); + + if (mp->next_run.tv_sec) // already running? + return; + + ilogs(transcoding, LOG_DEBUG, "Starting audio player"); + + ap->last_run = rtpe_now; + + mp->next_run = rtpe_now; + timeval_add_usec(&mp->next_run, ap->ptime_us); + timerthread_obj_schedule_abs(&mp->tt_obj, &mp->next_run); + +} + + +void audio_player_add_frame(struct audio_player *ap, uint32_t ssrc, AVFrame *frame) { /* writes the first data plane of the frame into the mix buffer under the given SSRC; a failed write is only logged */ + bool ret = mix_buffer_write(&ap->mb, ssrc, frame->extended_data[0], frame->nb_samples); + if (!ret) + ilogs(transcoding, LOG_WARN | LOG_FLAG_LIMIT, "Failed to add samples to mix buffer"); +} + + +void audio_player_stop(struct call_media *m) { /* stops the media player and releases it via media_player_put(); the audio_player object itself is not freed here */ + struct audio_player *ap = m->audio_player; + if (!ap) + return; + ilogs(transcoding, LOG_DEBUG, "Stopping audio player"); + media_player_stop(ap->mp); + media_player_put(&ap->mp); +} + + +bool audio_player_is_active(struct call_media *m) { /* true only if an audio player with a media player exists and that media player has a run scheduled (next_run.tv_sec set) */ + if (!m->audio_player) + return false; + if (!m->audio_player->mp) + return false; + if (!m->audio_player->mp->next_run.tv_sec) + return false; + return true; +} + + +bool audio_player_pt_match(struct call_media *m, const struct rtp_payload_type *pt) { /* NOTE: dereferences audio_player, mp and coder.handler without checks; the caller must make sure they exist */ + return rtp_payload_type_eq_exact(&m->audio_player->mp->coder.handler->dest_pt, pt); +} + + +void audio_player_free(struct call_media *m) { /* destroys the mix buffer, releases the media player and frees the object; does nothing if the media has no audio player */ + struct audio_player *ap = m->audio_player; + if (!ap) + return; + 
mix_buffer_destroy(&ap->mb); + media_player_put(&ap->mp); + g_slice_free1(sizeof(*ap), ap); + m->audio_player = NULL; +} + +#endif diff --git a/daemon/call.c b/daemon/call.c index 2e74b1b99..4919a9491 100644 --- a/daemon/call.c +++ b/daemon/call.c @@ -50,6 +50,7 @@ #include "mqtt.h" #include "janus.h" #include "dtmf.h" +#include "audio_player.h" struct iterator_helper { @@ -2815,6 +2816,7 @@ static void __update_init_subscribers(struct call_monologue *ml, GQueue *streams recording_setup_media(media); t38_gateway_start(media->t38_gateway); + audio_player_start(media); if (mqtt_publish_scope() == MPS_MEDIA) mqtt_timer_start(&media->mqtt_timer, media->call, media); @@ -3657,6 +3659,7 @@ static void __call_cleanup(struct call *c) { ice_shutdown(&md->ice_agent); media_stop(md); t38_gateway_put(&md->t38_gateway); + audio_player_free(md); } for (GList *l = c->monologues.head; l; l = l->next) { @@ -4597,6 +4600,7 @@ int call_get_mono_dialogue(struct call_monologue *dialogue[2], struct call *call static void media_stop(struct call_media *m) { t38_gateway_stop(m->t38_gateway); + audio_player_stop(m); codec_handlers_stop(&m->codec_handlers_store); rtcp_timer_stop(&m->rtcp_timer); mqtt_timer_stop(&m->mqtt_timer); diff --git a/daemon/call_interfaces.c b/daemon/call_interfaces.c index c743aeef8..9a6b2522b 100644 --- a/daemon/call_interfaces.c +++ b/daemon/call_interfaces.c @@ -1079,6 +1079,14 @@ static void call_ng_flags_flags(struct sdp_ng_flags *out, str *s, void *dummy) { case CSH_LOOKUP("no-passthrough"): out->passthrough_off = 1; break; + case CSH_LOOKUP("player"): + case CSH_LOOKUP("audio-player"): + out->audio_player = AP_TRANSCODING; + break; + case CSH_LOOKUP("no-player"): + case CSH_LOOKUP("no-audio-player"): + out->audio_player = AP_OFF; + break; case CSH_LOOKUP("no-jitter-buffer"): out->disable_jb = 1; break; @@ -1516,6 +1524,37 @@ static void call_ng_main_flags(struct sdp_ng_flags *out, str *key, bencode_item_ STR_FMT(&s)); } break; + case CSH_LOOKUP("player"): + 
case CSH_LOOKUP("audio-player"): + switch (__csh_lookup(&s)) { + case CSH_LOOKUP("default"): + out->audio_player = AP_DEFAULT; + break; + case CSH_LOOKUP("on"): + case CSH_LOOKUP("yes"): + case CSH_LOOKUP("enable"): + case CSH_LOOKUP("enabled"): + case CSH_LOOKUP("transcode"): + case CSH_LOOKUP("transcoding"): + out->audio_player = AP_TRANSCODING; + break; + case CSH_LOOKUP("no"): + case CSH_LOOKUP("off"): + case CSH_LOOKUP("disable"): + case CSH_LOOKUP("disabled"): + out->audio_player = AP_OFF; + break; + case CSH_LOOKUP("force"): + case CSH_LOOKUP("forced"): + case CSH_LOOKUP("always"): + case CSH_LOOKUP("everything"): + out->audio_player = AP_FORCE; + break; + default: + ilog(LOG_WARN, "Unknown 'audio-player' flag encountered: '" STR_FORMAT "'", + STR_FMT(&s)); + } + break; case CSH_LOOKUP("transport protocol"): case CSH_LOOKUP("transport-protocol"): if (!str_cmp(&s, "accept")) diff --git a/daemon/codec.c b/daemon/codec.c index 91fd00257..947390520 100644 --- a/daemon/codec.c +++ b/daemon/codec.c @@ -17,6 +17,7 @@ #include "timerthread.h" #include "log_funcs.h" #include "mqtt.h" +#include "audio_player.h" #ifdef WITH_TRANSCODING #include "fix_frame_channel_layout.h" #endif @@ -234,6 +235,7 @@ static codec_handler_func handler_func_dtmf; static codec_handler_func handler_func_t38; static struct ssrc_entry *__ssrc_handler_transcode_new(void *p); +static struct ssrc_entry *__ssrc_handler_decode_new(void *p); static struct ssrc_entry *__ssrc_handler_new(void *p); static void __ssrc_handler_stop(void *p, void *dummy); static void __free_ssrc_handler(void *); @@ -246,6 +248,7 @@ static int packet_decode(struct codec_ssrc_handler *, struct codec_ssrc_handler static int packet_encoded_rtp(encoder_t *enc, void *u1, void *u2); static int packet_decoded_fifo(decoder_t *decoder, AVFrame *frame, void *u1, void *u2); static int packet_decoded_direct(decoder_t *decoder, AVFrame *frame, void *u1, void *u2); +static int packet_decoded_audio_player(decoder_t *decoder, AVFrame 
*frame, void *u1, void *u2); static void codec_touched(struct codec_store *cs, struct rtp_payload_type *pt); @@ -513,6 +516,12 @@ static void __make_transcoder(struct codec_handler *handler, struct rtp_payload_ __make_transcoder_full(handler, dest, output_transcoders, dtmf_payload_type, pcm_dtmf_detect, cn_payload_type, packet_decoded_fifo, __ssrc_handler_transcode_new); } +static void __make_audio_player_decoder(struct codec_handler *handler, struct rtp_payload_type *dest, + bool pcm_dtmf_detect) +{ /* sets up `handler` through __make_transcoder_full() with no output transcoders and -1 for both the DTMF and CN payload types; decoded frames are delivered to packet_decoded_audio_player and SSRC handlers are created by __ssrc_handler_decode_new */ + __make_transcoder_full(handler, dest, NULL, -1, pcm_dtmf_detect, -1, packet_decoded_audio_player, + __ssrc_handler_decode_new); +} // used for generic playback (audio_player, t38_gateway) struct codec_handler *codec_handler_make_playback(const struct rtp_payload_type *src_pt, @@ -543,6 +552,12 @@ struct codec_handler *codec_handler_make_media_player(const struct rtp_payload_t struct codec_handler *h = codec_handler_make_playback(src_pt, dst_pt, last_ts, media, ssrc); if (!h) return NULL; + if (audio_player_is_active(media)) { + h->packet_decoded = packet_decoded_audio_player; + if (!audio_player_pt_match(media, dst_pt)) + ilogs(codec, LOG_WARN, "Codec mismatch between audio player and media player (wanted: " + STR_FORMAT ")", STR_FMT(&dst_pt->encoding_with_params)); + } return h; } struct codec_handler *codec_handler_make_dummy(const struct rtp_payload_type *dst_pt, struct call_media *media) @@ -794,6 +809,10 @@ static void __generator_stop(struct call_media *media) { t38_gateway_put(&media->t38_gateway); } } +static void __generator_stop_all(struct call_media *media) { /* same as __generator_stop(), and additionally stops the media's audio player */ + __generator_stop(media); + audio_player_stop(media); +} static void __t38_options_from_flags(struct t38_options *t_opts, const struct sdp_ng_flags *flags) { #define t38_opt(name) t_opts->name = flags ? 
flags->t38_ ## name : 0 @@ -1016,8 +1035,8 @@ bool codec_handlers_update(struct call_media *receiver, struct call_media *sink, } // everything else is unsupported: pass through if (proto_is_not_rtp(receiver->protocol)) { - __generator_stop(receiver); - __generator_stop(sink); + __generator_stop_all(receiver); + __generator_stop_all(sink); codec_handlers_stop(&receiver->codec_handlers_store); return false; } @@ -1041,6 +1060,17 @@ bool codec_handlers_update(struct call_media *receiver, struct call_media *sink, receiver->dtmf_count = 0; GSList *passthrough_handlers = NULL; + // default choice of audio player usage is based on whether it was in use previously, + // overridden by signalling flags, overridden by global option + bool use_audio_player = !!MEDIA_ISSET(sink, AUDIO_PLAYER); + + if (flags && flags->audio_player == AP_FORCE) + use_audio_player = true; + else if (flags && flags->audio_player == AP_OFF) + use_audio_player = false; + else if (rtpe_config.use_audio_player == UAP_ALWAYS) + use_audio_player = true; + // first gather info about what we can send AUTO_CLEANUP_NULL(GHashTable *supplemental_sinks, __g_hash_table_destroy); struct rtp_payload_type *pref_dest_codec = NULL; @@ -1068,7 +1098,7 @@ bool codec_handlers_update(struct call_media *receiver, struct call_media *sink, do_dtmf_detect = true; // do we have to force everything through the transcoding engine even if codecs match? - bool force_transcoding = do_pcm_dtmf_blocking || do_dtmf_blocking; + bool force_transcoding = do_pcm_dtmf_blocking || do_dtmf_blocking || use_audio_player; if (sink->monologue->inject_dtmf) force_transcoding = true; @@ -1160,6 +1190,7 @@ bool codec_handlers_update(struct call_media *receiver, struct call_media *sink, STR_FMT(&pt->encoding_with_params), STR_FMT(&sink_pt->encoding_with_full_params), sink_pt->payload_type); +sink_pt_fixed:; // we have found a usable output codec. 
gather matching output supp codecs struct rtp_payload_type *sink_dtmf_pt = __supp_payload_type(supplemental_sinks, sink_pt->clock_rate, "telephone-event"); @@ -1290,7 +1321,27 @@ bool codec_handlers_update(struct call_media *receiver, struct call_media *sink, __make_passthrough_gsl(handler, &passthrough_handlers, sink_dtmf_pt, sink_cn_pt); goto next; -transcode:; +transcode: + // enable audio player if not explicitly disabled + if (rtpe_config.use_audio_player == UAP_TRANSCODING && (!flags || flags->audio_player != AP_OFF)) + use_audio_player = true; + else if (flags && flags->audio_player == AP_TRANSCODING) + use_audio_player = true; + + if (use_audio_player) { + // when using the audio player, everything must decode to the same + // format that is appropriate for the audio player + if (sink_pt != pref_dest_codec && pref_dest_codec) { + ilogs(codec, LOG_DEBUG, "Switching sink codec for " STR_FORMAT " to " + STR_FORMAT " (%i) due to usage of audio player", + STR_FMT(&pt->encoding_with_params), + STR_FMT(&pref_dest_codec->encoding_with_full_params), + pref_dest_codec->payload_type); + sink_pt = pref_dest_codec; + force_transcoding = true; + goto sink_pt_fixed; + } + } // look up the reverse side of this payload type, which is the decoder to our // encoder. if any codec options such as bitrate were set during an offer, // they're in the decoder PT. copy them to the encoder PT. @@ -1305,9 +1356,12 @@ transcode:; } } is_transcoding = true; - __make_transcoder(handler, sink_pt, output_transcoders, - sink_dtmf_pt ? sink_dtmf_pt->payload_type : -1, - pcm_dtmf_detect, sink_cn_pt ? sink_cn_pt->payload_type : -1); + if (!use_audio_player) + __make_transcoder(handler, sink_pt, output_transcoders, + sink_dtmf_pt ? sink_dtmf_pt->payload_type : -1, + pcm_dtmf_detect, sink_cn_pt ? 
sink_cn_pt->payload_type : -1); + else + __make_audio_player_decoder(handler, sink_pt, pcm_dtmf_detect); // for DTMF delay: we pretend that there is no output DTMF payload type (sink_dtmf_pt == NULL) // so that DTMF is converted to audio (so it can be replaced with silence). we still want // to output DTMF event packets when we can though, so we need to remember the DTMF payload @@ -1319,37 +1373,66 @@ next: l = l->next; } + if (!use_audio_player) { + MEDIA_CLEAR(sink, AUDIO_PLAYER); + audio_player_stop(sink); + } + else + MEDIA_SET(sink, AUDIO_PLAYER); + if (is_transcoding) { - // we have to translate RTCP packets - receiver->rtcp_handler = rtcp_transcode_handler; + MEDIA_SET(receiver, TRANSCODE); - for (GList *l = receiver->codecs.codec_prefs.head; l; ) { - struct rtp_payload_type *pt = l->data; + if (!use_audio_player) { + // we have to translate RTCP packets + receiver->rtcp_handler = rtcp_transcode_handler; - if (pt->codec_def) { - // supported - l = l->next; - continue; + for (GList *l = receiver->codecs.codec_prefs.head; l; ) { + struct rtp_payload_type *pt = l->data; + + if (pt->codec_def) { + // supported + l = l->next; + continue; + } + + ilogs(codec, LOG_DEBUG, "Stripping unsupported codec " STR_FORMAT + " due to active transcoding", + STR_FMT(&pt->encoding)); + codec_touched(&receiver->codecs, pt); + l = __codec_store_delete_link(l, &receiver->codecs); } - ilogs(codec, LOG_DEBUG, "Stripping unsupported codec " STR_FORMAT - " due to active transcoding", - STR_FMT(&pt->encoding)); - codec_touched(&receiver->codecs, pt); - l = __codec_store_delete_link(l, &receiver->codecs); + + // at least some payload types will be transcoded, which will result in SSRC + // change. 
for payload types which we don't actually transcode, we still + // must substitute the SSRC + while (passthrough_handlers) { + struct codec_handler *handler = passthrough_handlers->data; + __make_passthrough_ssrc(handler); + passthrough_handlers = g_slist_delete_link(passthrough_handlers, + passthrough_handlers); + + } } + else { + receiver->rtcp_handler = rtcp_sink_handler; + MEDIA_CLEAR(receiver, RTCP_GEN); + // change all passthrough handlers also to transcoders + while (passthrough_handlers) { + struct codec_handler *handler = passthrough_handlers->data; + __make_audio_player_decoder(handler, pref_dest_codec, false); + passthrough_handlers = g_slist_delete_link(passthrough_handlers, + passthrough_handlers); - // at least some payload types will be transcoded, which will result in SSRC - // change. for payload types which we don't actually transcode, we still - // must substitute the SSRC - while (passthrough_handlers) { - struct codec_handler *handler = passthrough_handlers->data; - __make_passthrough_ssrc(handler); - passthrough_handlers = g_slist_delete_link(passthrough_handlers, passthrough_handlers); + } + audio_player_setup(sink, pref_dest_codec, rtpe_config.audio_buffer_length, + rtpe_config.audio_buffer_delay); } } + g_slist_free(passthrough_handlers); if (MEDIA_ISSET(receiver, RTCP_GEN)) { @@ -1361,9 +1444,6 @@ next: __codec_rtcp_timer(sink); } - if (is_transcoding) - MEDIA_SET(receiver, TRANSCODE); - return is_transcoding; } @@ -3537,6 +3617,32 @@ static struct ssrc_entry *__ssrc_handler_transcode_new(void *p) { return &ch->h; +err: + obj_put(&ch->h); + return NULL; +} +static struct ssrc_entry *__ssrc_handler_decode_new(void *p) { /* SSRC handler constructor for decode-only handlers: p is the struct codec_handler; the decoder output format is S16 at the dest payload type's clock rate and channel count; returns NULL if __ssrc_handler_decode_common() fails */ + struct codec_handler *h = p; + + ilogs(codec, LOG_DEBUG, "Creating SSRC decoder for %s/%u/%i", + h->source_pt.codec_def->rtpname, h->source_pt.clock_rate, + h->source_pt.channels); + + struct codec_ssrc_handler *ch = obj_alloc0("codec_ssrc_handler", sizeof(*ch), __free_ssrc_handler); + ch->handler = h; + ch->ptime = 
h->dest_pt.ptime; + + format_t dest_format = { + .clockrate = h->dest_pt.clock_rate, + .channels = h->dest_pt.channels, + .format = AV_SAMPLE_FMT_S16, + }; + + if (!__ssrc_handler_decode_common(ch, h, &dest_format)) + goto err; + + return &ch->h; + err: obj_put(&ch->h); return NULL; @@ -3794,6 +3900,24 @@ static int packet_decoded_fifo(decoder_t *decoder, AVFrame *frame, void *u1, voi static int packet_decoded_direct(decoder_t *decoder, AVFrame *frame, void *u1, void *u2) { return packet_decoded_common(decoder, frame, u1, u2, encoder_input_data); } +static int packet_decoded_audio_player(decoder_t *decoder, AVFrame *frame, void *u1, void *u2) { /* decoder callback: u1 is the codec_ssrc_handler, u2 the media_packet; hands the decoded frame to the audio player of the packet's output media, or drops it if that media has no audio player; always returns 0 */ + struct codec_ssrc_handler *ch = u1; + struct media_packet *mp = u2; + + ilogs(transcoding, LOG_DEBUG, "RTP media decoded for audio player: TS %llu, samples %u", + (unsigned long long) frame->pts, frame->nb_samples); + + struct call_media *m = mp->media_out; + if (!m || !m->audio_player) { + // discard XXX log? + return 0; + } + + audio_player_add_frame(m->audio_player, ch->h.ssrc, frame); + // XXX error checking/reporting + + return 0; +} static int __rtp_decode(struct codec_ssrc_handler *ch, struct codec_ssrc_handler *input_ch, struct transcode_packet *packet, struct media_packet *mp) diff --git a/daemon/main.c b/daemon/main.c index 05ac50718..b3192a180 100644 --- a/daemon/main.c +++ b/daemon/main.c @@ -92,6 +92,8 @@ struct rtpengine_config rtpe_config = { .dtx_shift = 5, .dtx_buffer = 10, .dtx_lag = 100, + .audio_buffer_delay = 5, + .audio_buffer_length = 500, .mqtt_port = 1883, .mqtt_keepalive = 30, .mqtt_publish_interval = 5000, @@ -448,6 +450,7 @@ static void options(int *argc, char ***argv) { #endif AUTO_CLEANUP_GBUF(mos); AUTO_CLEANUP_GBUF(dcc); + AUTO_CLEANUP_GBUF(use_audio_player); rwlock_lock_w(&rtpe_config.config_lock); @@ -555,6 +558,9 @@ static void options(int *argc, char ***argv) { { "silence-detect",0,0, G_OPTION_ARG_DOUBLE, &silence_detect, "Audio level threshold in percent for silence detection","FLOAT"}, 
{ "cn-payload",0,0, G_OPTION_ARG_STRING_ARRAY,&cn_payload, "Comfort noise parameters to replace silence with","INT INT INT ..."}, { "player-cache",0,0, G_OPTION_ARG_NONE, &rtpe_config.player_cache,"Cache media files for playback in memory",NULL}, + { "audio-buffer-length",0,0, G_OPTION_ARG_INT,&rtpe_config.audio_buffer_length,"Length in milliseconds of audio buffer","INT"}, + { "audio-buffer-delay",0,0, G_OPTION_ARG_INT,&rtpe_config.audio_buffer_delay,"Initial delay in milliseconds for buffered audio","INT"}, + { "audio-player",0,0, G_OPTION_ARG_STRING, &use_audio_player, "When to enable the internal audio player","on-demand|play-media|transcoding|always"}, #endif #ifdef HAVE_MQTT { "mqtt-host",0,0, G_OPTION_ARG_STRING, &rtpe_config.mqtt_host, "Mosquitto broker host or address", "HOST|IP"}, @@ -832,6 +838,30 @@ static void options(int *argc, char ***argv) { die("Invalid --amr-dtx ('%s')", amr_dtx); } + if (use_audio_player) { + if (!strcasecmp(use_audio_player, "on-demand") + || !strcasecmp(use_audio_player, "on demand") + || !strcasecmp(use_audio_player, "off") + || !strcasecmp(use_audio_player, "no") + || !strcasecmp(use_audio_player, "never")) + rtpe_config.use_audio_player = UAP_ON_DEMAND; + else if (!strcasecmp(use_audio_player, "play-media") + || !strcasecmp(use_audio_player, "play media") + || !strcasecmp(use_audio_player, "media player") + || !strcasecmp(use_audio_player, "media-player")) + rtpe_config.use_audio_player = UAP_PLAY_MEDIA; + else if (!strcasecmp(use_audio_player, "transcoding") + || !strcasecmp(use_audio_player, "transcode")) + rtpe_config.use_audio_player = UAP_TRANSCODING; + else if (!strcasecmp(use_audio_player, "always") + || !strcasecmp(use_audio_player, "everything") + || !strcasecmp(use_audio_player, "force") + || !strcasecmp(use_audio_player, "forced")) + rtpe_config.use_audio_player = UAP_ALWAYS; + else + die("Invalid --audio-player option ('%s')", use_audio_player); + } + if (!rtpe_config.software_id) rtpe_config.software_id = 
g_strdup_printf("rtpengine-%s", RTPENGINE_VERSION); g_strcanon(rtpe_config.software_id, "QWERTYUIOPASDFGHJKLZXCVBNMqwertyuiopasdfghjklzxcvbnm1234567890-", '-'); diff --git a/daemon/rtpengine.pod b/daemon/rtpengine.pod index 34e281fc0..3c576b82a 100644 --- a/daemon/rtpengine.pod +++ b/daemon/rtpengine.pod @@ -952,6 +952,65 @@ option enabled. RTP data is cached and retained in memory for the lifetime of the process. +=item B<--audio-buffer-length=>I<INT> + +Set the buffer length used by the audio player (see below) in milliseconds. The +default is 500 milliseconds. + +The buffer must be long enough to accommodate at least two frames of audio from +all contributing sources, which means at least 40 ms or 60 ms for most cases. +If media playback (via the B<play media> command) is desired, then the buffer +must be able to accommodate at least one full frame from the source media file, +whose length can vary depending on the format of the source media file. For 8 +kHz B<.wav> files this is 256 ms (2048 samples). Therefore 500 ms is the +recommended value. + +=item B<--audio-buffer-delay=>I<INT> + +Initial delay for new sources contributing to an audio buffer (used by the +audio player, see below) in milliseconds. The default is 5 ms. + +The initial delay is meant to compensate for varying inter-arrival times of +media packets (jitter). If set too low, intermittent high jitter will result in +gaps in the output audio. If set too high, output audio will have an +unnecessary latency added to it. + +=item B<--audio-player=>B<on-demand>|B<play-media>|B<transcoding>|B<always> + +Define when to enable the audio player if not explicitly instructed otherwise. +The default setting is B<on-demand>. + +Enabling the audio player for a party to a call makes B<rtpengine> produce its +own audio RTP stream (instead of just forwarding an audio stream received from +elsewhere). The audio is generated from a circular audio buffer (see above) and +all contributing audio sources are mixed into that one audio buffer. 
+Contributing audio sources are audio streams received from elsewhere (that +would otherwise simply be forwarded) and audio produced by the B<play media> +command. + +With this set to B<on-demand>, the audio player is enabled only if explicitly +requested by the user for a particular call via the B<audio-player> option +used in a signalling message. + +When set to B<play-media>, the audio player is enabled only while media +playback via the B<play media> command is active. After media playback is +finished, the audio player is again disabled and audio goes back to simply +being forwarded. + +Setting this option to B<transcoding> leaves the audio player disabled unless +any sort of transcoding is required for a call. + +With a setting of B<always>, the audio player is enabled for all calls, unless +explicitly disabled via the B<audio-player> option used in a signalling +message. This forces all audio through the transcoding engine, even if input +and output codecs are the same. + +Audio player usage can be changed on a call-by-call basis by including the +B<audio-player> option in a signalling message. This option supports the +values B<transcoding> and B<always>, which result in the behaviour described +just above, and B<off> which forces the audio player to be disabled regardless +of this setting. + =item B<--poller-per-thread> Enable 'poller per thread' functionality: for every worker thread (see the diff --git a/docs/ng_control_protocol.md b/docs/ng_control_protocol.md index 8eb153730..712327784 100644 --- a/docs/ng_control_protocol.md +++ b/docs/ng_control_protocol.md @@ -119,6 +119,16 @@ Optionally included keys are: body. The default is to auto-detect the address family if possible (if the receiving end is known already) or otherwise to leave it unchanged. +* `audio player` + + Contains a string value of either `default`, `transcoding`, `off`, or `always`. + + The values `transcoding` and `always` result in the behaviour described + under the `audio-player` config option in the manual, and override the + global setting from the config file. 
The value `off` disables usage of the + audio player regardless of the global config setting. The option `default` + results in the behaviour mandated by the global config setting. + * `delay-buffer` Takes an integer as value. When set to non-zero, enables the delay diff --git a/include/audio_player.h b/include/audio_player.h new file mode 100644 index 000000000..af8bdfc08 --- /dev/null +++ b/include/audio_player.h @@ -0,0 +1,42 @@ +#ifndef _AUDIO_PLAYER_H_ +#define _AUDIO_PLAYER_H_ + +#ifdef WITH_TRANSCODING + +#include <stdbool.h> +#include <stdint.h> +#include <libavutil/frame.h> + + +/* + * Similar to the existing media_player, but instead of simply producing + * its own standalone output media stream, the audio_player takes over the + * entire media stream flowing to the receiver, including media forwarded + * from the opposite side of the call, as well as media produced by the + * media_player. + */ + +struct audio_player; +struct call_media; +struct rtp_payload_type; + +bool audio_player_setup(struct call_media *, const struct rtp_payload_type *, + unsigned int size_ms, unsigned int delay_ms); +void audio_player_free(struct call_media *); + +void audio_player_start(struct call_media *); +void audio_player_stop(struct call_media *); +bool audio_player_is_active(struct call_media *); +bool audio_player_pt_match(struct call_media *, const struct rtp_payload_type *); + +void audio_player_add_frame(struct audio_player *, uint32_t ssrc, AVFrame *); + +#else /* !WITH_TRANSCODING: only the three entry points below are provided, as no-op stubs */ + +INLINE void audio_player_start(struct call_media *m) { } +INLINE void audio_player_free(struct call_media *m) { } +INLINE void audio_player_stop(struct call_media *m) { } + +#endif + +#endif diff --git a/include/call.h b/include/call.h index e5382f6ab..65cdf3d24 100644 --- a/include/call.h +++ b/include/call.h @@ -188,6 +188,7 @@ enum { #define MEDIA_FLAG_ECHO 0x10000000 #define MEDIA_FLAG_BLACKHOLE 0x20000000 #define MEDIA_FLAG_REORDER_FORCED 0x40000000 +#define MEDIA_FLAG_AUDIO_PLAYER 0x80000000 #define MEDIA_FLAG_LEGACY_OSRTP 
SHARED_FLAG_LEGACY_OSRTP #define MEDIA_FLAG_LEGACY_OSRTP_REV SHARED_FLAG_LEGACY_OSRTP_REV @@ -253,6 +254,7 @@ struct codec_tracker; struct rtcp_timer; struct mqtt_timer; struct janus_session; +struct audio_player; typedef bencode_buffer_t call_buffer_t; @@ -433,6 +435,7 @@ struct call_media { struct mqtt_timer *mqtt_timer; /* master lock for scheduling purposes */ //struct codec_handler *dtmf_injector; struct t38_gateway *t38_gateway; + struct audio_player *audio_player; struct codec_handler *t38_handler; unsigned int buffer_delay; diff --git a/include/call_interfaces.h b/include/call_interfaces.h index c90325896..083bf8186 100644 --- a/include/call_interfaces.h +++ b/include/call_interfaces.h @@ -109,6 +109,12 @@ struct sdp_ng_flags { long long duration; long long pause; long long start_pos; + enum { + AP_DEFAULT = 0, + AP_OFF, + AP_TRANSCODING, + AP_FORCE, + } audio_player:2; unsigned int asymmetric:1, protocol_accept:1, no_redis_update:1, diff --git a/include/main.h b/include/main.h index 40e5592aa..e836f91c9 100644 --- a/include/main.h +++ b/include/main.h @@ -132,6 +132,14 @@ struct rtpengine_config { uint32_t silence_detect_int; str cn_payload; int player_cache; + int audio_buffer_length; + int audio_buffer_delay; + enum { + UAP_ON_DEMAND = 0, + UAP_PLAY_MEDIA, + UAP_TRANSCODING, + UAP_ALWAYS, + } use_audio_player; char *software_id; int poller_per_thread; char *mqtt_host; diff --git a/t/.gitignore b/t/.gitignore index d0ff95e3d..c7283ca96 100644 --- a/t/.gitignore +++ b/t/.gitignore @@ -78,3 +78,4 @@ mvr2s_x64_avx2.S mvr2s_x64_avx512.S test-mix-buffer mix_buffer.c +audio_player.c diff --git a/t/Makefile b/t/Makefile index 1d8bfa56f..36caba389 100644 --- a/t/Makefile +++ b/t/Makefile @@ -78,7 +78,8 @@ LIBSRCS+= codeclib.strhash.c resample.c socket.c streambuf.c dtmflib.c DAEMONSRCS+= codec.c call.c ice.c kernel.c media_socket.c stun.c bencode.c poller.c \ dtls.c recording.c statistics.c rtcp.c redis.c iptables.c graphite.c \ cookie_cache.c udp_listener.c 
homer.c load.c cdr.c dtmf.c timerthread.c \ - media_player.c jitter_buffer.c t38.c tcp_listener.c mqtt.c websocket.c cli.c + media_player.c jitter_buffer.c t38.c tcp_listener.c mqtt.c websocket.c cli.c \ + audio_player.c HASHSRCS+= call_interfaces.c control_ng.c sdp.c janus.c LIBASM= mvr2s_x64_avx2.S mvr2s_x64_avx512.S endif @@ -267,7 +268,7 @@ test-stats: test-stats.o $(COMMONOBJS) codeclib.strhash.o resample.o codec.o ssr control_ng.strhash.o graphite.o \ streambuf.o cookie_cache.o udp_listener.o homer.o load.o cdr.o dtmf.o timerthread.o \ media_player.o jitter_buffer.o dtmflib.o t38.o tcp_listener.o mqtt.o janus.strhash.o \ - websocket.o cli.o mvr2s_x64_avx2.o mvr2s_x64_avx512.o + websocket.o cli.o mvr2s_x64_avx2.o mvr2s_x64_avx512.o audio_player.o mix_buffer.o test-transcode: test-transcode.o $(COMMONOBJS) codeclib.strhash.o resample.o codec.o ssrc.o call.o ice.o aux.o \ kernel.o media_socket.o stun.o bencode.o socket.o poller.o dtls.o recording.o statistics.o \ @@ -275,7 +276,7 @@ test-transcode: test-transcode.o $(COMMONOBJS) codeclib.strhash.o resample.o cod control_ng.strhash.o \ streambuf.o cookie_cache.o udp_listener.o homer.o load.o cdr.o dtmf.o timerthread.o \ media_player.o jitter_buffer.o dtmflib.o t38.o tcp_listener.o mqtt.o janus.strhash.o websocket.o \ - cli.o mvr2s_x64_avx2.o mvr2s_x64_avx512.o + cli.o mvr2s_x64_avx2.o mvr2s_x64_avx512.o audio_player.o mix_buffer.o test-resample: test-resample.o $(COMMONOBJS) codeclib.strhash.o resample.o dtmflib.o mvr2s_x64_avx2.o \ mvr2s_x64_avx512.o diff --git a/utils/rtpengine-ng-client b/utils/rtpengine-ng-client index db16a3adc..70a249044 100755 --- a/utils/rtpengine-ng-client +++ b/utils/rtpengine-ng-client @@ -84,6 +84,7 @@ my @string_opts = qw( frequency blob sdp + audio-player ); my @int_opts = qw(