Browse Source

TT#101653 add silence detection option

Change-Id: I14940fcabdef475ed5dbe74d9af7ebc2af10311b
pull/1163/head
Richard Fuchs 5 years ago
parent
commit
73e4ef42eb
6 changed files with 270 additions and 5 deletions
  1. +1
    -0
      README.md
  2. +119
    -2
      daemon/codec.c
  3. +30
    -0
      daemon/main.c
  4. +45
    -0
      daemon/rtpengine.pod
  5. +3
    -0
      include/main.h
  6. +72
    -3
      t/auto-daemon-tests.pl

+ 1
- 0
README.md View File

@ -54,6 +54,7 @@ the following additional features are available:
- Injection of DTMF events or PCM DTMF tones into running audio streams
- Playback of pre-recorded streams/announcements
- Transcoding between T.38 and PCM (G.711 or other audio codecs)
- Silence detection and comfort noise (RFC 3389) payloads
*Rtpengine* does not (yet) support:


+ 119
- 2
daemon/codec.c View File

@ -85,6 +85,11 @@ struct dtx_entry {
void *ssrc_ptr; // opaque pointer, doesn't hold a reference
};
/*
 * One detected span of silence, expressed in sample timestamps.
 * `start` is the timestamp of the first silent sample; `end` is the
 * timestamp of the first non-silent sample following it. An `end` of
 * zero marks an event that is still ongoing.
 */
struct silence_event {
	uint64_t start, end;
};
struct codec_ssrc_handler {
struct ssrc_entry h; // must be first
struct codec_handler *handler;
@ -109,6 +114,9 @@ struct codec_ssrc_handler {
GQueue dtmf_events;
struct dtmf_event dtmf_event;
// silence detection
GQueue silence_events;
uint64_t skip_pts;
int rtp_mark:1;
@ -1169,6 +1177,7 @@ void codec_handlers_update(struct call_media *receiver, struct call_media *sink,
struct rtp_payload_type *dtmf_pt = NULL;
struct rtp_payload_type *reverse_dtmf_pt = NULL;
int dtmf_pt_match = __supp_codec_match(receiver, sink, dtmf_payload_type, &dtmf_pt, &reverse_dtmf_pt);
int cn_pt_match = __supp_codec_match(receiver, sink, cn_payload_type, NULL, NULL);
// stop transcoding if we've determined that we don't need it
if (MEDIA_ISSET(sink, TRANSCODE) && !sink_transcoding) {
@ -1240,8 +1249,8 @@ void codec_handlers_update(struct call_media *receiver, struct call_media *sink,
GQueue *dest_codecs = NULL;
if (!flags || !flags->always_transcode) {
// we ignore output codec matches if we must transcode DTMF
if (dtmf_pt_match == 1 && MEDIA_ISSET(sink, TRANSCODE))
// we ignore output codec matches if we must transcode supp codecs
if ((dtmf_pt_match == 1 || cn_pt_match == 1) && MEDIA_ISSET(sink, TRANSCODE))
;
else if (pcm_dtmf_detect)
;
@ -1293,6 +1302,11 @@ void codec_handlers_update(struct call_media *receiver, struct call_media *sink,
if (rtp_payload_type_cmp_nf(pt, dest_pt))
goto transcode;
// do we need silence detection?
if (cn_pt_match == 2 && MEDIA_ISSET(sink, TRANSCODE))
goto transcode;
// XXX check format parameters as well
ilog(LOG_DEBUG, "Sink supports codec " STR_FORMAT, STR_FMT(&pt->encoding_with_params));
__make_passthrough_gsl(handler, &passthrough_handlers);
if (pt->codec_def && pt->codec_def->dtmf)
@ -2198,6 +2212,102 @@ void codec_handlers_stop(GQueue *q) {
}
/*
 * Releases a single silence_event back to the slice allocator. Takes a
 * void pointer so that it can be handed to g_queue_clear_full() as the
 * element destructor.
 */
static void silence_event_free(void *p) {
	struct silence_event *ev = p;

	g_slice_free1(sizeof(*ev), ev);
}
/*
 * Generates __silence_detect_<type>(), a silence scanner for frames whose
 * samples are of the given (signed) C type.
 *
 * Generated function parameters:
 *   ch    - SSRC handler whose silence_events queue is appended to
 *   frame - decoded audio frame; only frame->data[0] is inspected, as
 *           frame->nb_samples consecutive samples of `type`
 *   thres - a sample counts as silence when it lies within [-thres, thres]
 *
 * Each run of silent samples is recorded as one silence_event, with start
 * and end set to frame->pts plus the sample index. A run that is still
 * silent at the end of the frame is left open (end == 0) and is picked up
 * again from the queue tail on the next call.
 *
 * Both bounds of the threshold check must test the current sample s[i].
 */
#define __silence_detect_type(type) \
static void __silence_detect_ ## type(struct codec_ssrc_handler *ch, AVFrame *frame, type thres) { \
	type *s = (void *) frame->data[0]; \
	struct silence_event *last = g_queue_peek_tail(&ch->silence_events); \
	\
	if (last && last->end) /* last event finished? */ \
		last = NULL; \
	\
	for (unsigned int i = 0; i < frame->nb_samples; i++) { \
		if (s[i] <= thres && s[i] >= -thres) { \
			/* silence */ \
			if (!last) { \
				/* new event */ \
				last = g_slice_alloc0(sizeof(*last)); \
				last->start = frame->pts + i; \
				g_queue_push_tail(&ch->silence_events, last); \
			} \
		} \
		else { \
			/* not silence */ \
			if (last && !last->end) { \
				/* close off event */ \
				last->end = frame->pts + i; \
				last = NULL; \
			} \
		} \
	} \
}
// Instantiate one silence detector per supported sample type. These are the
// functions that __silence_detect() dispatches to based on the sample format
// of the decoded frame.
__silence_detect_type(double)
__silence_detect_type(float)
__silence_detect_type(int32_t)
__silence_detect_type(int16_t)
/*
 * Runs silence detection over one decoded frame and records the result in
 * ch->silence_events.
 *
 * Does nothing when the integer silence threshold in the configuration is
 * zero, or when the handler's cn_payload_type is negative (presumably: no
 * CN payload type is available for this handler - confirm against the
 * code that sets it). Otherwise the scanner matching the frame's sample
 * format is invoked; formats without a scanner are reported with a
 * rate-limited warning.
 */
static void __silence_detect(struct codec_ssrc_handler *ch, AVFrame *frame) {
	if (!rtpe_config.silence_detect_int || ch->handler->cn_payload_type < 0)
		return;

	const int fmt = frame->format;

	if (fmt == AV_SAMPLE_FMT_DBL)
		__silence_detect_double(ch, frame, rtpe_config.silence_detect_double);
	else if (fmt == AV_SAMPLE_FMT_FLT)
		__silence_detect_float(ch, frame, rtpe_config.silence_detect_double);
	else if (fmt == AV_SAMPLE_FMT_S32)
		__silence_detect_int32_t(ch, frame, rtpe_config.silence_detect_int);
	else if (fmt == AV_SAMPLE_FMT_S16) {
		// 16-bit samples use the upper 16 bits of the 32-bit threshold
		__silence_detect_int16_t(ch, frame, rtpe_config.silence_detect_int >> 16);
	}
	else
		ilog(LOG_WARN | LOG_FLAG_LIMIT, "Unsupported sample format %i for silence detection",
				fmt);
}
/*
 * Decides whether the packet covering [pts, pts + duration) falls within a
 * recorded silence event and, if so, overwrites its payload with the
 * configured comfort noise payload.
 *
 * inout    - packet payload; on a positive result its contents and length
 *            are replaced by rtpe_config.cn_payload
 * events   - queue of silence_event entries, oldest first; events that have
 *            ended at or before the end of this packet are removed and freed
 * pts      - timestamp of the first sample of the packet
 * duration - packet length, in the same units as pts
 *
 * Returns 1 if the payload was replaced, 0 if the packet was left untouched.
 */
static int is_silence_event(str *inout, GQueue *events, uint64_t pts, uint64_t duration) {
	const uint64_t span_end = pts + duration;
	int silent = 0;

	while (!silent && events->length) {
		struct silence_event *ev = g_queue_peek_head(events);

		// event lies in the future: nothing applies to this packet
		if (ev->start > pts)
			break;

		// still ongoing, or ending after this packet: keep it queued
		if (!ev->end || ev->end > span_end) {
			silent = 1;
			break;
		}

		// event ended within this packet: it is used up either way
		g_queue_pop_head(events);
		// silent only if the event reaches exactly to the end of the packet;
		// otherwise keep looking, there might be more events
		if (ev->end == span_end)
			silent = 1;
		silence_event_free(ev);
	}

	if (!silent)
		return 0;

	// replace with CN payload
	inout->len = rtpe_config.cn_payload.len;
	memcpy(inout->s, rtpe_config.cn_payload.s, inout->len);
	return 1;
}
static struct ssrc_entry *__ssrc_handler_transcode_new(void *p) {
struct codec_handler *h = p;
@ -2296,6 +2406,7 @@ static void __free_ssrc_handler(void *chp) {
dtmf_rx_free(ch->dtmf_dsp);
resample_shutdown(&ch->dtmf_resampler);
g_queue_clear_full(&ch->dtmf_events, dtmf_event_free);
g_queue_clear_full(&ch->silence_events, silence_event_free);
if (ch->dtx_buffer)
obj_put(&ch->dtx_buffer->ttq.tt_obj);
}
@ -2340,6 +2451,7 @@ static int packet_encoded_rtp(encoder_t *enc, void *u1, void *u2) {
unsigned int repeats = 0;
int payload_type = -1;
int is_dtmf = dtmf_event_payload(&inout, (uint64_t *) &enc->avpkt.pts, enc->avpkt.duration,
&ch->dtmf_event, &ch->dtmf_events);
if (is_dtmf) {
@ -2349,6 +2461,10 @@ static int packet_encoded_rtp(encoder_t *enc, void *u1, void *u2) {
else if (is_dtmf == 3)
repeats = 2; // DTMF end event
}
else {
if (is_silence_event(&inout, &ch->silence_events, enc->avpkt.pts, enc->avpkt.duration))
payload_type = ch->handler->cn_payload_type;
}
// ready to send
@ -2469,6 +2585,7 @@ static int packet_decoded_common(decoder_t *decoder, AVFrame *frame, void *u1, v
}
__dtmf_detect(ch, frame);
__silence_detect(ch, frame);
// locking deliberately ignored
if (mp->media_out)


+ 30
- 0
daemon/main.c View File

@ -380,6 +380,8 @@ static void options(int *argc, char ***argv) {
AUTO_CLEANUP_GBUF(dtmf_udp_ep);
AUTO_CLEANUP_GBUF(endpoint_learning);
AUTO_CLEANUP_GBUF(dtls_sig);
double silence_detect = 0;
AUTO_CLEANUP_GVBUF(cn_payload);
GOptionEntry e[] = {
{ "table", 't', 0, G_OPTION_ARG_INT, &rtpe_config.kernel_table, "Kernel table to use", "INT" },
@ -465,6 +467,8 @@ static void options(int *argc, char ***argv) {
#ifdef WITH_TRANSCODING
{ "dtx-delay", 0,0, G_OPTION_ARG_INT, &rtpe_config.dtx_delay, "Delay in milliseconds to trigger DTX handling","INT"},
{ "max-dtx", 0,0, G_OPTION_ARG_INT, &rtpe_config.max_dtx, "Maximum duration of DTX handling", "INT"},
{ "silence-detect",0,0, G_OPTION_ARG_DOUBLE, &silence_detect, "Audio level threshold in percent for silence detection","FLOAT"},
{ "cn-payload",0,0, G_OPTION_ARG_STRING_ARRAY,&cn_payload, "Comfort noise parameters to replace silence with","INT INT INT ..."},
#endif
{ NULL, }
@ -684,6 +688,32 @@ static void options(int *argc, char ***argv) {
if (rtpe_config.jb_length < 0)
die("Invalid negative jitter buffer size");
if (silence_detect > 0) {
rtpe_config.silence_detect_double = silence_detect / 100.0;
rtpe_config.silence_detect_int = (int) ((silence_detect / 100.0) * UINT32_MAX);
}
if (!cn_payload)
str_init_dup(&rtpe_config.cn_payload, "\x20");
else {
int len = g_strv_length(cn_payload);
if (len < 1)
die("Invalid CN payload specified");
rtpe_config.cn_payload.s = malloc(len);
for (int i = 0; i < len; i++) {
char *endp;
long p = strtol(cn_payload[i], &endp, 0);
if (endp == cn_payload[i] || *endp != '\0')
die("Invalid CN payload specified");
if (p < 0 || p > 254)
die("Invalid CN payload specified");
if (i == 0 && p > 127)
die("Invalid CN payload specified");
rtpe_config.cn_payload.s[i] = p;
}
rtpe_config.cn_payload.len = len;
}
}
void fill_initial_rtpe_cfg(struct rtpengine_config* ini_rtpe_cfg) {


+ 45
- 0
daemon/rtpengine.pod View File

@ -745,6 +745,51 @@ received within this time frame, then DTX processing will stop. Can be set to
zero or negative to disable and keep DTX processing on indefinitely. Defaults
to 30 seconds.
=item B<--silence-detect=>I<FLOAT>
Enable silence detection and specify threshold in percent. This option is
applicable to transcoded streams only and defaults to zero (disabled).
When enabled, silence detection will be performed on all transcoded audio
streams. The threshold specified here is the sensitivity for detecting silence:
higher thresholds result in more audio being detected as silence, while lower
thresholds result in less audio being detected as silence. The threshold is
specified as percent between zero and 100. If set to 100, then all audio would
be detected as silence; if set to 50, then any audio that is quieter than 50%
of the maximum volume would be detected as silence; and so on. Setting it to
zero disables silence detection. To only detect silence that is very near or
equal to absolute silence, set this value to a low number such as 0.01. (For
certain codecs such as PCMA, a higher minimum threshold is required to detect
complete silence, as their compressed payloads don't decode to actual silence
but instead have a residual DC offset. For PCMA the minimum value is 0.013.)
Audio that is detected as silence will be replaced by comfort noise as
specified by the B<cn-payload> option (see below). Currently this is applicable
only to RTP peers that have advertised support for the B<CN> RTP payload type,
in which case the silence audio frames will be replaced by B<CN> RTP frames.
=item B<--cn-payload=>I<INT>
Specify one comfort noise parameter. This option can be given multiple times
and the format follows RFC 3389. When specified at the command line, list the
B<--cn-payload=> option multiple times, each one specifying a single CN
parameter. When used in the config file, list the option only a single time and
list multiple CN parameters separated by semicolons (e.g.
I<cn-payload = 20;40;60>).
The first CN payload value given is the noise level, specified as -dBov as per
RFC 3389. This means that a noise level of zero corresponds to maximum volume,
while higher numbers correspond to lower volumes. The highest allowable number
is 127, corresponding to -127 dBov, which is near silence.
Subsequent CN payload values carry spectral information (reflection
coefficients) as per RFC 3389. Allowable values for each coefficient are
between 0 and 254. Specifying spectral information is optional and the number
of coefficients listed (model order) is variable.
The default values are 32 (-32 dBov) for the noise level and no spectral
information.
=back
=head1 INTERFACES


+ 3
- 0
include/main.h View File

@ -108,6 +108,9 @@ struct rtpengine_config {
int http_threads;
int dtx_delay;
int max_dtx;
double silence_detect_double;
uint32_t silence_detect_int;
str cn_payload;
};


+ 72
- 3
t/auto-daemon-tests.pl View File

@ -10,7 +10,7 @@ use NGCP::Rtpclient::ICE;
autotest_start(qw(--config-file=none -t -1 -i 203.0.113.1 -i 2001:db8:4321::1
-n 2223 -c 12345 -f -L 7 -E -u 2222))
-n 2223 -c 12345 -f -L 7 -E -u 2222 --silence-detect=1))
or die;
@ -684,7 +684,7 @@ o=- 1545997027 1 IN IP4 198.51.101.1
s=tester
t=0 0
m=audio 3000 RTP/AVP 0
c=IN IP4 198.51.100.1
c=IN IP4 198.51.101.1
a=sendrecv
----------------------------------
v=0
@ -721,13 +721,82 @@ a=rtcp:PORT
SDP
snd($sock_a, $port_b, rtp(0, 1000, 3000, 0x1234, "\x00" x 160));
rcv($sock_b, $port_a, rtpm(0, 1000, 3000, 0x1234, "\x00" x 160));
rcv($sock_b, $port_a, rtpm(0, 1000, 3000, -1, "\x00" x 160));
snd($sock_b, $port_a, rtp(0, 2000, 4000, 0x3456, "\x00" x 160));
($ssrc) = rcv($sock_a, $port_b, rtpm(0, 2000, 4000, -1, "\x00" x 160));
snd($sock_b, $port_a, rtp(13, 2001, 4160, 0x3456, "\x12\x23\x23\x34\x56"));
rcv($sock_a, $port_b, rtpm(0, 2001, 4160, $ssrc, "\xce\x56\x69\xcc\x61\xca\x63\xd2\x66\x57\xe2\x47\x65\x59\x6a\x74\x5d\x4a\x68\xe9\x60\x4a\x63\x4b\xf4\x43\x4b\x48\x48\x52\x39\x57\x37\x4c\x39\x4c\x48\x3b\x43\x47\x44\x57\x48\xf5\x3e\x59\x3e\x52\x3b\x53\x3d\x53\x3b\x41\x5b\x38\x4a\x4b\x35\x48\x4a\x3e\x52\x50\x4b\x46\xfd\x3e\xf1\x3a\xd6\x35\x54\x5d\x3a\x58\x45\x42\x3d\x3e\x4c\x42\x3a\x58\x3c\x50\x3b\x6e\x36\x60\x3e\x3d\x3b\x41\x3a\x47\x35\x48\x35\x4b\x3e\x3d\x47\x3a\x3d\x39\x4f\x40\x42\x4a\x47\x3d\x6b\x42\x5a\x75\x53\x45\x5a\x4b\x4f\x48\x59\x48\x78\x43\x77\x4c\x42\x59\x47\x46\x3e\x67\x44\x3a\x67\x4b\x3f\x51\x48\x44\x3e\x54\x37\x6c\x45\x45\x3f\x6e\x3a\x68\x49\x4e\x3f\x47\x4b\x3e\xf3\x39"));
snd($sock_b, $port_a, rtp(0, 2002, 4320, 0x3456, "\x00" x 160));
rcv($sock_a, $port_b, rtpm(0, 2002, 4320, $ssrc, "\x00" x 160));
# test silence detection
snd($sock_a, $port_b, rtp(0, 1001, 3160, 0x1234, "\x00" x 160));
($ssrc) = rcv($sock_b, $port_a, rtpm(0, 1001, 3160, -1, "\x00" x 160));
snd($sock_a, $port_b, rtp(0, 1002, 3320, 0x1234, "\xff" x 160));
rcv($sock_b, $port_a, rtpm(13, 1002, 3320, $ssrc, "\x20"));
# reverse of the above, sockets/ports swapped
($sock_b, $sock_a) = new_call([qw(198.51.101.1 6002)], [qw(198.51.101.3 7002)]);
($port_b) = offer('accept CN',
{ ICE => 'remove', replace => ['origin'], flags => ['always transcode'] }, <<SDP);
v=0
o=- 1545997027 1 IN IP4 198.51.101.1
s=tester
t=0 0
m=audio 6002 RTP/AVP 0 13
c=IN IP4 198.51.101.1
a=sendrecv
----------------------------------
v=0
o=- 1545997027 1 IN IP4 203.0.113.1
s=tester
t=0 0
m=audio PORT RTP/AVP 0 13
c=IN IP4 203.0.113.1
a=rtpmap:0 PCMU/8000
a=rtpmap:13 CN/8000
a=sendrecv
a=rtcp:PORT
SDP
($port_a) = answer('accept CN',
{ ICE => 'remove', replace => ['origin'] }, <<SDP);
v=0
o=- 1545997027 1 IN IP4 198.51.101.1
s=tester
t=0 0
m=audio 7002 RTP/AVP 0
c=IN IP4 198.51.101.3
a=sendrecv
----------------------------------
v=0
o=- 1545997027 1 IN IP4 203.0.113.1
s=tester
t=0 0
m=audio PORT RTP/AVP 0 13
c=IN IP4 203.0.113.1
a=rtpmap:0 PCMU/8000
a=rtpmap:13 CN/8000
a=sendrecv
a=rtcp:PORT
SDP
snd($sock_a, $port_b, rtp(0, 1000, 3000, 0x1234, "\x00" x 160));
rcv($sock_b, $port_a, rtpm(0, 1000, 3000, -1, "\x00" x 160));
snd($sock_b, $port_a, rtp(0, 2000, 4000, 0x3456, "\x00" x 160));
($ssrc) = rcv($sock_a, $port_b, rtpm(0, 2000, 4000, -1, "\x00" x 160));
snd($sock_b, $port_a, rtp(13, 2001, 4160, 0x3456, "\x12\x23\x23\x34\x56"));
rcv($sock_a, $port_b, rtpm(0, 2001, 4160, $ssrc, "\xce\x56\x69\xcc\x61\xca\x63\xd2\x66\x57\xe2\x47\x65\x59\x6a\x74\x5d\x4a\x68\xe9\x60\x4a\x63\x4b\xf4\x43\x4b\x48\x48\x52\x39\x57\x37\x4c\x39\x4c\x48\x3b\x43\x47\x44\x57\x48\xf5\x3e\x59\x3e\x52\x3b\x53\x3d\x53\x3b\x41\x5b\x38\x4a\x4b\x35\x48\x4a\x3e\x52\x50\x4b\x46\xfd\x3e\xf1\x3a\xd6\x35\x54\x5d\x3a\x58\x45\x42\x3d\x3e\x4c\x42\x3a\x58\x3c\x50\x3b\x6e\x36\x60\x3e\x3d\x3b\x41\x3a\x47\x35\x48\x35\x4b\x3e\x3d\x47\x3a\x3d\x39\x4f\x40\x42\x4a\x47\x3d\x6b\x42\x5a\x75\x53\x45\x5a\x4b\x4f\x48\x59\x48\x78\x43\x77\x4c\x42\x59\x47\x46\x3e\x67\x44\x3a\x67\x4b\x3f\x51\x48\x44\x3e\x54\x37\x6c\x45\x45\x3f\x6e\x3a\x68\x49\x4e\x3f\x47\x4b\x3e\xf3\x39"));
snd($sock_b, $port_a, rtp(0, 2002, 4320, 0x3456, "\x00" x 160));
rcv($sock_a, $port_b, rtpm(0, 2002, 4320, $ssrc, "\x00" x 160));
# test silence detection
snd($sock_a, $port_b, rtp(0, 1001, 3160, 0x1234, "\x00" x 160));
($ssrc) = rcv($sock_b, $port_a, rtpm(0, 1001, 3160, -1, "\x00" x 160));
snd($sock_a, $port_b, rtp(0, 1002, 3320, 0x1234, "\xff" x 160));
rcv($sock_b, $port_a, rtpm(13, 1002, 3320, $ssrc, "\x20"));


Loading…
Cancel
Save