From a920ca39dbfc873df980951f928f24eccf76396c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 4 Jan 2005 13:57:51 +0000 Subject: [PATCH] Performance optimization The performance of rate conversion is optimized. - with S16 format, no format conversion is done - linear polation of expansion uses 16bit resolution --- src/pcm/pcm_rate.c | 249 ++++++++++++++++++++++++++++----------------- 1 file changed, 155 insertions(+), 94 deletions(-) diff --git a/src/pcm/pcm_rate.c b/src/pcm/pcm_rate.c index 13c96310..b802bc20 100644 --- a/src/pcm/pcm_rate.c +++ b/src/pcm/pcm_rate.c @@ -54,16 +54,8 @@ enum rate_type { RATE_TYPE_POLYPHASE, /* polyphase resampling */ }; -typedef struct { - union { - struct { - int init; - int16_t old_sample, new_sample; - int64_t sum; - } linear; - } u; -} snd_pcm_rate_state_t; - +typedef struct _snd_pcm_rate snd_pcm_rate_t; + typedef void (*rate_f)(const snd_pcm_channel_area_t *dst_areas, snd_pcm_uframes_t dst_offset, snd_pcm_uframes_t dst_frames, @@ -71,11 +63,9 @@ typedef void (*rate_f)(const snd_pcm_channel_area_t *dst_areas, snd_pcm_uframes_t src_offset, snd_pcm_uframes_t src_frames, unsigned int channels, - unsigned int getidx, unsigned int putidx, - unsigned int arg, - snd_pcm_rate_state_t *states); + snd_pcm_rate_t *rate); -typedef struct { +struct _snd_pcm_rate { snd_pcm_t *slave; int close_slave; snd_atomic_write_t watom; @@ -86,72 +76,74 @@ typedef struct { unsigned int get_idx; unsigned int put_idx; unsigned int pitch; + unsigned int pitch_shift; /* for expand interpolation */ rate_f func; snd_pcm_format_t sformat; unsigned int srate; - snd_pcm_rate_state_t *states; snd_pcm_channel_area_t *pareas; /* areas for splitted period (rate pcm) */ snd_pcm_channel_area_t *sareas; /* areas for splitted period (slave pcm) */ -} snd_pcm_rate_t; +}; static void snd_pcm_rate_expand(const snd_pcm_channel_area_t *dst_areas, snd_pcm_uframes_t dst_offset, snd_pcm_uframes_t dst_frames, const snd_pcm_channel_area_t *src_areas, snd_pcm_uframes_t src_offset, snd_pcm_uframes_t src_frames, unsigned int channels, - unsigned int getidx, unsigned int putidx, - unsigned int get_threshold, - snd_pcm_rate_state_t *states) + snd_pcm_rate_t *rate) { #define GET16_LABELS #define PUT16_LABELS #include "plugin_ops.h" #undef GET16_LABELS #undef PUT16_LABELS - void *get = get16_labels[getidx]; - void *put = put16_labels[putidx]; + void *get = get16_labels[rate->get_idx]; + void *put = put16_labels[rate->put_idx]; + unsigned int get_threshold = rate->pitch; unsigned int channel; - snd_pcm_uframes_t src_frames1 = 0; - snd_pcm_uframes_t dst_frames1 = 0; + snd_pcm_uframes_t src_frames1; + snd_pcm_uframes_t dst_frames1; int16_t sample = 0; for (channel = 0; channel < channels; ++channel) { - //int xpos = 0; const snd_pcm_channel_area_t *src_area = &src_areas[channel]; const snd_pcm_channel_area_t *dst_area = &dst_areas[channel]; const char *src; char *dst; int src_step, dst_step; - int16_t old_sample = states->u.linear.old_sample; - int16_t new_sample = states->u.linear.new_sample; + int16_t old_sample = 0; + int16_t new_sample = 0; + int old_weight, new_weight; unsigned int pos = 0; + int init; src = snd_pcm_channel_area_addr(src_area, src_offset); dst = snd_pcm_channel_area_addr(dst_area, dst_offset); src_step = snd_pcm_channel_area_step(src_area); dst_step = snd_pcm_channel_area_step(dst_area); src_frames1 = 0; dst_frames1 = 0; - if (!states->u.linear.init) { - goto __force_get; - } else { - states->u.linear.init = 2; - } + init = 1; while (dst_frames1 < dst_frames) { - if (states->u.linear.init == 2) { - old_sample = new_sample; - __force_get: - goto *get; + if (pos >= get_threshold) { + src += src_step; + src_frames1++; + if (src_frames1 < src_frames) { + old_sample = new_sample; + goto *get; #define GET16_END after_get #include "plugin_ops.h" #undef GET16_END - after_get: - new_sample = sample; - if (!states->u.linear.init) - old_sample = sample; - states->u.linear.init = 1; + after_get: + new_sample = sample; + if (init) { + init = 0; + continue; + } + } + pos -= get_threshold; } - sample = (((int64_t)old_sample * (int64_t)(get_threshold - pos)) + ((int64_t)new_sample * pos)) / get_threshold; - //printf("sample[%i] = %i (old_sample = %i, new_sample = %i)\n", xpos++, sample, old_sample, new_sample); + new_weight = (pos << (16 - rate->pitch_shift)) / (get_threshold >> rate->pitch_shift); + old_weight = 0x10000 - new_weight; + sample = (old_sample * old_weight + new_sample * new_weight) >> 16; goto *put; #define PUT16_END after_put #include "plugin_ops.h" @@ -160,24 +152,58 @@ static void snd_pcm_rate_expand(const snd_pcm_channel_area_t *dst_areas, dst += dst_step; dst_frames1++; pos += LINEAR_DIV; + } + } +} + +/* optimized version for S16 format */ +static void snd_pcm_rate_expand_s16(const snd_pcm_channel_area_t *dst_areas, + snd_pcm_uframes_t dst_offset, snd_pcm_uframes_t dst_frames, + const snd_pcm_channel_area_t *src_areas, + snd_pcm_uframes_t src_offset, snd_pcm_uframes_t src_frames, + unsigned int channels, + snd_pcm_rate_t *rate) +{ + unsigned int channel; + snd_pcm_uframes_t src_frames1; + snd_pcm_uframes_t dst_frames1; + unsigned int get_threshold = rate->pitch; + + for (channel = 0; channel < channels; ++channel) { + const snd_pcm_channel_area_t *src_area = &src_areas[channel]; + const snd_pcm_channel_area_t *dst_area = &dst_areas[channel]; + const int16_t *src; + int16_t *dst; + int src_step, dst_step; + int16_t old_sample; + int16_t new_sample; + int old_weight, new_weight; + unsigned int pos; + src = snd_pcm_channel_area_addr(src_area, src_offset); + dst = snd_pcm_channel_area_addr(dst_area, dst_offset); + src_step = snd_pcm_channel_area_step(src_area) >> 1; + dst_step = snd_pcm_channel_area_step(dst_area) >> 1; + src_frames1 = 0; + dst_frames1 = 0; + old_sample = new_sample = *src; + pos = get_threshold; + while (dst_frames1 < dst_frames) { if (pos >= get_threshold) { pos -= get_threshold; src += src_step; src_frames1++; - states->u.linear.init = 2; /* get a new sample */ - //printf("new_src_pos = %i\n", (src - (char *)snd_pcm_channel_area_addr(src_area, src_offset)) / src_step); - if (CHECK_SANITY(src_frames1 > src_frames)) { - SNDERR("src_frames overflow"); - break; + if (src_frames1 < src_frames) { + old_sample = new_sample; + new_sample = *src; } } + new_weight = (pos << (16 - rate->pitch_shift)) / (get_threshold >> rate->pitch_shift); + old_weight = 0x10000 - new_weight; + *dst = (old_sample * old_weight + new_sample * new_weight) >> 16; + dst += dst_step; + dst_frames1++; + pos += LINEAR_DIV; } - if (dst_frames != dst_frames1) { - SNDERR("dst_frames %lu, dst_frames1 %lu, src_frames %lu, src_frames1 %lu\n", dst_frames, dst_frames1, src_frames, src_frames1); - } - states->u.linear.old_sample = old_sample; - states->u.linear.new_sample = new_sample; - states++; } } @@ -186,37 +212,32 @@ static void snd_pcm_rate_shrink(const snd_pcm_channel_area_t *dst_areas, const snd_pcm_channel_area_t *src_areas, snd_pcm_uframes_t src_offset, snd_pcm_uframes_t src_frames, unsigned int channels, - unsigned int getidx, unsigned int putidx, - unsigned int get_increment, - snd_pcm_rate_state_t *states) + snd_pcm_rate_t *rate) { #define GET16_LABELS #define PUT16_LABELS #include "plugin_ops.h" #undef GET16_LABELS #undef PUT16_LABELS - void *get = get16_labels[getidx]; - void *put = put16_labels[putidx]; + void *get = get16_labels[rate->get_idx]; + void *put = put16_labels[rate->put_idx]; + unsigned int get_increment = rate->pitch; unsigned int channel; - snd_pcm_uframes_t src_frames1 = 0; - snd_pcm_uframes_t dst_frames1 = 0; + snd_pcm_uframes_t src_frames1; + snd_pcm_uframes_t dst_frames1; int16_t sample = 0; - unsigned int pos = 0; + unsigned int pos; for (channel = 0; channel < channels; ++channel) { const snd_pcm_channel_area_t *src_area = &src_areas[channel]; const snd_pcm_channel_area_t *dst_area = &dst_areas[channel]; - int64_t sum; const char *src; char *dst; int src_step, dst_step; int16_t old_sample = 0; int16_t new_sample = 0; int old_weight, new_weight; - sum = states->u.linear.sum; - //int16_t old_sample = states->u.linear.old_sample; pos = LINEAR_DIV - get_increment; /* Force first sample to be copied */ - states->u.linear.init = 0; src = snd_pcm_channel_area_addr(src_area, src_offset); dst = snd_pcm_channel_area_addr(dst_area, dst_offset); src_step = snd_pcm_channel_area_step(src_area); @@ -253,13 +274,59 @@ static void snd_pcm_rate_shrink(const snd_pcm_channel_area_t *dst_areas, } old_sample = new_sample; } - - states->u.linear.sum = sum; - states->u.linear.old_sample = sample; - states++; } - if (dst_frames != dst_frames1) { - SNDERR("dst %lu, dst1 %lu, src %lu, src1 %lu pos = %u\n", dst_frames, dst_frames1, src_frames, src_frames1, pos ); +} + +/* optimized version for S16 format */ +static void snd_pcm_rate_shrink_s16(const snd_pcm_channel_area_t *dst_areas, + snd_pcm_uframes_t dst_offset, snd_pcm_uframes_t dst_frames, + const snd_pcm_channel_area_t *src_areas, + snd_pcm_uframes_t src_offset, snd_pcm_uframes_t src_frames, + unsigned int channels, + snd_pcm_rate_t *rate) +{ + unsigned int get_increment = rate->pitch; + unsigned int channel; + snd_pcm_uframes_t src_frames1; + snd_pcm_uframes_t dst_frames1; + unsigned int pos = 0; + + for (channel = 0; channel < channels; ++channel) { + const snd_pcm_channel_area_t *src_area = &src_areas[channel]; + const snd_pcm_channel_area_t *dst_area = &dst_areas[channel]; + const int16_t *src; + int16_t *dst; + int src_step, dst_step; + int16_t old_sample = 0; + int16_t new_sample = 0; + int old_weight, new_weight; + pos = LINEAR_DIV - get_increment; /* Force first sample to be copied */ + src = snd_pcm_channel_area_addr(src_area, src_offset); + dst = snd_pcm_channel_area_addr(dst_area, dst_offset); + src_step = snd_pcm_channel_area_step(src_area) >> 1; + dst_step = snd_pcm_channel_area_step(dst_area) >> 1 ; + src_frames1 = 0; + dst_frames1 = 0; + while (src_frames1 < src_frames) { + + new_sample = *src; + src += src_step; + src_frames1++; + pos += get_increment; + if (pos >= LINEAR_DIV) { + pos -= LINEAR_DIV; + old_weight = (pos << (32 - LINEAR_DIV_SHIFT)) / (get_increment >> (LINEAR_DIV_SHIFT - 16)); + new_weight = 0x10000 - old_weight; + *dst = (old_sample * old_weight + new_sample * new_weight) >> 16; + dst += dst_step; + dst_frames1++; + if (CHECK_SANITY(dst_frames1 > dst_frames)) { + SNDERR("dst_frames overflow"); + break; + } + } + old_sample = new_sample; + } } } @@ -500,20 +567,23 @@ static int snd_pcm_rate_hw_params(snd_pcm_t *pcm, snd_pcm_hw_params_t * params) rate->get_idx = snd_pcm_linear_get_index(src_format, SND_PCM_FORMAT_S16); rate->put_idx = snd_pcm_linear_put_index(SND_PCM_FORMAT_S16, dst_format); if (src_rate < dst_rate) { - rate->func = snd_pcm_rate_expand; + if (src_format == dst_format && src_format == SND_PCM_FORMAT_S16) + rate->func = snd_pcm_rate_expand_s16; + else + rate->func = snd_pcm_rate_expand; /* pitch is get_threshold */ } else { - rate->func = snd_pcm_rate_shrink; + if (src_format == dst_format && src_format == SND_PCM_FORMAT_S16) + rate->func = snd_pcm_rate_shrink_s16; + else + rate->func = snd_pcm_rate_shrink; /* pitch is get_increment */ } rate->pitch = (((u_int64_t)dst_rate * LINEAR_DIV) + (src_rate / 2)) / src_rate; - if (CHECK_SANITY(rate->states || rate->pareas)) { + if (CHECK_SANITY(rate->pareas)) { SNDMSG("rate plugin already in use"); return -EBUSY; } - rate->states = malloc(channels * sizeof(*rate->states)); - if (rate->states == NULL) - return -ENOMEM; if ((buffer_size / period_size) * period_size == buffer_size && (slave->buffer_size / slave->period_size) * slave->period_size == slave->buffer_size) return 0; @@ -550,10 +620,6 @@ static int snd_pcm_rate_hw_free(snd_pcm_t *pcm) rate->pareas = NULL; rate->sareas = NULL; } - if (rate->states) { - free(rate->states); - rate->states = NULL; - } return snd_pcm_hw_free(rate->slave); } @@ -657,6 +723,12 @@ static int snd_pcm_rate_sw_params(snd_pcm_t *pcm, snd_pcm_sw_params_t * params) return -EIO; } } + if (rate->pitch >= LINEAR_DIV) { + /* shift for expand linear interpolation */ + rate->pitch_shift = 0; + while ((rate->pitch >> rate->pitch_shift) >= (1 << 16)) + rate->pitch_shift++; + } recalc(pcm, &sparams->avail_min); rate->orig_avail_min = sparams->avail_min; recalc(pcm, &sparams->xfer_align); @@ -682,15 +754,8 @@ static int snd_pcm_rate_sw_params(snd_pcm_t *pcm, snd_pcm_sw_params_t * params) static int snd_pcm_rate_init(snd_pcm_t *pcm) { snd_pcm_rate_t *rate = pcm->private_data; - unsigned int k; switch (rate->type) { case RATE_TYPE_LINEAR: - for (k = 0; k < pcm->channels; ++k) { - rate->states[k].u.linear.sum = 0; - rate->states[k].u.linear.old_sample = 0; - rate->states[k].u.linear.new_sample = 0; - rate->states[k].u.linear.init = 0; - } break; default: assert(0); @@ -708,9 +773,7 @@ snd_pcm_rate_write_areas1(snd_pcm_t *pcm, snd_pcm_rate_t *rate = pcm->private_data; rate->func(slave_areas, slave_offset, rate->slave->period_size, areas, offset, pcm->period_size, - pcm->channels, - rate->get_idx, rate->put_idx, - rate->pitch, rate->states); + pcm->channels, rate); return 0; } @@ -724,9 +787,7 @@ snd_pcm_rate_read_areas1(snd_pcm_t *pcm, snd_pcm_rate_t *rate = pcm->private_data; rate->func(areas, offset, pcm->period_size, slave_areas, slave_offset, rate->slave->period_size, - pcm->channels, - rate->get_idx, rate->put_idx, - rate->pitch, rate->states); + pcm->channels, rate); return 0; } -- 2.47.1