]> git.alsa-project.org Git - alsa-lib.git/commitdiff
Performance optimization
authorTakashi Iwai <tiwai@suse.de>
Tue, 4 Jan 2005 13:57:51 +0000 (13:57 +0000)
committerTakashi Iwai <tiwai@suse.de>
Tue, 4 Jan 2005 13:57:51 +0000 (13:57 +0000)
The performance of rate conversion is optimized.
- with S16 format, no format conversion is done
- linear polation of expansion uses 16bit resolution

src/pcm/pcm_rate.c

index 13c963101855101dbb96606745013ced46da0366..b802bc2057952187828455dea5b83162eeb303c0 100644 (file)
@@ -54,16 +54,8 @@ enum rate_type {
        RATE_TYPE_POLYPHASE,            /* polyphase resampling */
 };
 
-typedef struct {
-       union {
-               struct {
-                       int init;
-                       int16_t old_sample, new_sample;
-                       int64_t sum;
-               } linear;
-       } u;
-} snd_pcm_rate_state_t;
+typedef struct _snd_pcm_rate snd_pcm_rate_t;
+
 typedef void (*rate_f)(const snd_pcm_channel_area_t *dst_areas,
                       snd_pcm_uframes_t dst_offset,
                       snd_pcm_uframes_t dst_frames,
@@ -71,11 +63,9 @@ typedef void (*rate_f)(const snd_pcm_channel_area_t *dst_areas,
                       snd_pcm_uframes_t src_offset,
                       snd_pcm_uframes_t src_frames,
                       unsigned int channels,
-                      unsigned int getidx, unsigned int putidx,
-                      unsigned int arg,
-                      snd_pcm_rate_state_t *states);
+                      snd_pcm_rate_t *rate);
 
-typedef struct {
+struct _snd_pcm_rate {
        snd_pcm_t *slave;
        int close_slave;
        snd_atomic_write_t watom;
@@ -86,72 +76,74 @@ typedef struct {
        unsigned int get_idx;
        unsigned int put_idx;
        unsigned int pitch;
+       unsigned int pitch_shift;       /* for expand interpolation */
        rate_f func;
        snd_pcm_format_t sformat;
        unsigned int srate;
-       snd_pcm_rate_state_t *states;
        snd_pcm_channel_area_t *pareas; /* areas for splitted period (rate pcm) */
        snd_pcm_channel_area_t *sareas; /* areas for splitted period (slave pcm) */
-} snd_pcm_rate_t;
+};
 
 static void snd_pcm_rate_expand(const snd_pcm_channel_area_t *dst_areas,
                                snd_pcm_uframes_t dst_offset, snd_pcm_uframes_t dst_frames,
                                const snd_pcm_channel_area_t *src_areas,
                                snd_pcm_uframes_t src_offset, snd_pcm_uframes_t src_frames,
                                unsigned int channels,
-                               unsigned int getidx, unsigned int putidx,
-                               unsigned int get_threshold,
-                               snd_pcm_rate_state_t *states)
+                               snd_pcm_rate_t *rate)
 {
 #define GET16_LABELS
 #define PUT16_LABELS
 #include "plugin_ops.h"
 #undef GET16_LABELS
 #undef PUT16_LABELS
-       void *get = get16_labels[getidx];
-       void *put = put16_labels[putidx];
+       void *get = get16_labels[rate->get_idx];
+       void *put = put16_labels[rate->put_idx];
+       unsigned int get_threshold = rate->pitch;
        unsigned int channel;
-       snd_pcm_uframes_t src_frames1 = 0;
-       snd_pcm_uframes_t dst_frames1 = 0;
+       snd_pcm_uframes_t src_frames1;
+       snd_pcm_uframes_t dst_frames1;
        int16_t sample = 0;
        
        for (channel = 0; channel < channels; ++channel) {
-               //int xpos = 0;
                const snd_pcm_channel_area_t *src_area = &src_areas[channel];
                const snd_pcm_channel_area_t *dst_area = &dst_areas[channel];
                const char *src;
                char *dst;
                int src_step, dst_step;
-               int16_t old_sample = states->u.linear.old_sample;
-               int16_t new_sample = states->u.linear.new_sample;
+               int16_t old_sample = 0;
+               int16_t new_sample = 0;
+               int old_weight, new_weight;
                unsigned int pos = 0;
+               int init;
                src = snd_pcm_channel_area_addr(src_area, src_offset);
                dst = snd_pcm_channel_area_addr(dst_area, dst_offset);
                src_step = snd_pcm_channel_area_step(src_area);
                dst_step = snd_pcm_channel_area_step(dst_area);
                src_frames1 = 0;
                dst_frames1 = 0;
-               if (!states->u.linear.init) {
-                       goto __force_get;
-               } else {
-                       states->u.linear.init = 2;
-               }
+               init = 1;
                while (dst_frames1 < dst_frames) {
-                       if (states->u.linear.init == 2) {
-                               old_sample = new_sample;
-                             __force_get:
-                               goto *get;
+                       if (pos >= get_threshold) {
+                               src += src_step;
+                               src_frames1++;
+                               if (src_frames1 < src_frames) {
+                                       old_sample = new_sample;
+                                       goto *get;
 #define GET16_END after_get
 #include "plugin_ops.h"
 #undef GET16_END
-                       after_get:
-                               new_sample = sample;
-                               if (!states->u.linear.init)
-                                       old_sample = sample;
-                               states->u.linear.init = 1;
+                               after_get:
+                                       new_sample = sample;
+                                       if (init) {
+                                               init = 0;
+                                               continue;
+                                       }
+                               }
+                               pos -= get_threshold;
                        }
-                       sample = (((int64_t)old_sample * (int64_t)(get_threshold - pos)) + ((int64_t)new_sample * pos)) / get_threshold;
-                       //printf("sample[%i] = %i (old_sample = %i, new_sample = %i)\n", xpos++, sample, old_sample, new_sample);
+                       new_weight = (pos << (16 - rate->pitch_shift)) / (get_threshold >> rate->pitch_shift);
+                       old_weight = 0x10000 - new_weight;
+                       sample = (old_sample * old_weight + new_sample * new_weight) >> 16;
                        goto *put;
 #define PUT16_END after_put
 #include "plugin_ops.h"
@@ -160,24 +152,58 @@ static void snd_pcm_rate_expand(const snd_pcm_channel_area_t *dst_areas,
                        dst += dst_step;
                        dst_frames1++;
                        pos += LINEAR_DIV;
+               } 
+       }
+}
+
+/* optimized version for S16 format */
+static void snd_pcm_rate_expand_s16(const snd_pcm_channel_area_t *dst_areas,
+                                   snd_pcm_uframes_t dst_offset, snd_pcm_uframes_t dst_frames,
+                                   const snd_pcm_channel_area_t *src_areas,
+                                   snd_pcm_uframes_t src_offset, snd_pcm_uframes_t src_frames,
+                                   unsigned int channels,
+                                   snd_pcm_rate_t *rate)
+{
+       unsigned int channel;
+       snd_pcm_uframes_t src_frames1;
+       snd_pcm_uframes_t dst_frames1;
+       unsigned int get_threshold = rate->pitch;
+       
+       for (channel = 0; channel < channels; ++channel) {
+               const snd_pcm_channel_area_t *src_area = &src_areas[channel];
+               const snd_pcm_channel_area_t *dst_area = &dst_areas[channel];
+               const int16_t *src;
+               int16_t *dst;
+               int src_step, dst_step;
+               int16_t old_sample;
+               int16_t new_sample;
+               int old_weight, new_weight;
+               unsigned int pos;
+               src = snd_pcm_channel_area_addr(src_area, src_offset);
+               dst = snd_pcm_channel_area_addr(dst_area, dst_offset);
+               src_step = snd_pcm_channel_area_step(src_area) >> 1;
+               dst_step = snd_pcm_channel_area_step(dst_area) >> 1;
+               src_frames1 = 0;
+               dst_frames1 = 0;
+               old_sample = new_sample = *src;
+               pos = get_threshold;
+               while (dst_frames1 < dst_frames) {
                        if (pos >= get_threshold) {
                                pos -= get_threshold;
                                src += src_step;
                                src_frames1++;
-                               states->u.linear.init = 2; /* get a new sample */
-                               //printf("new_src_pos = %i\n", (src - (char *)snd_pcm_channel_area_addr(src_area, src_offset)) / src_step);
-                               if (CHECK_SANITY(src_frames1 > src_frames)) {
-                                       SNDERR("src_frames overflow");
-                                       break;
+                               if (src_frames1 < src_frames) {
+                                       old_sample = new_sample;
+                                       new_sample = *src;
                                }
                        }
+                       new_weight = (pos << (16 - rate->pitch_shift)) / (get_threshold >> rate->pitch_shift);
+                       old_weight = 0x10000 - new_weight;
+                       *dst = (old_sample * old_weight + new_sample * new_weight) >> 16;
+                       dst += dst_step;
+                       dst_frames1++;
+                       pos += LINEAR_DIV;
                } 
-               if (dst_frames != dst_frames1) {
-                       SNDERR("dst_frames %lu, dst_frames1 %lu, src_frames %lu, src_frames1 %lu\n", dst_frames, dst_frames1, src_frames, src_frames1);
-               }
-               states->u.linear.old_sample = old_sample;
-               states->u.linear.new_sample = new_sample;
-               states++;
        }
 }
 
@@ -186,37 +212,32 @@ static void snd_pcm_rate_shrink(const snd_pcm_channel_area_t *dst_areas,
                                const snd_pcm_channel_area_t *src_areas,
                                snd_pcm_uframes_t src_offset, snd_pcm_uframes_t src_frames,
                                unsigned int channels,
-                               unsigned int getidx, unsigned int putidx,
-                               unsigned int get_increment,
-                               snd_pcm_rate_state_t *states)
+                               snd_pcm_rate_t *rate)
 {
 #define GET16_LABELS
 #define PUT16_LABELS
 #include "plugin_ops.h"
 #undef GET16_LABELS
 #undef PUT16_LABELS
-       void *get = get16_labels[getidx];
-       void *put = put16_labels[putidx];
+       void *get = get16_labels[rate->get_idx];
+       void *put = put16_labels[rate->put_idx];
+       unsigned int get_increment = rate->pitch;
        unsigned int channel;
-       snd_pcm_uframes_t src_frames1 = 0;
-       snd_pcm_uframes_t dst_frames1 = 0;
+       snd_pcm_uframes_t src_frames1;
+       snd_pcm_uframes_t dst_frames1;
        int16_t sample = 0;
-       unsigned int pos = 0;
+       unsigned int pos;
 
        for (channel = 0; channel < channels; ++channel) {
                const snd_pcm_channel_area_t *src_area = &src_areas[channel];
                const snd_pcm_channel_area_t *dst_area = &dst_areas[channel];
-               int64_t sum;
                const char *src;
                char *dst;
                int src_step, dst_step;
                int16_t old_sample = 0;
                int16_t new_sample = 0;
                int old_weight, new_weight;
-               sum = states->u.linear.sum;
-               //int16_t old_sample = states->u.linear.old_sample;
                pos = LINEAR_DIV - get_increment; /* Force first sample to be copied */
-               states->u.linear.init = 0;
                src = snd_pcm_channel_area_addr(src_area, src_offset);
                dst = snd_pcm_channel_area_addr(dst_area, dst_offset);
                src_step = snd_pcm_channel_area_step(src_area);
@@ -253,13 +274,59 @@ static void snd_pcm_rate_shrink(const snd_pcm_channel_area_t *dst_areas,
                        }
                        old_sample = new_sample;
                }
-
-               states->u.linear.sum = sum;
-               states->u.linear.old_sample = sample;
-               states++;
        }
-       if (dst_frames != dst_frames1) {
-               SNDERR("dst %lu, dst1 %lu, src %lu, src1 %lu pos = %u\n", dst_frames, dst_frames1, src_frames, src_frames1, pos );
+}
+
+/* optimized version for S16 format */
+static void snd_pcm_rate_shrink_s16(const snd_pcm_channel_area_t *dst_areas,
+                                   snd_pcm_uframes_t dst_offset, snd_pcm_uframes_t dst_frames,
+                                   const snd_pcm_channel_area_t *src_areas,
+                                   snd_pcm_uframes_t src_offset, snd_pcm_uframes_t src_frames,
+                                   unsigned int channels,
+                                   snd_pcm_rate_t *rate)
+{
+       unsigned int get_increment = rate->pitch;
+       unsigned int channel;
+       snd_pcm_uframes_t src_frames1;
+       snd_pcm_uframes_t dst_frames1;
+       unsigned int pos = 0;
+
+       for (channel = 0; channel < channels; ++channel) {
+               const snd_pcm_channel_area_t *src_area = &src_areas[channel];
+               const snd_pcm_channel_area_t *dst_area = &dst_areas[channel];
+               const int16_t *src;
+               int16_t *dst;
+               int src_step, dst_step;
+               int16_t old_sample = 0;
+               int16_t new_sample = 0;
+               int old_weight, new_weight;
+               pos = LINEAR_DIV - get_increment; /* Force first sample to be copied */
+               src = snd_pcm_channel_area_addr(src_area, src_offset);
+               dst = snd_pcm_channel_area_addr(dst_area, dst_offset);
+               src_step = snd_pcm_channel_area_step(src_area) >> 1;
+               dst_step = snd_pcm_channel_area_step(dst_area) >> 1 ;
+               src_frames1 = 0;
+               dst_frames1 = 0;
+               while (src_frames1 < src_frames) {
+                       
+                       new_sample = *src;
+                       src += src_step;
+                       src_frames1++;
+                       pos += get_increment;
+                       if (pos >= LINEAR_DIV) {
+                               pos -= LINEAR_DIV;
+                               old_weight = (pos << (32 - LINEAR_DIV_SHIFT)) / (get_increment >> (LINEAR_DIV_SHIFT - 16));
+                               new_weight = 0x10000 - old_weight;
+                               *dst = (old_sample * old_weight + new_sample * new_weight) >> 16;
+                               dst += dst_step;
+                               dst_frames1++;
+                               if (CHECK_SANITY(dst_frames1 > dst_frames)) {
+                                       SNDERR("dst_frames overflow");
+                                       break;
+                               }
+                       }
+                       old_sample = new_sample;
+               }
        }
 }
 
@@ -500,20 +567,23 @@ static int snd_pcm_rate_hw_params(snd_pcm_t *pcm, snd_pcm_hw_params_t * params)
        rate->get_idx = snd_pcm_linear_get_index(src_format, SND_PCM_FORMAT_S16);
        rate->put_idx = snd_pcm_linear_put_index(SND_PCM_FORMAT_S16, dst_format);
        if (src_rate < dst_rate) {
-               rate->func = snd_pcm_rate_expand;
+               if (src_format == dst_format && src_format == SND_PCM_FORMAT_S16)
+                       rate->func = snd_pcm_rate_expand_s16;
+               else
+                       rate->func = snd_pcm_rate_expand;
                /* pitch is get_threshold */
        } else {
-               rate->func = snd_pcm_rate_shrink;
+               if (src_format == dst_format && src_format == SND_PCM_FORMAT_S16)
+                       rate->func = snd_pcm_rate_shrink_s16;
+               else
+                       rate->func = snd_pcm_rate_shrink;
                /* pitch is get_increment */
        }
        rate->pitch = (((u_int64_t)dst_rate * LINEAR_DIV) + (src_rate / 2)) / src_rate;
-       if (CHECK_SANITY(rate->states || rate->pareas)) {
+       if (CHECK_SANITY(rate->pareas)) {
                SNDMSG("rate plugin already in use");
                return -EBUSY;
        }
-       rate->states = malloc(channels * sizeof(*rate->states));
-       if (rate->states == NULL)
-               return -ENOMEM;
        if ((buffer_size / period_size) * period_size == buffer_size &&
            (slave->buffer_size / slave->period_size) * slave->period_size == slave->buffer_size)
                return 0;
@@ -550,10 +620,6 @@ static int snd_pcm_rate_hw_free(snd_pcm_t *pcm)
                rate->pareas = NULL;
                rate->sareas = NULL;
        }
-       if (rate->states) {
-               free(rate->states);
-               rate->states = NULL;
-       }
        return snd_pcm_hw_free(rate->slave);
 }
 
@@ -657,6 +723,12 @@ static int snd_pcm_rate_sw_params(snd_pcm_t *pcm, snd_pcm_sw_params_t * params)
                        return -EIO;
                }
        }
+       if (rate->pitch >= LINEAR_DIV) {
+               /* shift for expand linear interpolation */
+               rate->pitch_shift = 0;
+               while ((rate->pitch >> rate->pitch_shift) >= (1 << 16))
+                       rate->pitch_shift++;
+       }
        recalc(pcm, &sparams->avail_min);
        rate->orig_avail_min = sparams->avail_min;
        recalc(pcm, &sparams->xfer_align);
@@ -682,15 +754,8 @@ static int snd_pcm_rate_sw_params(snd_pcm_t *pcm, snd_pcm_sw_params_t * params)
 static int snd_pcm_rate_init(snd_pcm_t *pcm)
 {
        snd_pcm_rate_t *rate = pcm->private_data;
-       unsigned int k;
        switch (rate->type) {
        case RATE_TYPE_LINEAR:
-               for (k = 0; k < pcm->channels; ++k) {
-                       rate->states[k].u.linear.sum = 0;
-                       rate->states[k].u.linear.old_sample = 0;
-                       rate->states[k].u.linear.new_sample = 0;
-                       rate->states[k].u.linear.init = 0;
-               }
                break;
        default:
                assert(0);
@@ -708,9 +773,7 @@ snd_pcm_rate_write_areas1(snd_pcm_t *pcm,
        snd_pcm_rate_t *rate = pcm->private_data;
        rate->func(slave_areas, slave_offset, rate->slave->period_size,
                   areas, offset, pcm->period_size,
-                  pcm->channels,
-                  rate->get_idx, rate->put_idx,
-                  rate->pitch, rate->states);
+                  pcm->channels, rate);
        return 0;
 }
 
@@ -724,9 +787,7 @@ snd_pcm_rate_read_areas1(snd_pcm_t *pcm,
        snd_pcm_rate_t *rate = pcm->private_data;
        rate->func(areas, offset, pcm->period_size,
                   slave_areas, slave_offset, rate->slave->period_size,
-                  pcm->channels,
-                  rate->get_idx, rate->put_idx,
-                  rate->pitch, rate->states);
+                  pcm->channels, rate);
        return 0;
 }