Implemented snd_pcm_rewind() for the dmix plugin
author Jaroslav Kysela <perex@perex.cz>
Thu, 10 Jan 2008 09:01:14 +0000 (10:01 +0100)
committer Jaroslav Kysela <perex@perex.cz>
Thu, 10 Jan 2008 09:01:14 +0000 (10:01 +0100)
src/pcm/pcm_direct.h
src/pcm/pcm_dmix.c
src/pcm/pcm_dmix_generic.c
src/pcm/pcm_dmix_i386.c
src/pcm/pcm_dmix_i386.h
src/pcm/pcm_dmix_x86_64.c
src/pcm/pcm_dmix_x86_64.h

index 803329e..006617a 100644 (file)
@@ -162,6 +162,10 @@ struct snd_pcm_direct {
                        mix_areas_32_t *mix_areas_32;
                        mix_areas_24_t *mix_areas_24;
                        mix_areas_u8_t *mix_areas_u8;
+                       mix_areas_16_t *remix_areas_16;
+                       mix_areas_32_t *remix_areas_32;
+                       mix_areas_24_t *remix_areas_24;
+                       mix_areas_u8_t *remix_areas_u8;
                } dmix;
                struct {
                } dsnoop;
index 2e70b6c..e54cf92 100644 (file)
@@ -213,6 +213,70 @@ static void mix_areas(snd_pcm_direct_t *dmix,
        }
 }
 
+static void remix_areas(snd_pcm_direct_t *dmix, /* run the format-specific remix callback over `size` frames of src/dst */
+                       const snd_pcm_channel_area_t *src_areas,
+                       const snd_pcm_channel_area_t *dst_areas,
+                       snd_pcm_uframes_t src_ofs,
+                       snd_pcm_uframes_t dst_ofs,
+                       snd_pcm_uframes_t size)
+{
+       unsigned int src_step, dst_step;
+       unsigned int chn, dchn, channels, sample_size;
+       mix_areas_t *do_remix_areas;
+       
+       channels = dmix->channels;
+       switch (dmix->shmptr->s.format) { /* choose sample width (bytes) and callback from the shared slave format */
+       case SND_PCM_FORMAT_S16_LE:
+       case SND_PCM_FORMAT_S16_BE:
+               sample_size = 2;
+               do_remix_areas = (mix_areas_t *)dmix->u.dmix.remix_areas_16;
+               break;
+       case SND_PCM_FORMAT_S32_LE:
+       case SND_PCM_FORMAT_S32_BE:
+               sample_size = 4;
+               do_remix_areas = (mix_areas_t *)dmix->u.dmix.remix_areas_32;
+               break;
+       case SND_PCM_FORMAT_S24_3LE:
+               sample_size = 3;
+               do_remix_areas = (mix_areas_t *)dmix->u.dmix.remix_areas_24;
+               break;
+       case SND_PCM_FORMAT_U8:
+               sample_size = 1;
+               do_remix_areas = (mix_areas_t *)dmix->u.dmix.remix_areas_u8;
+               break;
+       default:
+               return; /* any other format: nothing is remixed */
+       }
+       if (dmix->interleaved) {
+               /*
+                * process all areas in one loop
+                * it optimizes the memory accesses for this case
+                */
+               do_remix_areas(size * channels, /* every channel's sample is handled as one contiguous run */
+                              (unsigned char *)dst_areas[0].addr + sample_size * dst_ofs * channels,
+                              (unsigned char *)src_areas[0].addr + sample_size * src_ofs * channels,
+                              dmix->u.dmix.sum_buffer + dst_ofs * channels,
+                              sample_size,
+                              sample_size,
+                              sizeof(signed int)); /* sum buffer advances one signed int per sample */
+               return;
+       }
+       for (chn = 0; chn < channels; chn++) { /* non-interleaved path: one callback invocation per channel */
+               dchn = dmix->bindings ? dmix->bindings[chn] : chn; /* destination channel: from bindings when present, else identity */
+               if (dchn >= dmix->shmptr->s.channels)
+                       continue; /* destination channel is outside the slave's channel count: skip it */
+               src_step = src_areas[chn].step / 8; /* used below as a byte stride; the / 8 presumably converts bits to bytes */
+               dst_step = dst_areas[dchn].step / 8;
+               do_remix_areas(size,
+                              ((unsigned char *)dst_areas[dchn].addr + dst_areas[dchn].first / 8) + dst_ofs * dst_step,
+                              ((unsigned char *)src_areas[chn].addr + src_areas[chn].first / 8) + src_ofs * src_step,
+                              dmix->u.dmix.sum_buffer + channels * dst_ofs + chn, /* sum buffer is indexed frame by frame, `channels` ints per frame */
+                              dst_step,
+                              src_step,
+                              channels * sizeof(signed int));
+       }
+}
+
 /*
  * if no concurrent access is allowed in the mixing routines, we need to protect
  * the area via semaphore
@@ -234,7 +298,7 @@ static void snd_pcm_dmix_sync_area(snd_pcm_t *pcm)
 {
        snd_pcm_direct_t *dmix = pcm->private_data;
        snd_pcm_uframes_t slave_hw_ptr, slave_appl_ptr, slave_size;
-       snd_pcm_uframes_t appl_ptr, size;
+       snd_pcm_uframes_t appl_ptr, size, transfer;
        const snd_pcm_channel_area_t *src_areas, *dst_areas;
        
        /* calculate the size to transfer */
@@ -247,6 +311,27 @@ static void snd_pcm_dmix_sync_area(snd_pcm_t *pcm)
        if (size >= pcm->boundary / 2)
                size = pcm->boundary - size;
 
+       /* the slave_appl_ptr can be far behind the slave_hw_ptr */
+       /* reduce mixing and errors here - just skip the writes that were not caught */
+       if (dmix->slave_hw_ptr < dmix->slave_appl_ptr)
+               slave_size = dmix->slave_appl_ptr - dmix->slave_hw_ptr;
+       else
+               slave_size = dmix->slave_appl_ptr + (dmix->slave_boundary - dmix->slave_hw_ptr);
+       if (slave_size > dmix->slave_buffer_size) {
+               transfer = dmix->slave_buffer_size - slave_size;
+               if (transfer > size)
+                       transfer = size;
+               dmix->last_appl_ptr += transfer;
+               dmix->last_appl_ptr %= pcm->boundary;
+               dmix->slave_appl_ptr += transfer;
+               dmix->slave_appl_ptr %= dmix->slave_boundary;
+               size = dmix->appl_ptr - dmix->last_appl_ptr;
+               if (! size)
+                       return;
+               if (size >= pcm->boundary / 2)
+                       size = pcm->boundary - size;
+       }
+
        /* check the available size in the slave PCM buffer */
        slave_hw_ptr = dmix->slave_hw_ptr;
        /* don't write on the last active period - this area may be cleared
@@ -276,7 +361,7 @@ static void snd_pcm_dmix_sync_area(snd_pcm_t *pcm)
        dmix->slave_appl_ptr %= dmix->slave_boundary;
        dmix_down_sem(dmix);
        for (;;) {
-               snd_pcm_uframes_t transfer = size;
+               transfer = size;
                if (appl_ptr + transfer > pcm->buffer_size)
                        transfer = pcm->buffer_size - appl_ptr;
                if (slave_appl_ptr + transfer > dmix->slave_buffer_size)
@@ -564,15 +649,78 @@ static int snd_pcm_dmix_pause(snd_pcm_t *pcm ATTRIBUTE_UNUSED, int enable ATTRIB
        return -EIO;
 }
 
-static snd_pcm_sframes_t snd_pcm_dmix_rewind(snd_pcm_t *pcm ATTRIBUTE_UNUSED, snd_pcm_uframes_t frames ATTRIBUTE_UNUSED)
+static snd_pcm_sframes_t snd_pcm_dmix_rewind(snd_pcm_t *pcm, snd_pcm_uframes_t frames) /* returns the frame count it accounts as rewound, or the hwsync result while running/draining */
 {
-#if 0
-       /* FIXME: substract samples from the mix ring buffer, too? */
+       snd_pcm_direct_t *dmix = pcm->private_data;
+       snd_pcm_uframes_t slave_appl_ptr, slave_size;
+       snd_pcm_uframes_t appl_ptr, size, transfer, result;
+       const snd_pcm_channel_area_t *src_areas, *dst_areas;
+
+       if (dmix->state == SND_PCM_STATE_RUNNING ||
+           dmix->state == SND_PCM_STATE_DRAINING)
+               return snd_pcm_dmix_hwsync(pcm); /* no rewind is attempted in these states */
+
+       if (dmix->last_appl_ptr < dmix->appl_ptr) /* distance last_appl_ptr -> appl_ptr, modulo pcm->boundary */
+               size = dmix->appl_ptr - dmix->last_appl_ptr;
+       else
+               size = dmix->appl_ptr + (pcm->boundary - dmix->last_appl_ptr);
+       if (frames < size)
+               size = frames;
+       snd_pcm_mmap_appl_backward(pcm, size); /* this span is stepped back without calling remix_areas() */
+       frames -= size;
+       if (!frames)
+               return size;
+       result = size;
+
+       if (dmix->hw_ptr < dmix->appl_ptr) /* distance hw_ptr -> appl_ptr, modulo pcm->boundary */
+               size = dmix->appl_ptr - dmix->hw_ptr;
+       else
+               size = dmix->appl_ptr + (pcm->boundary - dmix->hw_ptr);
+       if (size > frames)
+               size = frames;
+       if (dmix->slave_hw_ptr < dmix->slave_appl_ptr) /* distance slave_hw_ptr -> slave_appl_ptr */
+               slave_size = dmix->slave_appl_ptr - dmix->slave_hw_ptr;
+       else
+               slave_size = dmix->slave_appl_ptr + (dmix->slave_boundary - dmix->slave_hw_ptr); /* slave pointers wrap at slave_boundary, not pcm->boundary */
+       if (slave_size < size)
+               size = slave_size; /* never remix more than the slave distance computed above */
+       frames -= size;
+       result += size;
+               
+       /* add sample areas here */
+       src_areas = snd_pcm_mmap_areas(pcm);
+       dst_areas = snd_pcm_mmap_areas(dmix->spcm);
+       dmix->last_appl_ptr -= size;
+       dmix->last_appl_ptr %= pcm->boundary;
+       appl_ptr = dmix->last_appl_ptr % pcm->buffer_size; /* offset inside the client ring buffer */
+       dmix->slave_appl_ptr -= size;
+       dmix->slave_appl_ptr %= dmix->slave_boundary;
+       slave_appl_ptr = dmix->slave_appl_ptr % dmix->slave_buffer_size; /* offset inside the slave ring buffer */
+       dmix_down_sem(dmix);
+       for (;;) { /* remix in chunks that cross neither ring buffer's end */
+               transfer = size;
+               if (appl_ptr + transfer > pcm->buffer_size)
+                       transfer = pcm->buffer_size - appl_ptr;
+               if (slave_appl_ptr + transfer > dmix->slave_buffer_size)
+                       transfer = dmix->slave_buffer_size - slave_appl_ptr;
+               remix_areas(dmix, src_areas, dst_areas, appl_ptr, slave_appl_ptr, transfer);
+               size -= transfer;
+               if (! size)
+                       break;
+               slave_appl_ptr += transfer;
+               slave_appl_ptr %= dmix->slave_buffer_size;
+               appl_ptr += transfer;
+               appl_ptr %= pcm->buffer_size;
+       }
+       dmix->last_appl_ptr -= frames; /* `frames` is now the remainder that was not remixed; NOTE(review): appl_ptr is never moved back by the remixed `size` - confirm this is intended */
+       dmix->last_appl_ptr %= pcm->boundary;
+       dmix->slave_appl_ptr -= frames;
+       dmix->slave_appl_ptr %= dmix->slave_boundary;
+       dmix_up_sem(dmix);
+
        snd_pcm_mmap_appl_backward(pcm, frames);
-       return frames;
-#else
-       return -EIO;
-#endif
+
+       return result + frames; /* both earlier spans plus the remainder */
 }
 
 static snd_pcm_sframes_t snd_pcm_dmix_forward(snd_pcm_t *pcm, snd_pcm_uframes_t frames)
index 9bef18d..29ea91a 100644 (file)
@@ -87,7 +87,7 @@ static void mix_areas_32(unsigned int size,
        register signed int sample, old_sample;
 
        for (;;) {
-               sample = *src / 256;
+               sample = *src >> 8;
                old_sample = *sum;
                if (ARCH_CMPXCHG(dst, 0, 1) == 0)
                        sample -= old_sample;
@@ -159,6 +159,37 @@ static void generic_mix_areas_16_native(unsigned int size,
        }
 }
 
+static void generic_remix_areas_16_native(unsigned int size, /* size: sample count; must be >= 1 because it is tested only after a sample is processed */
+                                         volatile signed short *dst,
+                                         signed short *src,
+                                         volatile signed int *sum,
+                                         size_t dst_step,
+                                         size_t src_step,
+                                         size_t sum_step)
+{
+       register signed int sample;
+
+       for (;;) {
+               sample = *src;
+               if (! *dst) { /* destination sample is currently zero */
+                       *sum = -sample; /* sum is overwritten rather than accumulated */
+                       *dst = *src;
+               } else {
+                       *sum = sample = *sum - sample; /* take this source sample out of the running sum */
+                       if (sample > 0x7fff) /* clamp the new sum to the signed 16-bit range before storing */
+                               sample = 0x7fff;
+                       else if (sample < -0x8000)
+                               sample = -0x8000;
+                       *dst = sample;
+               }
+               if (!--size)
+                       return;
+               src = (signed short *) ((char *)src + src_step); /* all three steps are byte strides */
+               dst = (signed short *) ((char *)dst + dst_step);
+               sum = (signed int *)   ((char *)sum + sum_step);
+       }
+}
+
 static void generic_mix_areas_32_native(unsigned int size,
                                        volatile signed int *dst,
                                        signed int *src,
@@ -170,7 +201,7 @@ static void generic_mix_areas_32_native(unsigned int size,
        register signed int sample;
 
        for (;;) {
-               sample = *src / 256;
+               sample = *src >> 8;
                if (! *dst) {
                        *sum = sample;
                        *dst = *src;
@@ -193,6 +224,39 @@ static void generic_mix_areas_32_native(unsigned int size,
        }
 }
 
+static void generic_remix_areas_32_native(unsigned int size, /* size: sample count; must be >= 1 because it is tested only after a sample is processed */
+                                         volatile signed int *dst,
+                                         signed int *src,
+                                         volatile signed int *sum,
+                                         size_t dst_step,
+                                         size_t src_step,
+                                         size_t sum_step)
+{
+       register signed int sample;
+
+       for (;;) {
+               sample = *src >> 8; /* the sum works on the sample with its low 8 bits dropped */
+               if (! *dst) { /* destination sample is currently zero */
+                       *sum = -sample; /* sum is overwritten rather than accumulated */
+                       *dst = *src;
+               } else {
+                       *sum = sample = *sum - sample; /* take this source sample out of the running sum */
+                       if (sample > 0x7fffff) /* outside the 24-bit range: store full-scale 32-bit values */
+                               sample = 0x7fffffff;
+                       else if (sample < -0x800000)
+                               sample = -0x80000000;
+                       else
+                               sample *= 256; /* in range: scale back up to 32 bits */
+                       *dst = sample;
+               }
+               if (!--size)
+                       return;
+               src = (signed int *) ((char *)src + src_step); /* all three steps are byte strides */
+               dst = (signed int *) ((char *)dst + dst_step);
+               sum = (signed int *) ((char *)sum + sum_step);
+       }
+}
+
 static void generic_mix_areas_16_swap(unsigned int size,
                                      volatile signed short *dst,
                                      signed short *src,
@@ -225,6 +289,37 @@ static void generic_mix_areas_16_swap(unsigned int size,
        }
 }
 
+static void generic_remix_areas_16_swap(unsigned int size, /* size: sample count; must be >= 1 because it is tested only after a sample is processed */
+                                       volatile signed short *dst,
+                                       signed short *src,
+                                       volatile signed int *sum,
+                                       size_t dst_step,
+                                       size_t src_step,
+                                       size_t sum_step)
+{
+       register signed int sample;
+
+       for (;;) {
+               sample = (signed short) bswap_16(*src); /* byte-swap, then reinterpret as signed 16-bit */
+               if (! *dst) { /* destination sample is currently zero */
+                       *sum = -sample; /* sum is overwritten rather than accumulated */
+                       *dst = *src; /* raw (unswapped) source bytes are copied */
+               } else {
+                       *sum = sample = *sum - sample; /* take this source sample out of the running sum */
+                       if (sample > 0x7fff) /* clamp the new sum to the signed 16-bit range before storing */
+                               sample = 0x7fff;
+                       else if (sample < -0x8000)
+                               sample = -0x8000;
+                       *dst = (signed short) bswap_16((signed short) sample); /* swap back before storing */
+               }
+               if (!--size)
+                       return;
+               src = (signed short *) ((char *)src + src_step); /* all three steps are byte strides */
+               dst = (signed short *) ((char *)dst + dst_step);
+               sum = (signed int *)   ((char *)sum + sum_step);
+       }
+}
+
 static void generic_mix_areas_32_swap(unsigned int size,
                                      volatile signed int *dst,
                                      signed int *src,
@@ -236,7 +331,7 @@ static void generic_mix_areas_32_swap(unsigned int size,
        register signed int sample;
 
        for (;;) {
-               sample = bswap_32(*src) / 256;
+               sample = bswap_32(*src) >> 8;
                if (! *dst) {
                        *sum = sample;
                        *dst = *src;
@@ -259,6 +354,39 @@ static void generic_mix_areas_32_swap(unsigned int size,
        }
 }
 
+static void generic_remix_areas_32_swap(unsigned int size, /* size: sample count; must be >= 1 because it is tested only after a sample is processed */
+                                       volatile signed int *dst,
+                                       signed int *src,
+                                       volatile signed int *sum,
+                                       size_t dst_step,
+                                       size_t src_step,
+                                       size_t sum_step)
+{
+       register signed int sample;
+
+       for (;;) {
+               sample = (signed int) bswap_32(*src) >> 8; /* cast before shifting: bswap_32() yields an unsigned value and an unsigned shift would drop the sign of negative samples */
+               if (! *dst) { /* destination sample is currently zero */
+                       *sum = -sample; /* sum is overwritten rather than accumulated */
+                       *dst = *src; /* raw (unswapped) source bytes are copied */
+               } else {
+                       *sum = sample = *sum - sample; /* take this source sample out of the running sum */
+                       if (sample > 0x7fffff) /* outside the 24-bit range: store full-scale 32-bit values */
+                               sample = 0x7fffffff;
+                       else if (sample < -0x800000)
+                               sample = -0x80000000;
+                       else
+                               sample *= 256; /* in range: scale back up to 32 bits */
+                       *dst = bswap_32(sample); /* swap back before storing */
+               }
+               if (!--size)
+                       return;
+               src = (signed int *) ((char *)src + src_step); /* all three steps are byte strides */
+               dst = (signed int *) ((char *)dst + dst_step);
+               sum = (signed int *) ((char *)sum + sum_step);
+       }
+}
+
 /* always little endian */
 static void generic_mix_areas_24(unsigned int size,
                                 volatile unsigned char *dst,
@@ -293,6 +421,38 @@ static void generic_mix_areas_24(unsigned int size,
        }
 }
 
+static void generic_remix_areas_24(unsigned int size, /* size: sample count; must be >= 1 because it is tested only after a sample is processed */
+                                  volatile unsigned char *dst,
+                                  unsigned char *src,
+                                  volatile signed int *sum,
+                                  size_t dst_step,
+                                  size_t src_step,
+                                  size_t sum_step)
+{
+       register signed int sample;
+
+       for (;;) {
+               sample = src[0] | (src[1] << 8) | (((signed char *)src)[2] << 16); /* three bytes, lowest first; the sign comes from the third byte */
+               if (!(dst[0] | dst[1] | dst[2])) { /* all three destination bytes are zero */
+                       *sum = -sample; /* sum is overwritten; the unchanged source sample is what gets stored below */
+               } else {
+                       *sum = sample = *sum - sample; /* take this source sample out of the running sum */
+                       if (sample > 0x7fffff) /* clamp the new sum to the signed 24-bit range */
+                               sample = 0x7fffff;
+                       else if (sample < -0x800000)
+                               sample = -0x800000;
+               }
+               dst[0] = sample; /* store the low three bytes, lowest first */
+               dst[1] = sample >> 8;
+               dst[2] = sample >> 16;
+               if (!--size)
+                       return;
+               dst += dst_step; /* dst/src are byte pointers, so the steps are byte strides */
+               src += src_step;
+               sum = (signed int *) ((char *)sum + sum_step);
+       }
+}
+
 static void generic_mix_areas_u8(unsigned int size,
                                 volatile unsigned char *dst,
                                 unsigned char *src,
@@ -322,18 +482,52 @@ static void generic_mix_areas_u8(unsigned int size,
        }
 }
 
+static void generic_remix_areas_u8(unsigned int size, /* size: sample count; must be >= 1 because it is tested only after a sample is processed */
+                                  volatile unsigned char *dst,
+                                  unsigned char *src,
+                                  volatile signed int *sum,
+                                  size_t dst_step,
+                                  size_t src_step,
+                                  size_t sum_step)
+{
+       for (;;) {
+               register int sample = *src - 0x80; /* remove the 0x80 bias to get a signed value */
+               if (*dst == 0x80) { /* destination holds the biased zero value */
+                       *sum = -sample; /* sum is overwritten; the unchanged source sample is what gets stored below */
+               } else {
+                       *sum = sample = *sum - sample; /* take this source sample out of the running sum */
+                       if (sample > 0x7f) /* clamp the new sum to the signed 8-bit range */
+                               sample = 0x7f;
+                       else if (sample < -0x80)
+                               sample = -0x80;
+               }
+               *dst = sample + 0x80; /* re-apply the bias when storing */
+               if (!--size)
+                       return;
+               dst += dst_step; /* dst/src are byte pointers, so the steps are byte strides */
+               src += src_step;
+               sum = (signed int *) ((char *)sum + sum_step);
+       }
+}
+
 
 static void generic_mix_select_callbacks(snd_pcm_direct_t *dmix)
 {
        if (snd_pcm_format_cpu_endian(dmix->shmptr->s.format)) {
                dmix->u.dmix.mix_areas_16 = generic_mix_areas_16_native;
                dmix->u.dmix.mix_areas_32 = generic_mix_areas_32_native;
+               dmix->u.dmix.remix_areas_16 = generic_remix_areas_16_native; /* remix callbacks follow the same native/swap choice as the mix callbacks */
+               dmix->u.dmix.remix_areas_32 = generic_remix_areas_32_native;
        } else {
                dmix->u.dmix.mix_areas_16 = generic_mix_areas_16_swap;
                dmix->u.dmix.mix_areas_32 = generic_mix_areas_32_swap;
+               dmix->u.dmix.remix_areas_16 = generic_remix_areas_16_swap; /* format is not in CPU byte order: use the byte-swapping variants */
+               dmix->u.dmix.remix_areas_32 = generic_remix_areas_32_swap;
        }
        dmix->u.dmix.mix_areas_24 = generic_mix_areas_24;
        dmix->u.dmix.mix_areas_u8 = generic_mix_areas_u8;
+       dmix->u.dmix.remix_areas_24 = generic_remix_areas_24; /* the 24-bit and u8 callbacks are assigned regardless of the endianness test */
+       dmix->u.dmix.remix_areas_u8 = generic_remix_areas_u8;
 }
 
 #endif
index 8fbd6b9..68c4755 100644 (file)
@@ -8,6 +8,8 @@
 #define MIX_AREAS_24 mix_areas_24
 #define MIX_AREAS_24_CMOV mix_areas_24_cmov
 #define LOCK_PREFIX ""
+#define XADD "addl"
+#define XSUB "subl"
 #include "pcm_dmix_i386.h"
 #undef MIX_AREAS_16
 #undef MIX_AREAS_16_MMX
 #undef MIX_AREAS_24
 #undef MIX_AREAS_24_CMOV
 #undef LOCK_PREFIX
+#undef XADD
+#undef XSUB
+
+#define MIX_AREAS_16 remix_areas_16
+#define MIX_AREAS_16_MMX remix_areas_16_mmx
+#define MIX_AREAS_32 remix_areas_32
+#define MIX_AREAS_24 remix_areas_24
+#define MIX_AREAS_24_CMOV remix_areas_24_cmov
+#define LOCK_PREFIX ""
+#define XADD "subl"
+#define XSUB "addl"
+#include "pcm_dmix_i386.h"
+#undef MIX_AREAS_16
+#undef MIX_AREAS_16_MMX
+#undef MIX_AREAS_32
+#undef MIX_AREAS_24
+#undef MIX_AREAS_24_CMOV
+#undef LOCK_PREFIX
+#undef XADD
+#undef XSUB
 
 #define MIX_AREAS_16 mix_areas_16_smp
 #define MIX_AREAS_16_MMX mix_areas_16_smp_mmx
 #define MIX_AREAS_24 mix_areas_24_smp
 #define MIX_AREAS_24_CMOV mix_areas_24_smp_cmov
 #define LOCK_PREFIX "lock ; "
+#define XADD "addl"
+#define XSUB "subl"
+#include "pcm_dmix_i386.h"
+#undef MIX_AREAS_16
+#undef MIX_AREAS_16_MMX
+#undef MIX_AREAS_32
+#undef MIX_AREAS_24
+#undef MIX_AREAS_24_CMOV
+#undef LOCK_PREFIX
+#undef XADD
+#undef XSUB
+#define MIX_AREAS_16 remix_areas_16_smp
+#define MIX_AREAS_16_MMX remix_areas_16_smp_mmx
+#define MIX_AREAS_32 remix_areas_32_smp
+#define MIX_AREAS_24 remix_areas_24_smp
+#define MIX_AREAS_24_CMOV remix_areas_24_smp_cmov
+#define LOCK_PREFIX "lock ; "
+#define XADD "subl"
+#define XSUB "addl"
 #include "pcm_dmix_i386.h"
 #undef MIX_AREAS_16
 #undef MIX_AREAS_16_MMX
@@ -29,6 +71,8 @@
 #undef MIX_AREAS_24
 #undef MIX_AREAS_24_CMOV
 #undef LOCK_PREFIX
+#undef XADD
+#undef XSUB
  
 #define i386_dmix_supported_format \
        ((1ULL << SND_PCM_FORMAT_S16_LE) |\
@@ -71,13 +115,18 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
        
        if (mmx) {
                dmix->u.dmix.mix_areas_16 = smp > 1 ? mix_areas_16_smp_mmx : mix_areas_16_mmx;
+               dmix->u.dmix.remix_areas_16 = smp > 1 ? remix_areas_16_smp_mmx : remix_areas_16_mmx;
        } else {
                dmix->u.dmix.mix_areas_16 = smp > 1 ? mix_areas_16_smp : mix_areas_16;
+               dmix->u.dmix.remix_areas_16 = smp > 1 ? remix_areas_16_smp : remix_areas_16;
        }
        dmix->u.dmix.mix_areas_32 = smp > 1 ? mix_areas_32_smp : mix_areas_32;
+       dmix->u.dmix.remix_areas_32 = smp > 1 ? remix_areas_32_smp : remix_areas_32;
        if (cmov) {
                dmix->u.dmix.mix_areas_24 = smp > 1 ? mix_areas_24_smp_cmov : mix_areas_24_cmov;
+               dmix->u.dmix.remix_areas_24 = smp > 1 ? remix_areas_24_smp_cmov : remix_areas_24_cmov;
        } else {
                dmix->u.dmix.mix_areas_24 = smp > 1 ? mix_areas_24_smp: mix_areas_24;
+               dmix->u.dmix.remix_areas_24 = smp > 1 ? remix_areas_24_smp: remix_areas_24;
        }
 }
index 6fa4440..1ae037a 100644 (file)
@@ -83,9 +83,9 @@ static void MIX_AREAS_16(unsigned int size,
                "\t" LOCK_PREFIX "cmpxchgw %%cx, (%%edi)\n"
                "\tmovswl (%%esi), %%ecx\n"
                "\tjnz 3f\n"
-               "\tsubl %%edx, %%ecx\n"
+               "\t" XSUB " %%edx, %%ecx\n"
                "3:"
-               "\t" LOCK_PREFIX "addl %%ecx, (%%ebx)\n"
+               "\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
 
                /*
                 *   do {
@@ -202,9 +202,9 @@ static void MIX_AREAS_16_MMX(unsigned int size,
                "\t" LOCK_PREFIX "cmpxchgw %%cx, (%%edi)\n"
                "\tmovswl (%%esi), %%ecx\n"
                "\tjnz 3f\n"
-               "\tsubl %%edx, %%ecx\n"
+               "\t" XSUB " %%edx, %%ecx\n"
                "3:"
-               "\t" LOCK_PREFIX "addl %%ecx, (%%ebx)\n"
+               "\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
 
                /*
                 *   do {
@@ -291,14 +291,14 @@ static void MIX_AREAS_32(unsigned int size,
                "\tmovl (%%esi), %%ecx\n"
                /* sample >>= 8 */
                "\tsarl $8, %%ecx\n"
-               "\tsubl %%edx, %%ecx\n"
+               "\t" XSUB " %%edx, %%ecx\n"
                "\tjmp 21f\n"
                "2:"
                "\tmovl (%%esi), %%ecx\n"
                /* sample >>= 8 */
                "\tsarl $8, %%ecx\n"
                "21:"
-               "\t" LOCK_PREFIX "addl %%ecx, (%%ebx)\n"
+               "\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
 
                /*
                 *   do {
@@ -403,9 +403,9 @@ static void MIX_AREAS_24(unsigned int size,
                "\t" LOCK_PREFIX "btsl $0, (%%edi)\n"
                "\tleal (%%ecx,%%eax,1), %%ecx\n"
                "\tjc 2f\n"
-               "\tsubl %%edx, %%ecx\n"
+               "\t" XSUB " %%edx, %%ecx\n"
                "2:"
-               "\t" LOCK_PREFIX "addl %%ecx, (%%ebx)\n"
+               "\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
 
                /*
                 *   do {
@@ -508,9 +508,9 @@ static void MIX_AREAS_24_CMOV(unsigned int size,
                "\t" LOCK_PREFIX "btsl $0, (%%edi)\n"
                "\tleal (%%ecx,%%eax,1), %%ecx\n"
                "\tjc 2f\n"
-               "\tsubl %%edx, %%ecx\n"
+               "\t" XSUB " %%edx, %%ecx\n"
                "2:"
-               "\t" LOCK_PREFIX "addl %%ecx, (%%ebx)\n"
+               "\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
 
                /*
                 *   do {
index 1dea9c9..e7f9e5a 100644 (file)
@@ -6,21 +6,57 @@
 #define MIX_AREAS_32 mix_areas_32
 #define MIX_AREAS_24 mix_areas_24
 #define LOCK_PREFIX ""
+#define XADD "addl"
+#define XSUB "subl"
 #include "pcm_dmix_x86_64.h"
 #undef MIX_AREAS_16
 #undef MIX_AREAS_32
 #undef MIX_AREAS_24
 #undef LOCK_PREFIX
+#undef XADD
+#undef XSUB
+
+#define MIX_AREAS_16 remix_areas_16
+#define MIX_AREAS_32 remix_areas_32
+#define MIX_AREAS_24 remix_areas_24
+#define LOCK_PREFIX ""
+#define XADD "subl"
+#define XSUB "addl"
+#include "pcm_dmix_x86_64.h"
+#undef MIX_AREAS_16
+#undef MIX_AREAS_32
+#undef MIX_AREAS_24
+#undef LOCK_PREFIX
+#undef XADD
+#undef XSUB
 
 #define MIX_AREAS_16 mix_areas_16_smp
 #define MIX_AREAS_32 mix_areas_32_smp
 #define MIX_AREAS_24 mix_areas_24_smp
 #define LOCK_PREFIX "lock ; "
+#define XADD "addl"
+#define XSUB "subl"
+#include "pcm_dmix_x86_64.h"
+#undef MIX_AREAS_16
+#undef MIX_AREAS_32
+#undef MIX_AREAS_24
+#undef LOCK_PREFIX
+#undef XADD
+#undef XSUB
+#define MIX_AREAS_16 remix_areas_16_smp
+#define MIX_AREAS_32 remix_areas_32_smp
+#define MIX_AREAS_24 remix_areas_24_smp
+#define LOCK_PREFIX "lock ; "
+#define XADD "subl"
+#define XSUB "addl"
 #include "pcm_dmix_x86_64.h"
 #undef MIX_AREAS_16
 #undef MIX_AREAS_32
 #undef MIX_AREAS_24
 #undef LOCK_PREFIX
+#undef XADD
+#undef XSUB
  
 #define x86_64_dmix_supported_format \
        ((1ULL << SND_PCM_FORMAT_S16_LE) |\
index 6ad8133..4562734 100644 (file)
@@ -79,9 +79,9 @@ static void MIX_AREAS_16(unsigned int size,
                "\t" LOCK_PREFIX "cmpxchgw %%cx, (%%rdi)\n"
                "\tmovswl (%%rsi), %%ecx\n"
                "\tjnz 2f\n"
-               "\tsubl %%edx, %%ecx\n"
+               "\t" XSUB " %%edx, %%ecx\n"
                "2:"
-               "\t" LOCK_PREFIX "addl %%ecx, (%%rbx)\n"
+               "\t" LOCK_PREFIX XADD " %%ecx, (%%rbx)\n"
 
                /*
                 *   do {
@@ -176,14 +176,14 @@ static void MIX_AREAS_32(unsigned int size,
                "\tmovl (%%rsi), %%ecx\n"
                /* sample >>= 8 */
                "\tsarl $8, %%ecx\n"
-               "\tsubl %%edx, %%ecx\n"
+               "\t" XSUB " %%edx, %%ecx\n"
                "\tjmp 21f\n"
                "2:"
                "\tmovl (%%rsi), %%ecx\n"
                /* sample >>= 8 */
                "\tsarl $8, %%ecx\n"
                "21:"
-               "\t" LOCK_PREFIX "addl %%ecx, (%%rbx)\n"
+               "\t" LOCK_PREFIX XADD " %%ecx, (%%rbx)\n"
 
                /*
                 *   do {
@@ -290,9 +290,9 @@ static void MIX_AREAS_24(unsigned int size,
                "\t" LOCK_PREFIX "btsl $0, (%%rdi)\n"
                "\t.byte 0x67, 0x8d, 0x0c, 0x01\n"
                "\tjc 2f\n"
-               "\tsubl %%edx, %%ecx\n"
+               "\t" XSUB " %%edx, %%ecx\n"
                "2:"
-               "\t" LOCK_PREFIX "addl %%ecx, (%%rbx)\n"
+               "\t" LOCK_PREFIX XADD " %%ecx, (%%rbx)\n"
 
                /*
                 *   do {