git.alsa-project.org Git - alsa-lib.git/commitdiff
dmix: add S24_3LE support
author Clemens Ladisch <clemens@ladisch.de>
Mon, 19 Dec 2005 07:39:03 +0000 (07:39 +0000)
committer Clemens Ladisch <clemens@ladisch.de>
Mon, 19 Dec 2005 07:39:03 +0000 (07:39 +0000)
Add to the dmix plugin support for the S24_3LE sample format which is
used by 24-bit USB devices.

The optimized assembler version uses only 23 bits for sample data so
that the lowest bit can be used for synchronization because there is no
24-bit cmpxchg instruction.

src/pcm/pcm_direct.c
src/pcm/pcm_direct.h
src/pcm/pcm_dmix.c
src/pcm/pcm_dmix_generic.c
src/pcm/pcm_dmix_i386.c
src/pcm/pcm_dmix_i386.h
src/pcm/pcm_dmix_x86_64.c
src/pcm/pcm_dmix_x86_64.h

index 43eb6a48c7ad4de6aabbc2e11bfa71b3c20ce1de..535cc29fc80b2f55ab88bee716c6869e7d872ab7 100644 (file)
@@ -800,6 +800,7 @@ int snd_pcm_direct_initialize_slave(snd_pcm_direct_t *dmix, snd_pcm_t *spcm, str
                        case SND_PCM_FORMAT_S32_BE:
                        case SND_PCM_FORMAT_S16_LE:
                        case SND_PCM_FORMAT_S16_BE:
+                       case SND_PCM_FORMAT_S24_3LE:
                                break;
                        default:
                                SNDERR("invalid format");
index 4455982781ee7f74d2a84d7c4d6b1e05762c0b70..6588fa5601909740faaae5b1903cfd0028094a53 100644 (file)
@@ -34,6 +34,11 @@ typedef void (mix_areas2_t)(unsigned int size,
                        volatile signed int *sum, size_t dst_step,
                        size_t src_step, size_t sum_step);
 
+typedef void (mix_areas3_t)(unsigned int size,     /* mixer callback for 3-byte (S24_3LE) samples; size = number of samples */
+                       volatile unsigned char *dst, unsigned char *src,
+                       volatile signed int *sum, size_t dst_step,
+                       size_t src_step, size_t sum_step);      /* the three steps are byte strides */
+
 struct slave_params {
        snd_pcm_format_t format;
        int rate;
@@ -120,6 +125,7 @@ struct snd_pcm_direct {
                        signed int *sum_buffer;         /* shared sum buffer */
                        mix_areas1_t *mix_areas1;
                        mix_areas2_t *mix_areas2;
+                       mix_areas3_t *mix_areas3;
                } dmix;
                struct {
                } dsnoop;
index 2f35d32eb41b0adee5c82a236df65d47ce0dfc73..1f1a3831c63145abe7c22b92dd9d519277ce3cb9 100644 (file)
@@ -188,7 +188,8 @@ static void mix_areas(snd_pcm_direct_t *dmix,
                        sum = dmix->u.dmix.sum_buffer + channels * dst_ofs + chn;
                        dmix->u.dmix.mix_areas1(size, dst, src, sum, dst_step, src_step, channels * sizeof(signed int));
                }
-       } else {
+       } else if (dmix->shmptr->s.format == SND_PCM_FORMAT_S32_LE ||
+                  dmix->shmptr->s.format == SND_PCM_FORMAT_S32_BE) {
                signed int *src;
                volatile signed int *dst;
                if (dmix->interleaved) {
@@ -216,6 +217,32 @@ static void mix_areas(snd_pcm_direct_t *dmix,
                        sum = dmix->u.dmix.sum_buffer + channels * dst_ofs + chn;
                        dmix->u.dmix.mix_areas2(size, dst, src, sum, dst_step, src_step, channels * sizeof(signed int));
                }
+       } else { /* SND_PCM_FORMAT_S24_3LE */
+               unsigned char *src;
+               volatile unsigned char *dst;
+               if (dmix->interleaved) {
+                       /*
+                        * process all areas in one loop
+                        * it optimizes the memory accesses for this case
+                        */
+                       dmix->u.dmix.mix_areas3(size * channels,
+                                       ((char *)dst_areas[0].addr) + 3 * dst_ofs * channels,
+                                       ((char *)src_areas[0].addr) + 3 * src_ofs * channels,
+                                       dmix->u.dmix.sum_buffer + (dst_ofs * channels),
+                                       3, 3, sizeof(signed int));
+                       return;
+               }
+               for (chn = 0; chn < channels; chn++) {
+                       dchn = dmix->bindings ? dmix->bindings[chn] : chn;
+                       if (dchn >= dmix->shmptr->s.channels)
+                               continue;
+                       src_step = src_areas[chn].step / 8;
+                       dst_step = dst_areas[dchn].step / 8;
+                       src = (unsigned char *)(((char *)src_areas[chn].addr + src_areas[chn].first / 8) + (src_ofs * src_step));
+                       dst = (unsigned char *)(((char *)dst_areas[dchn].addr + dst_areas[dchn].first / 8) + (dst_ofs * dst_step));
+                       sum = dmix->u.dmix.sum_buffer + channels * dst_ofs + chn;
+                       dmix->u.dmix.mix_areas3(size, dst, src, sum, dst_step, src_step, channels * sizeof(signed int));
+               }
        }
 }
 
index ed6ebe6f1d409e7b58fb2cad7fbc9936a3dbb24f..4e45ba893a11862394c9b2bf829d51bfb1439648 100644 (file)
@@ -121,7 +121,8 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 /* non-concurrent version, supporting both endians */
 static unsigned long long dmix_supported_format =
        (1ULL << SND_PCM_FORMAT_S16_LE) | (1ULL << SND_PCM_FORMAT_S32_LE) |
-       (1ULL << SND_PCM_FORMAT_S16_BE) | (1ULL << SND_PCM_FORMAT_S32_BE);
+       (1ULL << SND_PCM_FORMAT_S16_BE) | (1ULL << SND_PCM_FORMAT_S32_BE) |
+       (1ULL << SND_PCM_FORMAT_S24_3LE);
 
 #include <byteswap.h>
 
@@ -245,6 +246,37 @@ static void mix_areas2_swap(unsigned int size,
        }
 }
 
+/*
+ * Mix size S24_3LE samples (always little endian) from src into dst; sum keeps the unclipped totals, all steps are in bytes.
+ */
+static void mix_areas3(unsigned int size,
+                      volatile unsigned char *dst, unsigned char *src,
+                      volatile signed int *sum, size_t dst_step,
+                      size_t src_step, size_t sum_step)
+{
+       while (size) {  /* size == 0 does nothing (a bare for (;;) would wrap --size and overrun the buffers) */
+               signed int sample = src[0] | (src[1] << 8) | (((signed char *)src)[2] * 0x10000);       /* multiply: << on a negative value is undefined */
+               if (!(dst[0] | dst[1] | dst[2])) {      /* dst is still all-zero: the sample becomes the sum */
+                       *sum = sample;
+               } else {
+                       sample += *sum;
+                       *sum = sample;                  /* sum stores the total before clipping */
+                       if (sample > 0x7fffff)          /* clip to the signed 24-bit range */
+                               sample = 0x7fffff;
+                       else if (sample < -0x800000)
+                               sample = -0x800000;
+               }
+               dst[0] = sample;
+               dst[1] = (unsigned int)sample >> 8;     /* shift as unsigned: >> on a negative int is implementation-defined */
+               dst[2] = (unsigned int)sample >> 16;
+               if (!--size)
+                       return;
+               dst += dst_step;
+               src += src_step;
+               sum = (volatile signed int *) ((volatile char *)sum + sum_step);
+       }
+}
+
 
 static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 {
@@ -255,6 +287,7 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
                dmix->u.dmix.mix_areas1 = mix_areas1_swap;
                dmix->u.dmix.mix_areas2 = mix_areas2_swap;
        }
+       dmix->u.dmix.mix_areas3 = mix_areas3;
 }
 
 #endif
index 7cbf723dcd7165664ddb1f27720588f2d280ea69..3ea206c633ba54c812e9087285dcff8801de2881 100644 (file)
@@ -5,33 +5,43 @@
 #define MIX_AREAS1 mix_areas1
 #define MIX_AREAS1_MMX mix_areas1_mmx
 #define MIX_AREAS2 mix_areas2
+#define MIX_AREAS3 mix_areas3
+#define MIX_AREAS3_CMOV mix_areas3_cmov
 #define LOCK_PREFIX ""
 #include "pcm_dmix_i386.h"
 #undef MIX_AREAS1
 #undef MIX_AREAS1_MMX
 #undef MIX_AREAS2
+#undef MIX_AREAS3
+#undef MIX_AREAS3_CMOV
 #undef LOCK_PREFIX
 
 #define MIX_AREAS1 mix_areas1_smp
 #define MIX_AREAS1_MMX mix_areas1_smp_mmx
 #define MIX_AREAS2 mix_areas2_smp
+#define MIX_AREAS3 mix_areas3_smp
+#define MIX_AREAS3_CMOV mix_areas3_smp_cmov
 #define LOCK_PREFIX "lock ; "
 #include "pcm_dmix_i386.h"
 #undef MIX_AREAS1
 #undef MIX_AREAS1_MMX
 #undef MIX_AREAS2
+#undef MIX_AREAS3
+#undef MIX_AREAS3_CMOV
 #undef LOCK_PREFIX
  
 static unsigned long long dmix_supported_format =
-       (1ULL << SND_PCM_FORMAT_S16_LE) | (1ULL << SND_PCM_FORMAT_S32_LE);
+       (1ULL << SND_PCM_FORMAT_S16_LE) |
+       (1ULL << SND_PCM_FORMAT_S32_LE) |
+       (1ULL << SND_PCM_FORMAT_S24_3LE);
 
 static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 {
        FILE *in;
        char line[255];
-       int smp = 0, mmx = 0;
+       int smp = 0, mmx = 0, cmov = 0;
        
-       /* try to determine, if we have a MMX capable CPU */
+       /* try to determine the capabilities of the CPU */
        in = fopen("/proc/cpuinfo", "r");
        if (in) {
                while (!feof(in)) {
@@ -41,15 +51,21 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
                        else if (!strncmp(line, "flags", 5)) {
                                if (strstr(line, " mmx"))
                                        mmx = 1;
+                               if (strstr(line, " cmov"))
+                                       cmov = 1;
                        }
                }
                fclose(in);
        }
-       // printf("MMX: %i, SMP: %i\n", mmx, smp);
        if (mmx) {
                dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp_mmx : mix_areas1_mmx;
        } else {
                dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp : mix_areas1;
        }
        dmix->u.dmix.mix_areas2 = smp > 1 ? mix_areas2_smp : mix_areas2;
+       if (cmov) {
+               dmix->u.dmix.mix_areas3 = smp > 1 ? mix_areas3_smp_cmov : mix_areas3_cmov;
+       } else {
+               dmix->u.dmix.mix_areas3 = smp > 1 ? mix_areas3_smp: mix_areas3;
+       }
 }
index aaa04ffd2391d8636ea38bfa7f60cd9cbc0fb395..6875b31175507a5e9f561f9b5f7eb10dd550db91 100644 (file)
@@ -352,3 +352,208 @@ static void MIX_AREAS2(unsigned int size,
                : "esi", "edi", "edx", "ecx", "eax"
        );
 }
+
+/*
+ * 24-bit (S24_3LE) mixer for plain i386 (no cmov); every value stored to dst has bit 0 set as a sync flag, leaving 23 data bits
+ */
+static void MIX_AREAS3(unsigned int size,
+                      volatile unsigned char *dst, unsigned char *src,
+                      volatile signed int *sum, size_t dst_step,
+                      size_t src_step, size_t sum_step)
+{
+       unsigned int old_ebx;
+
+       /*
+        *  ESI - src
+        *  EDI - dst
+        *  EBX - sum
+        *  ECX - sample, later old_sample (snapshot of *sum)
+        *  EAX - sample / temporary
+        *  EDX - sum_sample (value of *sum read before this sample is added)
+        */
+       __asm__ __volatile__ (
+               "\n"
+
+               "\tmovl %%ebx, %7\n"    /* save ebx in old_ebx: it is the GOT pointer under -fPIC */
+               /*
+                *  initialization, load ESI, EDI, EBX registers; jump straight to the exit when size == 0
+                */
+               "\tmovl %1, %%edi\n"
+               "\tmovl %2, %%esi\n"
+               "\tmovl %3, %%ebx\n"
+               "\tcmpl $0, %0\n"
+               "\tjnz 1f\n"
+               "\tjmp 6f\n"
+
+               "\t.p2align 4,,15\n"
+
+               "1:"
+
+               /*
+                *   sample = *src;                      (3 bytes; only the top byte is sign-extended)
+                *   sum_sample = *sum;
+                *   if (test_and_set_bit(0, dst) == 0)  (bit 0 clear: nothing stored to dst yet, every store below sets it)
+                *     sample -= sum_sample;             (so that the add below leaves just this sample in *sum)
+                *   *sum += sample;
+                */
+               "\tmovsbl 2(%%esi), %%eax\n"
+               "\tmovzwl (%%esi), %%ecx\n"
+               "\tmovl (%%ebx), %%edx\n"
+               "\tsall $16, %%eax\n"
+               "\t" LOCK_PREFIX "btsl $0, (%%edi)\n"
+               "\tleal (%%ecx,%%eax,1), %%ecx\n"
+               "\tjc 2f\n"
+               "\tsubl %%edx, %%ecx\n"
+               "2:"
+               "\t" LOCK_PREFIX "addl %%ecx, (%%ebx)\n"
+
+               /*
+                *   do {
+                *     sample = old_sample = *sum;
+                *     saturate(sample);
+                *     *dst = sample | 1;               (written as a 16-bit store plus an 8-bit store)
+                *   } while (old_sample != *sum);      (redo the store if *sum no longer equals the snapshot)
+                */
+
+               "3:"
+               "\tmovl (%%ebx), %%ecx\n"
+               /*
+                *  if (sample > 0x7fffff) store 0x7fffff
+                */
+               "\tmovl $0x7fffff, %%eax\n"
+               "\tcmpl %%eax, %%ecx\n"
+               "\tjg 4f\n"
+               /*
+                *  if (sample < -0x7fffff) store -0x7fffff, otherwise store sample | 1
+                */
+               "\tmovl $-0x7fffff, %%eax\n"
+               "\tcmpl %%eax, %%ecx\n"
+               "\tjl 4f\n"
+               "\tmovl %%ecx, %%eax\n"
+               "\torl $1, %%eax\n"
+               "4:"
+               "\tmovw %%ax, (%%edi)\n"
+               "\tshrl $16, %%eax\n"
+               "\tmovb %%al, 2(%%edi)\n"
+               "\tcmpl %%ecx, (%%ebx)\n"
+               "\tjnz 3b\n"
+
+               /*
+                * stop when --size reaches 0, otherwise advance dst, src and sum by their byte steps
+                */
+               "\tdecl %0\n"
+               "\tjz 6f\n"
+               "\tadd %4, %%edi\n"
+               "\tadd %5, %%esi\n"
+               "\tadd %6, %%ebx\n"
+               "\tjmp 1b\n"
+               
+               "6:"
+               "\tmovl %7, %%ebx\n"    /* restore ebx (GOT pointer under -fPIC) */
+
+               : /* no output regs -- NOTE(review): %0 (size) and %7 (old_ebx) are written by the asm although declared as inputs */
+               : "m" (size), "m" (dst), "m" (src),
+                 "m" (sum), "m" (dst_step), "m" (src_step),
+                 "m" (sum_step), "m" (old_ebx)
+               : "esi", "edi", "edx", "ecx", "eax"
+       );
+}
+
+/*
+ * 24-bit (S24_3LE) mixer for Pentium Pro/II (needs cmov); every value stored to dst has bit 0 set as a sync flag, leaving 23 data bits
+ */
+static void MIX_AREAS3_CMOV(unsigned int size,
+                           volatile unsigned char *dst, unsigned char *src,
+                           volatile signed int *sum, size_t dst_step,
+                           size_t src_step, size_t sum_step)
+{
+       unsigned int old_ebx;
+
+       /*
+        *  ESI - src
+        *  EDI - dst
+        *  EBX - sum
+        *  ECX - sample, later old_sample (snapshot of *sum)
+        *  EAX - sample / temporary
+        *  EDX - sum_sample, later the lower clipping bound
+        */
+       __asm__ __volatile__ (
+               "\n"
+
+               "\tmovl %%ebx, %7\n"    /* save ebx in old_ebx: it is the GOT pointer under -fPIC */
+               /*
+                *  initialization, load ESI, EDI, EBX registers; jump straight to the exit when size == 0
+                */
+               "\tmovl %1, %%edi\n"
+               "\tmovl %2, %%esi\n"
+               "\tmovl %3, %%ebx\n"
+               "\tcmpl $0, %0\n"
+               "\tjz 6f\n"
+
+               "\t.p2align 4,,15\n"
+
+               "1:"
+
+               /*
+                *   sample = *src;                      (3 bytes; only the top byte is sign-extended)
+                *   sum_sample = *sum;
+                *   if (test_and_set_bit(0, dst) == 0)  (bit 0 clear: nothing stored to dst yet, every store below sets it)
+                *     sample -= sum_sample;             (so that the add below leaves just this sample in *sum)
+                *   *sum += sample;
+                */
+               "\tmovsbl 2(%%esi), %%eax\n"
+               "\tmovzwl (%%esi), %%ecx\n"
+               "\tmovl (%%ebx), %%edx\n"
+               "\tsall $16, %%eax\n"
+               "\t" LOCK_PREFIX "btsl $0, (%%edi)\n"
+               "\tleal (%%ecx,%%eax,1), %%ecx\n"
+               "\tjc 2f\n"
+               "\tsubl %%edx, %%ecx\n"
+               "2:"
+               "\t" LOCK_PREFIX "addl %%ecx, (%%ebx)\n"
+
+               /*
+                *   do {
+                *     sample = old_sample = *sum;
+                *     saturate(sample);                (clamped to [-0x7fffff, 0x7fffff] with two cmov instructions)
+                *     *dst = sample | 1;               (written as a 16-bit store plus an 8-bit store)
+                *   } while (old_sample != *sum);      (redo the store if *sum no longer equals the snapshot)
+                */
+
+               "3:"
+               "\tmovl (%%ebx), %%ecx\n"
+
+               "\tmovl $0x7fffff, %%eax\n"
+               "\tmovl $-0x7fffff, %%edx\n"
+               "\tcmpl %%eax, %%ecx\n"
+               "\tcmovng %%ecx, %%eax\n"
+               "\tcmpl %%edx, %%ecx\n"
+               "\tcmovl %%edx, %%eax\n"
+
+               "\torl $1, %%eax\n"
+               "\tmovw %%ax, (%%edi)\n"
+               "\tshrl $16, %%eax\n"
+               "\tmovb %%al, 2(%%edi)\n"
+
+               "\tcmpl %%ecx, (%%ebx)\n"
+               "\tjnz 3b\n"
+
+               /*
+                * advance dst, src and sum by their byte steps, then loop while --size != 0
+                */
+               "\tadd %4, %%edi\n"
+               "\tadd %5, %%esi\n"
+               "\tadd %6, %%ebx\n"
+               "\tdecl %0\n"
+               "\tjnz 1b\n"
+               
+               "6:"
+               "\tmovl %7, %%ebx\n"    /* restore ebx (GOT pointer under -fPIC) */
+
+               : /* no output regs -- NOTE(review): %0 (size) and %7 (old_ebx) are written by the asm although declared as inputs */
+               : "m" (size), "m" (dst), "m" (src),
+                 "m" (sum), "m" (dst_step), "m" (src_step),
+                 "m" (sum_step), "m" (old_ebx)
+               : "esi", "edi", "edx", "ecx", "eax"
+       );
+}
index a64888f26fdad6f1e9857d677da608f01d34b942..7632388c51d0aa0190f60176f34e7e1043d703e7 100644 (file)
@@ -4,22 +4,28 @@
 
 #define MIX_AREAS1 mix_areas1
 #define MIX_AREAS2 mix_areas2
+#define MIX_AREAS3 mix_areas3
 #define LOCK_PREFIX ""
 #include "pcm_dmix_x86_64.h"
 #undef MIX_AREAS1
 #undef MIX_AREAS2
+#undef MIX_AREAS3
 #undef LOCK_PREFIX
 
 #define MIX_AREAS1 mix_areas1_smp
 #define MIX_AREAS2 mix_areas2_smp
+#define MIX_AREAS3 mix_areas3_smp
 #define LOCK_PREFIX "lock ; "
 #include "pcm_dmix_x86_64.h"
 #undef MIX_AREAS1
 #undef MIX_AREAS2
+#undef MIX_AREAS3
 #undef LOCK_PREFIX
  
 static unsigned long long dmix_supported_format =
-       (1ULL << SND_PCM_FORMAT_S16_LE) | (1ULL << SND_PCM_FORMAT_S32_LE);
+       (1ULL << SND_PCM_FORMAT_S16_LE) |
+       (1ULL << SND_PCM_FORMAT_S32_LE) |
+       (1ULL << SND_PCM_FORMAT_S24_3LE);
 
 static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 {
@@ -40,4 +46,5 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
        // printf("SMP: %i\n", smp);
        dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp : mix_areas1;
        dmix->u.dmix.mix_areas2 = smp > 1 ? mix_areas2_smp : mix_areas2;
+       dmix->u.dmix.mix_areas3 = smp > 1 ? mix_areas3_smp : mix_areas3;
 }
index 973ed3f9fe099180ca942549a68c2131362d6bc0..134861384525f646537c5b8407d90c2e91c55f34 100644 (file)
@@ -237,3 +237,105 @@ static void MIX_AREAS2(unsigned int size,
        );
 }
 
+/*
+ *  24-bit (S24_3LE) version; every value stored to dst has bit 0 set as a sync flag, leaving 23 data bits
+ */
+static void MIX_AREAS3(unsigned int size,
+                      volatile unsigned char *dst, unsigned char *src,
+                      volatile signed int *sum, size_t dst_step,
+                      size_t src_step, size_t sum_step)
+{
+       unsigned long long old_rbx;
+
+       /*
+        *  RSI - src
+        *  RDI - dst
+        *  RBX - sum
+        *  ECX - sample, later old_sample (snapshot of *sum)
+        *  EAX - sample / temporary
+        *  EDX - sum_sample, later the lower clipping bound
+        */
+       __asm__ __volatile__ (
+               "\n"
+
+               "\tmovq %%rbx, %7\n"    /* save rbx in old_rbx; it is used as the sum pointer below */
+               /*
+                *  initialization, load RSI, RDI, RBX registers
+                */
+               "\tmovq %1, %%rdi\n"
+               "\tmovq %2, %%rsi\n"
+               "\tmovq %3, %%rbx\n"
+
+               /*
+                * nothing to do when size == 0: jump straight to the exit
+                */
+               "\tcmpl $0, %0\n"
+               "jz 6f\n"
+
+               "\t.p2align 4,,15\n"
+
+               "1:"
+
+               /*
+                *   sample = *src;                      (3 bytes; only the top byte is sign-extended)
+                *   sum_sample = *sum;
+                *   if (test_and_set_bit(0, dst) == 0)  (bit 0 clear: nothing stored to dst yet, every store below sets it)
+                *     sample -= sum_sample;             (so that the add below leaves just this sample in *sum)
+                *   *sum += sample;
+                */
+               "\tmovsbl 2(%%rsi), %%eax\n"
+               "\tmovzwl (%%rsi), %%ecx\n"     /* zero-extend the low 16 bits: sign-extending them corrupts samples with bit 15 set */
+               "\tmovl (%%rbx), %%edx\n"
+               "\tsall $16, %%eax\n"
+               "\t" LOCK_PREFIX "btsl $0, (%%rdi)\n"
+               "\tleal (%%ecx,%%eax,1), %%ecx\n"
+               "\tjc 2f\n"
+               "\tsubl %%edx, %%ecx\n"
+               "2:"
+               "\t" LOCK_PREFIX "addl %%ecx, (%%rbx)\n"
+
+               /*
+                *   do {
+                *     sample = old_sample = *sum;
+                *     saturate(sample);                (clamped to [-0x7fffff, 0x7fffff] with two cmov instructions)
+                *     *dst = sample | 1;               (written as a 16-bit store plus an 8-bit store)
+                *   } while (old_sample != *sum);      (redo the store if *sum no longer equals the snapshot)
+                */
+
+               "3:"
+               "\tmovl (%%rbx), %%ecx\n"
+
+               "\tmovl $0x7fffff, %%eax\n"
+               "\tmovl $-0x7fffff, %%edx\n"
+               "\tcmpl %%eax, %%ecx\n"
+               "\tcmovng %%ecx, %%eax\n"
+               "\tcmpl %%edx, %%ecx\n"
+               "\tcmovl %%edx, %%eax\n"
+
+               "\torl $1, %%eax\n"
+               "\tmovw %%ax, (%%rdi)\n"
+               "\tshrl $16, %%eax\n"
+               "\tmovb %%al, 2(%%rdi)\n"
+       
+               "\tcmpl %%ecx, (%%rbx)\n"
+               "\tjnz 3b\n"
+
+               /*
+                * advance dst, src and sum by their byte steps, then loop while --size != 0
+                */
+               "\tadd %4, %%rdi\n"
+               "\tadd %5, %%rsi\n"
+               "\tadd %6, %%rbx\n"
+               "\tdecl %0\n"
+               "\tjnz 1b\n"
+               
+               "6:"
+               "\tmovq %7, %%rbx\n"    /* restore rbx */
+
+               : /* no output regs -- NOTE(review): %0 (size) and %7 (old_rbx) are written by the asm although declared as inputs */
+               : "m" (size), "m" (dst), "m" (src),
+                 "m" (sum), "m" (dst_step), "m" (src_step),
+                 "m" (sum_step), "m" (old_rbx)
+               : "rsi", "rdi", "edx", "ecx", "eax"
+       );
+}