From 45f63a8735df195f7cdf507af1001183a879fbc2 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch
Date: Mon, 19 Dec 2005 07:39:03 +0000
Subject: [PATCH] dmix: add S24_3LE support

Add to the dmix plugin support for the S24_3LE sample format which is
used by 24-bit USB devices. The optimized assembler version uses only
23 bits for sample data so that the lowest bit can be used for
synchronization because there is no 24-bit cmpxchg instruction.

All variants build the 24-bit sample from the zero-extended low 16 bits
and the sign-extended third byte.
---
 src/pcm/pcm_direct.c       |   1 +
 src/pcm/pcm_direct.h       |   6 ++
 src/pcm/pcm_dmix.c         |  29 +++++-
 src/pcm/pcm_dmix_generic.c |  35 ++++++-
 src/pcm/pcm_dmix_i386.c    |  24 ++++-
 src/pcm/pcm_dmix_i386.h    | 205 +++++++++++++++++++++++++++++++++++++
 src/pcm/pcm_dmix_x86_64.c  |   9 +-
 src/pcm/pcm_dmix_x86_64.h  | 102 ++++++++++++++++++
 8 files changed, 404 insertions(+), 7 deletions(-)

diff --git a/src/pcm/pcm_direct.c b/src/pcm/pcm_direct.c
index 43eb6a48..535cc29f 100644
--- a/src/pcm/pcm_direct.c
+++ b/src/pcm/pcm_direct.c
@@ -800,6 +800,7 @@ int snd_pcm_direct_initialize_slave(snd_pcm_direct_t *dmix, snd_pcm_t *spcm, str
 		case SND_PCM_FORMAT_S32_BE:
 		case SND_PCM_FORMAT_S16_LE:
 		case SND_PCM_FORMAT_S16_BE:
+		case SND_PCM_FORMAT_S24_3LE:
 			break;
 		default:
 			SNDERR("invalid format");
diff --git a/src/pcm/pcm_direct.h b/src/pcm/pcm_direct.h
index 44559827..6588fa56 100644
--- a/src/pcm/pcm_direct.h
+++ b/src/pcm/pcm_direct.h
@@ -34,6 +34,11 @@ typedef void (mix_areas2_t)(unsigned int size,
 			    volatile signed int *sum, size_t dst_step,
 			    size_t src_step, size_t sum_step);
 
+typedef void (mix_areas3_t)(unsigned int size,
+			    volatile unsigned char *dst, unsigned char *src,
+			    volatile signed int *sum, size_t dst_step,
+			    size_t src_step, size_t sum_step);
+
 struct slave_params {
 	snd_pcm_format_t format;
 	int rate;
@@ -120,6 +125,7 @@ struct snd_pcm_direct {
 			signed int *sum_buffer;		/* shared sum buffer */
 			mix_areas1_t *mix_areas1;
 			mix_areas2_t *mix_areas2;
+			mix_areas3_t *mix_areas3;
 		} dmix;
 		struct {
 		} dsnoop;
diff --git a/src/pcm/pcm_dmix.c b/src/pcm/pcm_dmix.c
index 2f35d32e..1f1a3831 100644
--- a/src/pcm/pcm_dmix.c
+++ b/src/pcm/pcm_dmix.c
@@ -188,7 +188,8 @@ static void mix_areas(snd_pcm_direct_t *dmix,
 			sum = dmix->u.dmix.sum_buffer + channels * dst_ofs + chn;
 			dmix->u.dmix.mix_areas1(size, dst, src, sum, dst_step, src_step, channels * sizeof(signed int));
 		}
-	} else {
+	} else if (dmix->shmptr->s.format == SND_PCM_FORMAT_S32_LE ||
+		   dmix->shmptr->s.format == SND_PCM_FORMAT_S32_BE) {
 		signed int *src;
 		volatile signed int *dst;
 		if (dmix->interleaved) {
@@ -216,6 +217,32 @@ static void mix_areas(snd_pcm_direct_t *dmix,
 			sum = dmix->u.dmix.sum_buffer + channels * dst_ofs + chn;
 			dmix->u.dmix.mix_areas2(size, dst, src, sum, dst_step, src_step, channels * sizeof(signed int));
 		}
+	} else { /* SND_PCM_FORMAT_S24_3LE */
+		unsigned char *src;
+		volatile unsigned char *dst;
+		if (dmix->interleaved) {
+			/*
+			 * process all areas in one loop
+			 * it optimizes the memory accesses for this case
+			 */
+			dmix->u.dmix.mix_areas3(size * channels,
+					 ((char *)dst_areas[0].addr) + 3 * dst_ofs * channels,
+					 ((char *)src_areas[0].addr) + 3 * src_ofs * channels,
+					 dmix->u.dmix.sum_buffer + (dst_ofs * channels),
+					 3, 3, sizeof(signed int));
+			return;
+		}
+		for (chn = 0; chn < channels; chn++) {
+			dchn = dmix->bindings ? dmix->bindings[chn] : chn;
+			if (dchn >= dmix->shmptr->s.channels)
+				continue;
+			src_step = src_areas[chn].step / 8;
+			dst_step = dst_areas[dchn].step / 8;
+			src = (unsigned char *)(((char *)src_areas[chn].addr + src_areas[chn].first / 8) + (src_ofs * src_step));
+			dst = (unsigned char *)(((char *)dst_areas[dchn].addr + dst_areas[dchn].first / 8) + (dst_ofs * dst_step));
+			sum = dmix->u.dmix.sum_buffer + channels * dst_ofs + chn;
+			dmix->u.dmix.mix_areas3(size, dst, src, sum, dst_step, src_step, channels * sizeof(signed int));
+		}
 	}
 }
 
diff --git a/src/pcm/pcm_dmix_generic.c b/src/pcm/pcm_dmix_generic.c
index ed6ebe6f..4e45ba89 100644
--- a/src/pcm/pcm_dmix_generic.c
+++ b/src/pcm/pcm_dmix_generic.c
@@ -121,7 +121,8 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 /* non-concurrent version, supporting both endians */
 static unsigned long long dmix_supported_format =
 	(1ULL << SND_PCM_FORMAT_S16_LE) | (1ULL << SND_PCM_FORMAT_S32_LE) |
-	(1ULL << SND_PCM_FORMAT_S16_BE) | (1ULL << SND_PCM_FORMAT_S32_BE);
+	(1ULL << SND_PCM_FORMAT_S16_BE) | (1ULL << SND_PCM_FORMAT_S32_BE) |
+	(1ULL << SND_PCM_FORMAT_S24_3LE);
 
 #include <byteswap.h>
 
@@ -245,6 +246,37 @@ static void mix_areas2_swap(unsigned int size,
 	}
 }
 
+/* always little endian */
+static void mix_areas3(unsigned int size,
+		       volatile unsigned char *dst, unsigned char *src,
+		       volatile signed int *sum, size_t dst_step,
+		       size_t src_step, size_t sum_step)
+{
+	register signed int sample;
+
+	for (;;) {
+		sample = src[0] | (src[1] << 8) | (((signed char *)src)[2] << 16);
+		if (!(dst[0] | dst[1] | dst[2])) {
+			*sum = sample;
+		} else {
+			sample += *sum;
+			*sum = sample;
+			if (sample > 0x7fffff)
+				sample = 0x7fffff;
+			else if (sample < -0x800000)
+				sample = -0x800000;
+		}
+		dst[0] = sample;
+		dst[1] = sample >> 8;
+		dst[2] = sample >> 16;
+		if (!--size)
+			return;
+		dst += dst_step;
+		src += src_step;
+		sum = (signed int *) ((char *)sum + sum_step);
+	}
+}
+
 
 static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 {
@@ -255,6 +287,7 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 		dmix->u.dmix.mix_areas1 = mix_areas1_swap;
 		dmix->u.dmix.mix_areas2 = mix_areas2_swap;
 	}
+	dmix->u.dmix.mix_areas3 = mix_areas3;
 }
 
 #endif
diff --git a/src/pcm/pcm_dmix_i386.c b/src/pcm/pcm_dmix_i386.c
index 7cbf723d..3ea206c6 100644
--- a/src/pcm/pcm_dmix_i386.c
+++ b/src/pcm/pcm_dmix_i386.c
@@ -5,33 +5,43 @@
 #define MIX_AREAS1 mix_areas1
 #define MIX_AREAS1_MMX mix_areas1_mmx
 #define MIX_AREAS2 mix_areas2
+#define MIX_AREAS3 mix_areas3
+#define MIX_AREAS3_CMOV mix_areas3_cmov
 #define LOCK_PREFIX ""
 #include "pcm_dmix_i386.h"
 #undef MIX_AREAS1
 #undef MIX_AREAS1_MMX
 #undef MIX_AREAS2
+#undef MIX_AREAS3
+#undef MIX_AREAS3_CMOV
 #undef LOCK_PREFIX
 
 #define MIX_AREAS1 mix_areas1_smp
 #define MIX_AREAS1_MMX mix_areas1_smp_mmx
 #define MIX_AREAS2 mix_areas2_smp
+#define MIX_AREAS3 mix_areas3_smp
+#define MIX_AREAS3_CMOV mix_areas3_smp_cmov
 #define LOCK_PREFIX "lock ; "
 #include "pcm_dmix_i386.h"
 #undef MIX_AREAS1
 #undef MIX_AREAS1_MMX
 #undef MIX_AREAS2
+#undef MIX_AREAS3
+#undef MIX_AREAS3_CMOV
 #undef LOCK_PREFIX
 
 static unsigned long long dmix_supported_format =
-	(1ULL << SND_PCM_FORMAT_S16_LE) | (1ULL << SND_PCM_FORMAT_S32_LE);
+	(1ULL << SND_PCM_FORMAT_S16_LE) |
+	(1ULL << SND_PCM_FORMAT_S32_LE) |
+	(1ULL << SND_PCM_FORMAT_S24_3LE);
 
 static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 {
 	FILE *in;
 	char line[255];
-	int smp = 0, mmx = 0;
+	int smp = 0, mmx = 0, cmov = 0;
 
-	/* try to determine, if we have a MMX capable CPU */
+	/* try to determine the capabilities of the CPU */
 	in = fopen("/proc/cpuinfo", "r");
 	if (in) {
 		while (!feof(in)) {
@@ -41,15 +51,21 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 			else if (!strncmp(line, "flags", 5)) {
 				if (strstr(line, " mmx"))
 					mmx = 1;
+				if (strstr(line, " cmov"))
+					cmov = 1;
 			}
 		}
 		fclose(in);
 	}
-	// printf("MMX: %i, SMP: %i\n", mmx, smp);
 	if (mmx) {
 		dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp_mmx : mix_areas1_mmx;
 	} else {
 		dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp : mix_areas1;
 	}
 	dmix->u.dmix.mix_areas2 = smp > 1 ? mix_areas2_smp : mix_areas2;
+	if (cmov) {
+		dmix->u.dmix.mix_areas3 = smp > 1 ? mix_areas3_smp_cmov : mix_areas3_cmov;
+	} else {
+		dmix->u.dmix.mix_areas3 = smp > 1 ? mix_areas3_smp : mix_areas3;
+	}
 }
diff --git a/src/pcm/pcm_dmix_i386.h b/src/pcm/pcm_dmix_i386.h
index aaa04ffd..6875b311 100644
--- a/src/pcm/pcm_dmix_i386.h
+++ b/src/pcm/pcm_dmix_i386.h
@@ -352,3 +352,208 @@ static void MIX_AREAS2(unsigned int size,
 	: "esi", "edi", "edx", "ecx", "eax"
 	);
 }
+
+/*
+ * 24-bit version for plain i386
+ */
+static void MIX_AREAS3(unsigned int size,
+		       volatile unsigned char *dst, unsigned char *src,
+		       volatile signed int *sum, size_t dst_step,
+		       size_t src_step, size_t sum_step)
+{
+	unsigned int old_ebx;
+
+	/*
+	 * ESI - src
+	 * EDI - dst
+	 * EBX - sum
+	 * ECX - old sample
+	 * EAX - sample / temporary
+	 * EDX - temporary
+	 */
+	__asm__ __volatile__ (
+		"\n"
+
+		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
+		/*
+		 * initialization, load ESI, EDI, EBX registers
+		 */
+		"\tmovl %1, %%edi\n"
+		"\tmovl %2, %%esi\n"
+		"\tmovl %3, %%ebx\n"
+		"\tcmpl $0, %0\n"
+		"\tjnz 1f\n"
+		"\tjmp 6f\n"
+
+		"\t.p2align 4,,15\n"
+
+		"1:"
+
+		/*
+		 * sample = *src;
+		 * sum_sample = *sum;
+		 * if (test_and_set_bit(0, dst) == 0)
+		 *     sample -= sum_sample;
+		 * *sum += sample;
+		 */
+		"\tmovsbl 2(%%esi), %%eax\n"
+		"\tmovzwl (%%esi), %%ecx\n"
+		"\tmovl (%%ebx), %%edx\n"
+		"\tsall $16, %%eax\n"
+		"\t" LOCK_PREFIX "btsl $0, (%%edi)\n"
+		"\tleal (%%ecx,%%eax,1), %%ecx\n"
+		"\tjc 2f\n"
+		"\tsubl %%edx, %%ecx\n"
+		"2:"
+		"\t" LOCK_PREFIX "addl %%ecx, (%%ebx)\n"
+
+		/*
+		 * do {
+		 *     sample = old_sample = *sum;
+		 *     saturate(sample);
+		 *     *dst = sample | 1;
+		 * } while (old_sample != *sum);
+		 */
+
+		"3:"
+		"\tmovl (%%ebx), %%ecx\n"
+		/*
+		 * if (sample > 0x7fffff)
+		 */
+		"\tmovl $0x7fffff, %%eax\n"
+		"\tcmpl %%eax, %%ecx\n"
+		"\tjg 4f\n"
+		/*
+		 * if (sample < -0x7fffff)
+		 */
+		"\tmovl $-0x7fffff, %%eax\n"
+		"\tcmpl %%eax, %%ecx\n"
+		"\tjl 4f\n"
+		"\tmovl %%ecx, %%eax\n"
+		"\torl $1, %%eax\n"
+		"4:"
+		"\tmovw %%ax, (%%edi)\n"
+		"\tshrl $16, %%eax\n"
+		"\tmovb %%al, 2(%%edi)\n"
+		"\tcmpl %%ecx, (%%ebx)\n"
+		"\tjnz 3b\n"
+
+		/*
+		 * while (size-- > 0)
+		 */
+		"\tdecl %0\n"
+		"\tjz 6f\n"
+		"\tadd %4, %%edi\n"
+		"\tadd %5, %%esi\n"
+		"\tadd %6, %%ebx\n"
+		"\tjmp 1b\n"
+
+		"6:"
+		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
+
+		: /* no output regs */
+		: "m" (size), "m" (dst), "m" (src),
+		  "m" (sum), "m" (dst_step), "m" (src_step),
+		  "m" (sum_step), "m" (old_ebx)
+		: "esi", "edi", "edx", "ecx", "eax"
+	);
+}
+
+/*
+ * 24-bit version for Pentium Pro/II
+ */
+static void MIX_AREAS3_CMOV(unsigned int size,
+			    volatile unsigned char *dst, unsigned char *src,
+			    volatile signed int *sum, size_t dst_step,
+			    size_t src_step, size_t sum_step)
+{
+	unsigned int old_ebx;
+
+	/*
+	 * ESI - src
+	 * EDI - dst
+	 * EBX - sum
+	 * ECX - old sample
+	 * EAX - sample / temporary
+	 * EDX - temporary
+	 */
+	__asm__ __volatile__ (
+		"\n"
+
+		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
+		/*
+		 * initialization, load ESI, EDI, EBX registers
+		 */
+		"\tmovl %1, %%edi\n"
+		"\tmovl %2, %%esi\n"
+		"\tmovl %3, %%ebx\n"
+		"\tcmpl $0, %0\n"
+		"\tjz 6f\n"
+
+		"\t.p2align 4,,15\n"
+
+		"1:"
+
+		/*
+		 * sample = *src;
+		 * sum_sample = *sum;
+		 * if (test_and_set_bit(0, dst) == 0)
+		 *     sample -= sum_sample;
+		 * *sum += sample;
+		 */
+		"\tmovsbl 2(%%esi), %%eax\n"
+		"\tmovzwl (%%esi), %%ecx\n"
+		"\tmovl (%%ebx), %%edx\n"
+		"\tsall $16, %%eax\n"
+		"\t" LOCK_PREFIX "btsl $0, (%%edi)\n"
+		"\tleal (%%ecx,%%eax,1), %%ecx\n"
+		"\tjc 2f\n"
+		"\tsubl %%edx, %%ecx\n"
+		"2:"
+		"\t" LOCK_PREFIX "addl %%ecx, (%%ebx)\n"
+
+		/*
+		 * do {
+		 *     sample = old_sample = *sum;
+		 *     saturate(sample);
+		 *     *dst = sample | 1;
+		 * } while (old_sample != *sum);
+		 */
+
+		"3:"
+		"\tmovl (%%ebx), %%ecx\n"
+
+		"\tmovl $0x7fffff, %%eax\n"
+		"\tmovl $-0x7fffff, %%edx\n"
+		"\tcmpl %%eax, %%ecx\n"
+		"\tcmovng %%ecx, %%eax\n"
+		"\tcmpl %%edx, %%ecx\n"
+		"\tcmovl %%edx, %%eax\n"
+
+		"\torl $1, %%eax\n"
+		"\tmovw %%ax, (%%edi)\n"
+		"\tshrl $16, %%eax\n"
+		"\tmovb %%al, 2(%%edi)\n"
+
+		"\tcmpl %%ecx, (%%ebx)\n"
+		"\tjnz 3b\n"
+
+		/*
+		 * while (size-- > 0)
+		 */
+		"\tadd %4, %%edi\n"
+		"\tadd %5, %%esi\n"
+		"\tadd %6, %%ebx\n"
+		"\tdecl %0\n"
+		"\tjnz 1b\n"
+
+		"6:"
+		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
+
+		: /* no output regs */
+		: "m" (size), "m" (dst), "m" (src),
+		  "m" (sum), "m" (dst_step), "m" (src_step),
+		  "m" (sum_step), "m" (old_ebx)
+		: "esi", "edi", "edx", "ecx", "eax"
+	);
+}
diff --git a/src/pcm/pcm_dmix_x86_64.c b/src/pcm/pcm_dmix_x86_64.c
index a64888f2..7632388c 100644
--- a/src/pcm/pcm_dmix_x86_64.c
+++ b/src/pcm/pcm_dmix_x86_64.c
@@ -4,22 +4,28 @@
 
 #define MIX_AREAS1 mix_areas1
 #define MIX_AREAS2 mix_areas2
+#define MIX_AREAS3 mix_areas3
 #define LOCK_PREFIX ""
 #include "pcm_dmix_x86_64.h"
 #undef MIX_AREAS1
 #undef MIX_AREAS2
+#undef MIX_AREAS3
 #undef LOCK_PREFIX
 
 #define MIX_AREAS1 mix_areas1_smp
 #define MIX_AREAS2 mix_areas2_smp
+#define MIX_AREAS3 mix_areas3_smp
 #define LOCK_PREFIX "lock ; "
 #include "pcm_dmix_x86_64.h"
 #undef MIX_AREAS1
 #undef MIX_AREAS2
+#undef MIX_AREAS3
 #undef LOCK_PREFIX
 
 static unsigned long long dmix_supported_format =
-	(1ULL << SND_PCM_FORMAT_S16_LE) | (1ULL << SND_PCM_FORMAT_S32_LE);
+	(1ULL << SND_PCM_FORMAT_S16_LE) |
+	(1ULL << SND_PCM_FORMAT_S32_LE) |
+	(1ULL << SND_PCM_FORMAT_S24_3LE);
 
 static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 {
@@ -40,4 +46,5 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 	// printf("SMP: %i\n", smp);
 	dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp : mix_areas1;
 	dmix->u.dmix.mix_areas2 = smp > 1 ? mix_areas2_smp : mix_areas2;
+	dmix->u.dmix.mix_areas3 = smp > 1 ? mix_areas3_smp : mix_areas3;
 }
diff --git a/src/pcm/pcm_dmix_x86_64.h b/src/pcm/pcm_dmix_x86_64.h
index 973ed3f9..13486138 100644
--- a/src/pcm/pcm_dmix_x86_64.h
+++ b/src/pcm/pcm_dmix_x86_64.h
@@ -237,3 +237,105 @@ static void MIX_AREAS2(unsigned int size,
 	);
 }
 
+/*
+ * 24-bit version
+ */
+static void MIX_AREAS3(unsigned int size,
+		       volatile unsigned char *dst, unsigned char *src,
+		       volatile signed int *sum, size_t dst_step,
+		       size_t src_step, size_t sum_step)
+{
+	unsigned long long old_rbx;
+
+	/*
+	 * RSI - src
+	 * RDI - dst
+	 * RBX - sum
+	 * ECX - old sample
+	 * EAX - sample / temporary
+	 * EDX - temporary
+	 */
+	__asm__ __volatile__ (
+		"\n"
+
+		"\tmovq %%rbx, %7\n"
+		/*
+		 * initialization, load RSI, RDI, RBX registers
+		 */
+		"\tmovq %1, %%rdi\n"
+		"\tmovq %2, %%rsi\n"
+		"\tmovq %3, %%rbx\n"
+
+		/*
+		 * while (size-- > 0) {
+		 */
+		"\tcmpl $0, %0\n"
+		"\tjz 6f\n"
+
+		"\t.p2align 4,,15\n"
+
+		"1:"
+
+		/*
+		 * sample = *src;  (low 16 bits zero-extended, top byte sign-extended)
+		 * sum_sample = *sum;
+		 * if (test_and_set_bit(0, dst) == 0)
+		 *     sample -= sum_sample;
+		 * *sum += sample;
+		 */
+		"\tmovsbl 2(%%rsi), %%eax\n"
+		"\tmovzwl (%%rsi), %%ecx\n"
+		"\tmovl (%%rbx), %%edx\n"
+		"\tsall $16, %%eax\n"
+		"\t" LOCK_PREFIX "btsl $0, (%%rdi)\n"
+		"\tleal (%%ecx,%%eax,1), %%ecx\n"
+		"\tjc 2f\n"
+		"\tsubl %%edx, %%ecx\n"
+		"2:"
+		"\t" LOCK_PREFIX "addl %%ecx, (%%rbx)\n"
+
+		/*
+		 * do {
+		 *     sample = old_sample = *sum;
+		 *     saturate(sample);
+		 *     *dst = sample | 1;
+		 * } while (old_sample != *sum);
+		 */
+
+		"3:"
+		"\tmovl (%%rbx), %%ecx\n"
+
+		"\tmovl $0x7fffff, %%eax\n"
+		"\tmovl $-0x7fffff, %%edx\n"
+		"\tcmpl %%eax, %%ecx\n"
+		"\tcmovng %%ecx, %%eax\n"
+		"\tcmpl %%edx, %%ecx\n"
+		"\tcmovl %%edx, %%eax\n"
+
+		"\torl $1, %%eax\n"
+		"\tmovw %%ax, (%%rdi)\n"
+		"\tshrl $16, %%eax\n"
+		"\tmovb %%al, 2(%%rdi)\n"
+
+		"\tcmpl %%ecx, (%%rbx)\n"
+		"\tjnz 3b\n"
+
+		/*
+		 * while (size-- > 0)
+		 */
+		"\tadd %4, %%rdi\n"
+		"\tadd %5, %%rsi\n"
+		"\tadd %6, %%rbx\n"
+		"\tdecl %0\n"
+		"\tjnz 1b\n"
+
+		"6:"
+		"\tmovq %7, %%rbx\n"
+
+		: /* no output regs */
+		: "m" (size), "m" (dst), "m" (src),
+		  "m" (sum), "m" (dst_step), "m" (src_step),
+		  "m" (sum_step), "m" (old_rbx)
+		: "rsi", "rdi", "edx", "ecx", "eax"
+	);
+}
-- 
2.47.1