case SND_PCM_FORMAT_S32_BE:
case SND_PCM_FORMAT_S16_LE:
case SND_PCM_FORMAT_S16_BE:
+ case SND_PCM_FORMAT_S24_3LE:
break;
default:
SNDERR("invalid format");
volatile signed int *sum, size_t dst_step,
size_t src_step, size_t sum_step);
+/*
+ * Signature of the mixing routine for packed 3-byte samples (the
+ * u.dmix.mix_areas3 callback).  size is the number of samples to mix;
+ * dst and src are byte pointers to the first sample, sum points to the
+ * matching entry of the signed int sum buffer.  The three step arguments
+ * are byte increments applied to dst, src and sum after every sample.
+ */
+typedef void (mix_areas3_t)(unsigned int size,
+ volatile unsigned char *dst, unsigned char *src,
+ volatile signed int *sum, size_t dst_step,
+ size_t src_step, size_t sum_step);
+
struct slave_params {
snd_pcm_format_t format;
int rate;
signed int *sum_buffer; /* shared sum buffer */
mix_areas1_t *mix_areas1;
mix_areas2_t *mix_areas2;
+ mix_areas3_t *mix_areas3;
} dmix;
struct {
} dsnoop;
sum = dmix->u.dmix.sum_buffer + channels * dst_ofs + chn;
dmix->u.dmix.mix_areas1(size, dst, src, sum, dst_step, src_step, channels * sizeof(signed int));
}
- } else {
+ } else if (dmix->shmptr->s.format == SND_PCM_FORMAT_S32_LE ||
+ dmix->shmptr->s.format == SND_PCM_FORMAT_S32_BE) {
signed int *src;
volatile signed int *dst;
if (dmix->interleaved) {
sum = dmix->u.dmix.sum_buffer + channels * dst_ofs + chn;
dmix->u.dmix.mix_areas2(size, dst, src, sum, dst_step, src_step, channels * sizeof(signed int));
}
+ } else { /* SND_PCM_FORMAT_S24_3LE */
+ unsigned char *src;
+ volatile unsigned char *dst;
+ if (dmix->interleaved) {
+ /*
+ * process all areas in one loop
+ * it optimizes the memory accesses for this case
+ */
+ dmix->u.dmix.mix_areas3(size * channels,
+ ((char *)dst_areas[0].addr) + 3 * dst_ofs * channels,
+ ((char *)src_areas[0].addr) + 3 * src_ofs * channels,
+ dmix->u.dmix.sum_buffer + (dst_ofs * channels),
+ 3, 3, sizeof(signed int));
+ return;
+ }
+ for (chn = 0; chn < channels; chn++) {
+ dchn = dmix->bindings ? dmix->bindings[chn] : chn;
+ if (dchn >= dmix->shmptr->s.channels)
+ continue;
+ src_step = src_areas[chn].step / 8;
+ dst_step = dst_areas[dchn].step / 8;
+ src = (unsigned char *)(((char *)src_areas[chn].addr + src_areas[chn].first / 8) + (src_ofs * src_step));
+ dst = (unsigned char *)(((char *)dst_areas[dchn].addr + dst_areas[dchn].first / 8) + (dst_ofs * dst_step));
+ sum = dmix->u.dmix.sum_buffer + channels * dst_ofs + chn;
+ dmix->u.dmix.mix_areas3(size, dst, src, sum, dst_step, src_step, channels * sizeof(signed int));
+ }
}
}
/* non-concurrent version, supporting both endians */
static unsigned long long dmix_supported_format =
(1ULL << SND_PCM_FORMAT_S16_LE) | (1ULL << SND_PCM_FORMAT_S32_LE) |
- (1ULL << SND_PCM_FORMAT_S16_BE) | (1ULL << SND_PCM_FORMAT_S32_BE);
+ (1ULL << SND_PCM_FORMAT_S16_BE) | (1ULL << SND_PCM_FORMAT_S32_BE) |
+ (1ULL << SND_PCM_FORMAT_S24_3LE);
#include <byteswap.h>
}
}
+/*
+ * Mix packed 24-bit samples (3 bytes each, always little endian).
+ *
+ * size     - number of samples to mix; 0 is a no-op
+ * dst      - destination samples; a sample whose three bytes are all zero
+ *            is treated as not yet written, so the sum entry is replaced
+ *            instead of accumulated
+ * src      - source samples
+ * sum      - running totals, one signed int per destination sample
+ * dst_step, src_step, sum_step - byte increments applied after each sample
+ *
+ * The total kept in *sum is not clipped; only the value stored to dst is
+ * saturated to the 24-bit range [-0x800000, 0x7fffff].
+ */
+static void mix_areas3(unsigned int size,
+		       volatile unsigned char *dst, unsigned char *src,
+		       volatile signed int *sum, size_t dst_step,
+		       size_t src_step, size_t sum_step)
+{
+	register signed int sample;
+	unsigned int packed;
+
+	/* the loop below tests --size after the first sample, so an
+	 * empty request must be rejected up front */
+	if (!size)
+		return;
+
+	for (;;) {
+		/*
+		 * The top byte carries the sign.  Scale it by multiplication:
+		 * left-shifting a negative value is undefined behaviour.
+		 */
+		sample = (src[0] | (src[1] << 8)) +
+			 ((signed char *)src)[2] * 0x10000;
+		if (!(dst[0] | dst[1] | dst[2])) {
+			*sum = sample;
+		} else {
+			sample += *sum;
+			*sum = sample;
+			if (sample > 0x7fffff)
+				sample = 0x7fffff;
+			else if (sample < -0x800000)
+				sample = -0x800000;
+		}
+		/* split through an unsigned copy so the shifts are fully defined */
+		packed = (unsigned int)sample;
+		dst[0] = packed;
+		dst[1] = packed >> 8;
+		dst[2] = packed >> 16;
+		if (!--size)
+			return;
+		dst += dst_step;
+		src += src_step;
+		sum = (signed int *) ((char *)sum + sum_step);
+	}
+}
+
static void mix_select_callbacks(snd_pcm_direct_t *dmix)
{
dmix->u.dmix.mix_areas1 = mix_areas1_swap;
dmix->u.dmix.mix_areas2 = mix_areas2_swap;
}
+ dmix->u.dmix.mix_areas3 = mix_areas3;
}
#endif
#define MIX_AREAS1 mix_areas1
#define MIX_AREAS1_MMX mix_areas1_mmx
#define MIX_AREAS2 mix_areas2
+#define MIX_AREAS3 mix_areas3
+#define MIX_AREAS3_CMOV mix_areas3_cmov
#define LOCK_PREFIX ""
#include "pcm_dmix_i386.h"
#undef MIX_AREAS1
#undef MIX_AREAS1_MMX
#undef MIX_AREAS2
+#undef MIX_AREAS3
+#undef MIX_AREAS3_CMOV
#undef LOCK_PREFIX
#define MIX_AREAS1 mix_areas1_smp
#define MIX_AREAS1_MMX mix_areas1_smp_mmx
#define MIX_AREAS2 mix_areas2_smp
+#define MIX_AREAS3 mix_areas3_smp
+#define MIX_AREAS3_CMOV mix_areas3_smp_cmov
#define LOCK_PREFIX "lock ; "
#include "pcm_dmix_i386.h"
#undef MIX_AREAS1
#undef MIX_AREAS1_MMX
#undef MIX_AREAS2
+#undef MIX_AREAS3
+#undef MIX_AREAS3_CMOV
#undef LOCK_PREFIX
static unsigned long long dmix_supported_format =
- (1ULL << SND_PCM_FORMAT_S16_LE) | (1ULL << SND_PCM_FORMAT_S32_LE);
+ (1ULL << SND_PCM_FORMAT_S16_LE) |
+ (1ULL << SND_PCM_FORMAT_S32_LE) |
+ (1ULL << SND_PCM_FORMAT_S24_3LE);
static void mix_select_callbacks(snd_pcm_direct_t *dmix)
{
FILE *in;
char line[255];
- int smp = 0, mmx = 0;
+ int smp = 0, mmx = 0, cmov = 0;
- /* try to determine, if we have a MMX capable CPU */
+ /* try to determine the capabilities of the CPU */
in = fopen("/proc/cpuinfo", "r");
if (in) {
while (!feof(in)) {
else if (!strncmp(line, "flags", 5)) {
if (strstr(line, " mmx"))
mmx = 1;
+ if (strstr(line, " cmov"))
+ cmov = 1;
}
}
fclose(in);
}
- // printf("MMX: %i, SMP: %i\n", mmx, smp);
if (mmx) {
dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp_mmx : mix_areas1_mmx;
} else {
dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp : mix_areas1;
}
dmix->u.dmix.mix_areas2 = smp > 1 ? mix_areas2_smp : mix_areas2;
+ if (cmov) {
+ dmix->u.dmix.mix_areas3 = smp > 1 ? mix_areas3_smp_cmov : mix_areas3_cmov;
+ } else {
+ dmix->u.dmix.mix_areas3 = smp > 1 ? mix_areas3_smp: mix_areas3;
+ }
}
: "esi", "edi", "edx", "ecx", "eax"
);
}
+
+/*
+ * 24-bit version for plain i386
+ *
+ * Mixes size packed 3-byte little-endian samples from src into dst and
+ * keeps the unclipped running total in *sum.  dst_step, src_step and
+ * sum_step are byte increments applied after every sample.
+ *
+ * Bit 0 of every stored dst sample is forced to 1 and serves as the
+ * "already written" marker tested by the bts instruction; for that reason
+ * the stored value is clipped to the odd bounds +/-0x7fffff.
+ *
+ * The asm writes size and old_ebx and stores through dst and sum, so both
+ * are declared as outputs and "memory"/"cc" are listed as clobbers.
+ */
+static void MIX_AREAS3(unsigned int size,
+		       volatile unsigned char *dst, unsigned char *src,
+		       volatile signed int *sum, size_t dst_step,
+		       size_t src_step, size_t sum_step)
+{
+	unsigned int old_ebx;
+
+	/*
+	 *  ESI - src
+	 *  EDI - dst
+	 *  EBX - sum
+	 *  ECX - old sample
+	 *  EAX - sample / temporary
+	 *  EDX - temporary
+	 */
+	__asm__ __volatile__ (
+		"\n"
+
+		"\tmovl %%ebx, %[old_ebx]\n"	/* ebx is GOT pointer (-fPIC) */
+		/*
+		 *  initialization, load ESI, EDI, EBX registers
+		 */
+		"\tmovl %[dst], %%edi\n"
+		"\tmovl %[src], %%esi\n"
+		"\tmovl %[sum], %%ebx\n"
+		"\tcmpl $0, %[size]\n"
+		"\tjnz 1f\n"
+		"\tjmp 6f\n"
+
+		"\t.p2align 4,,15\n"
+
+		"1:"
+
+		/*
+		 *   sample = *src;
+		 *   sum_sample = *sum;
+		 *   if (test_and_set_bit(0, dst) == 0)
+		 *     sample -= sum_sample;
+		 *   *sum += sample;
+		 *
+		 * The bit test uses a 16-bit access so that it never
+		 * touches memory outside the 3-byte sample.
+		 */
+		"\tmovsbl 2(%%esi), %%eax\n"
+		"\tmovzwl (%%esi), %%ecx\n"
+		"\tmovl (%%ebx), %%edx\n"
+		"\tsall $16, %%eax\n"
+		"\t" LOCK_PREFIX "btsw $0, (%%edi)\n"
+		"\tleal (%%ecx,%%eax,1), %%ecx\n"
+		"\tjc 2f\n"
+		"\tsubl %%edx, %%ecx\n"
+		"2:"
+		"\t" LOCK_PREFIX "addl %%ecx, (%%ebx)\n"
+
+		/*
+		 *   do {
+		 *     sample = old_sample = *sum;
+		 *     saturate(sample);
+		 *     *dst = sample | 1;
+		 *   } while (old_sample != *sum);
+		 */
+
+		"3:"
+		"\tmovl (%%ebx), %%ecx\n"
+		/*
+		 *  if (sample > 0x7fffff)
+		 */
+		"\tmovl $0x7fffff, %%eax\n"
+		"\tcmpl %%eax, %%ecx\n"
+		"\tjg 4f\n"
+		/*
+		 *  if (sample < -0x7fffff)
+		 */
+		"\tmovl $-0x7fffff, %%eax\n"
+		"\tcmpl %%eax, %%ecx\n"
+		"\tjl 4f\n"
+		"\tmovl %%ecx, %%eax\n"
+		"\torl $1, %%eax\n"
+		"4:"
+		"\tmovw %%ax, (%%edi)\n"
+		"\tshrl $16, %%eax\n"
+		"\tmovb %%al, 2(%%edi)\n"
+		"\tcmpl %%ecx, (%%ebx)\n"
+		"\tjnz 3b\n"
+
+		/*
+		 * while (size-- > 0)
+		 */
+		"\tdecl %[size]\n"
+		"\tjz 6f\n"
+		"\tadd %[dst_step], %%edi\n"
+		"\tadd %[src_step], %%esi\n"
+		"\tadd %[sum_step], %%ebx\n"
+		"\tjmp 1b\n"
+
+		"6:"
+		"\tmovl %[old_ebx], %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
+
+		: [size] "+m" (size), [old_ebx] "=m" (old_ebx)
+		: [dst] "m" (dst), [src] "m" (src), [sum] "m" (sum),
+		  [dst_step] "m" (dst_step), [src_step] "m" (src_step),
+		  [sum_step] "m" (sum_step)
+		: "esi", "edi", "edx", "ecx", "eax", "memory", "cc"
+	);
+}
+
+/*
+ * 24-bit version for Pentium Pro/II
+ *
+ * Same contract as the plain i386 24-bit routine, but the clipping is
+ * done with conditional moves instead of branches.  size packed 3-byte
+ * little-endian samples are mixed from src into dst, the unclipped total
+ * is kept in *sum, and the three step arguments are byte increments
+ * applied after every sample.
+ *
+ * Bit 0 of every stored dst sample is forced to 1 and serves as the
+ * "already written" marker tested by the bts instruction.
+ *
+ * The asm writes size and old_ebx and stores through dst and sum, so both
+ * are declared as outputs and "memory"/"cc" are listed as clobbers.
+ */
+static void MIX_AREAS3_CMOV(unsigned int size,
+			    volatile unsigned char *dst, unsigned char *src,
+			    volatile signed int *sum, size_t dst_step,
+			    size_t src_step, size_t sum_step)
+{
+	unsigned int old_ebx;
+
+	/*
+	 *  ESI - src
+	 *  EDI - dst
+	 *  EBX - sum
+	 *  ECX - old sample
+	 *  EAX - sample / temporary
+	 *  EDX - temporary
+	 */
+	__asm__ __volatile__ (
+		"\n"
+
+		"\tmovl %%ebx, %[old_ebx]\n"	/* ebx is GOT pointer (-fPIC) */
+		/*
+		 *  initialization, load ESI, EDI, EBX registers
+		 */
+		"\tmovl %[dst], %%edi\n"
+		"\tmovl %[src], %%esi\n"
+		"\tmovl %[sum], %%ebx\n"
+		"\tcmpl $0, %[size]\n"
+		"\tjz 6f\n"
+
+		"\t.p2align 4,,15\n"
+
+		"1:"
+
+		/*
+		 *   sample = *src;
+		 *   sum_sample = *sum;
+		 *   if (test_and_set_bit(0, dst) == 0)
+		 *     sample -= sum_sample;
+		 *   *sum += sample;
+		 *
+		 * The bit test uses a 16-bit access so that it never
+		 * touches memory outside the 3-byte sample.
+		 */
+		"\tmovsbl 2(%%esi), %%eax\n"
+		"\tmovzwl (%%esi), %%ecx\n"
+		"\tmovl (%%ebx), %%edx\n"
+		"\tsall $16, %%eax\n"
+		"\t" LOCK_PREFIX "btsw $0, (%%edi)\n"
+		"\tleal (%%ecx,%%eax,1), %%ecx\n"
+		"\tjc 2f\n"
+		"\tsubl %%edx, %%ecx\n"
+		"2:"
+		"\t" LOCK_PREFIX "addl %%ecx, (%%ebx)\n"
+
+		/*
+		 *   do {
+		 *     sample = old_sample = *sum;
+		 *     saturate(sample);
+		 *     *dst = sample | 1;
+		 *   } while (old_sample != *sum);
+		 */
+
+		"3:"
+		"\tmovl (%%ebx), %%ecx\n"
+
+		/* eax = min(sample, 0x7fffff), then raised to -0x7fffff */
+		"\tmovl $0x7fffff, %%eax\n"
+		"\tmovl $-0x7fffff, %%edx\n"
+		"\tcmpl %%eax, %%ecx\n"
+		"\tcmovng %%ecx, %%eax\n"
+		"\tcmpl %%edx, %%ecx\n"
+		"\tcmovl %%edx, %%eax\n"
+
+		"\torl $1, %%eax\n"
+		"\tmovw %%ax, (%%edi)\n"
+		"\tshrl $16, %%eax\n"
+		"\tmovb %%al, 2(%%edi)\n"
+
+		"\tcmpl %%ecx, (%%ebx)\n"
+		"\tjnz 3b\n"
+
+		/*
+		 * while (size-- > 0)
+		 */
+		"\tadd %[dst_step], %%edi\n"
+		"\tadd %[src_step], %%esi\n"
+		"\tadd %[sum_step], %%ebx\n"
+		"\tdecl %[size]\n"
+		"\tjnz 1b\n"
+
+		"6:"
+		"\tmovl %[old_ebx], %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
+
+		: [size] "+m" (size), [old_ebx] "=m" (old_ebx)
+		: [dst] "m" (dst), [src] "m" (src), [sum] "m" (sum),
+		  [dst_step] "m" (dst_step), [src_step] "m" (src_step),
+		  [sum_step] "m" (sum_step)
+		: "esi", "edi", "edx", "ecx", "eax", "memory", "cc"
+	);
+}
#define MIX_AREAS1 mix_areas1
#define MIX_AREAS2 mix_areas2
+#define MIX_AREAS3 mix_areas3
#define LOCK_PREFIX ""
#include "pcm_dmix_x86_64.h"
#undef MIX_AREAS1
#undef MIX_AREAS2
+#undef MIX_AREAS3
#undef LOCK_PREFIX
#define MIX_AREAS1 mix_areas1_smp
#define MIX_AREAS2 mix_areas2_smp
+#define MIX_AREAS3 mix_areas3_smp
#define LOCK_PREFIX "lock ; "
#include "pcm_dmix_x86_64.h"
#undef MIX_AREAS1
#undef MIX_AREAS2
+#undef MIX_AREAS3
#undef LOCK_PREFIX
static unsigned long long dmix_supported_format =
- (1ULL << SND_PCM_FORMAT_S16_LE) | (1ULL << SND_PCM_FORMAT_S32_LE);
+ (1ULL << SND_PCM_FORMAT_S16_LE) |
+ (1ULL << SND_PCM_FORMAT_S32_LE) |
+ (1ULL << SND_PCM_FORMAT_S24_3LE);
static void mix_select_callbacks(snd_pcm_direct_t *dmix)
{
// printf("SMP: %i\n", smp);
dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp : mix_areas1;
dmix->u.dmix.mix_areas2 = smp > 1 ? mix_areas2_smp : mix_areas2;
+ dmix->u.dmix.mix_areas3 = smp > 1 ? mix_areas3_smp : mix_areas3;
}
);
}
+/*
+ * 24-bit version
+ *
+ * Mixes size packed 3-byte little-endian samples from src into dst and
+ * keeps the unclipped running total in *sum.  dst_step, src_step and
+ * sum_step are byte increments applied after every sample.
+ *
+ * Bit 0 of every stored dst sample is forced to 1 and serves as the
+ * "already written" marker tested by the bts instruction; for that reason
+ * the stored value is clipped to the odd bounds +/-0x7fffff.
+ *
+ * The low 16 bits of the source sample must be zero-extended (movzwl):
+ * only the top byte carries the sign.  Sign-extending the low word would
+ * subtract 0x10000 from every sample that has bit 15 set.
+ *
+ * The asm writes size and old_rbx and stores through dst and sum, so both
+ * are declared as outputs and "memory"/"cc" are listed as clobbers.
+ */
+static void MIX_AREAS3(unsigned int size,
+		       volatile unsigned char *dst, unsigned char *src,
+		       volatile signed int *sum, size_t dst_step,
+		       size_t src_step, size_t sum_step)
+{
+	unsigned long long old_rbx;
+
+	/*
+	 *  RSI - src
+	 *  RDI - dst
+	 *  RBX - sum
+	 *  ECX - old sample
+	 *  EAX - sample / temporary
+	 *  EDX - temporary
+	 */
+	__asm__ __volatile__ (
+		"\n"
+
+		"\tmovq %%rbx, %[old_rbx]\n"
+		/*
+		 *  initialization, load RSI, RDI, RBX registers
+		 */
+		"\tmovq %[dst], %%rdi\n"
+		"\tmovq %[src], %%rsi\n"
+		"\tmovq %[sum], %%rbx\n"
+
+		/*
+		 * while (size-- > 0) {
+		 */
+		"\tcmpl $0, %[size]\n"
+		"\tjz 6f\n"
+
+		"\t.p2align 4,,15\n"
+
+		"1:"
+
+		/*
+		 *   sample = *src;
+		 *   sum_sample = *sum;
+		 *   if (test_and_set_bit(0, dst) == 0)
+		 *     sample -= sum_sample;
+		 *   *sum += sample;
+		 *
+		 * The bit test uses a 16-bit access so that it never
+		 * touches memory outside the 3-byte sample.
+		 */
+		"\tmovsbl 2(%%rsi), %%eax\n"
+		"\tmovzwl (%%rsi), %%ecx\n"
+		"\tmovl (%%rbx), %%edx\n"
+		"\tsall $16, %%eax\n"
+		"\t" LOCK_PREFIX "btsw $0, (%%rdi)\n"
+		"\tleal (%%ecx,%%eax,1), %%ecx\n"
+		"\tjc 2f\n"
+		"\tsubl %%edx, %%ecx\n"
+		"2:"
+		"\t" LOCK_PREFIX "addl %%ecx, (%%rbx)\n"
+
+		/*
+		 *   do {
+		 *     sample = old_sample = *sum;
+		 *     saturate(sample);
+		 *     *dst = sample | 1;
+		 *   } while (old_sample != *sum);
+		 */
+
+		"3:"
+		"\tmovl (%%rbx), %%ecx\n"
+
+		/* eax = min(sample, 0x7fffff), then raised to -0x7fffff */
+		"\tmovl $0x7fffff, %%eax\n"
+		"\tmovl $-0x7fffff, %%edx\n"
+		"\tcmpl %%eax, %%ecx\n"
+		"\tcmovng %%ecx, %%eax\n"
+		"\tcmpl %%edx, %%ecx\n"
+		"\tcmovl %%edx, %%eax\n"
+
+		"\torl $1, %%eax\n"
+		"\tmovw %%ax, (%%rdi)\n"
+		"\tshrl $16, %%eax\n"
+		"\tmovb %%al, 2(%%rdi)\n"
+
+		"\tcmpl %%ecx, (%%rbx)\n"
+		"\tjnz 3b\n"
+
+		/*
+		 * while (size-- > 0)
+		 */
+		"\tadd %[dst_step], %%rdi\n"
+		"\tadd %[src_step], %%rsi\n"
+		"\tadd %[sum_step], %%rbx\n"
+		"\tdecl %[size]\n"
+		"\tjnz 1b\n"
+
+		"6:"
+		"\tmovq %[old_rbx], %%rbx\n"
+
+		: [size] "+m" (size), [old_rbx] "=m" (old_rbx)
+		: [dst] "m" (dst), [src] "m" (src), [sum] "m" (sum),
+		  [dst_step] "m" (dst_step), [src_step] "m" (src_step),
+		  [sum_step] "m" (sum_step)
+		: "rsi", "rdi", "edx", "ecx", "eax", "memory", "cc"
+	);
+}