From 243027a824eab8daa10a9c6dfe59fdb59b01f257 Mon Sep 17 00:00:00 2001
From: Takashi Iwai
Date: Wed, 17 Sep 2003 17:09:45 +0000
Subject: [PATCH] - support of dmix asm code for x86-64.

---
 src/pcm/Makefile.am       |   2 +-
 src/pcm/pcm_direct.h      |   8 +-
 src/pcm/pcm_dmix.c        |  72 +++++++++---
 src/pcm/pcm_dmix_i386.h   |  18 +--
 src/pcm/pcm_dmix_x86_64.h | 228 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 298 insertions(+), 30 deletions(-)
 create mode 100644 src/pcm/pcm_dmix_x86_64.h

diff --git a/src/pcm/Makefile.am b/src/pcm/Makefile.am
index 6d7281b8..c66b8560 100644
--- a/src/pcm/Makefile.am
+++ b/src/pcm/Makefile.am
@@ -14,7 +14,7 @@ libpcm_la_SOURCES = atomic.c mask.c interval.c \
 		    pcm_symbols.c
 noinst_HEADERS = pcm_local.h pcm_plugin.h mask.h mask_inline.h \
 	         interval.h interval_inline.h plugin_ops.h ladspa.h \
-		 pcm_direct.h pcm_dmix_i386.h
+		 pcm_direct.h pcm_dmix_i386.h pcm_dmix_x86_64.h
 
 alsadir = $(datadir)/alsa
 
diff --git a/src/pcm/pcm_direct.h b/src/pcm/pcm_direct.h
index 48f18998..39c4349d 100644
--- a/src/pcm/pcm_direct.h
+++ b/src/pcm/pcm_direct.h
@@ -33,13 +33,13 @@
 
 typedef void (mix_areas1_t)(unsigned int size,
 			volatile signed short *dst, signed short *src,
-			volatile signed int *sum, unsigned int dst_step,
-			unsigned int src_step, unsigned int sum_step);
+			volatile signed int *sum, size_t dst_step,
+			size_t src_step, size_t sum_step);
 
 typedef void (mix_areas2_t)(unsigned int size,
 			volatile signed int *dst, signed int *src,
-			volatile signed int *sum, unsigned int dst_step,
-			unsigned int src_step, unsigned int sum_step);
+			volatile signed int *sum, size_t dst_step,
+			size_t src_step, size_t sum_step);
 
 struct slave_params {
 	snd_pcm_format_t format;
diff --git a/src/pcm/pcm_dmix.c b/src/pcm/pcm_dmix.c
index f57b3ab3..cda70440 100644
--- a/src/pcm/pcm_dmix.c
+++ b/src/pcm/pcm_dmix.c
@@ -138,22 +138,20 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 	char line[255];
 	int smp = 0, mmx = 0;
 	
-	/* safe settings for all i386 CPUs */
-	dmix->u.dmix.mix_areas1 = mix_areas1_smp;
 	/* try to determine, if we have a MMX capable CPU */
 	in = fopen("/proc/cpuinfo", "r");
-	if (in == NULL)
-		return;
-	while (!feof(in)) {
-		fgets(line, sizeof(line), in);
-		if (!strncmp(line, "processor", 9))
-			smp++;
-		else if (!strncmp(line, "flags", 5)) {
-			if (strstr(line, " mmx"))
-				mmx = 1;
+	if (in) {
+		while (fgets(line, sizeof(line), in) != NULL) {
+			/* fgets() returns NULL at EOF/error, so no stale line is re-parsed */
+			if (!strncmp(line, "processor", 9))
+				smp++;
+			else if (!strncmp(line, "flags", 5)) {
+				if (strstr(line, " mmx"))
+					mmx = 1;
+			}
 		}
+		fclose(in);
 	}
-	fclose(in);
 	// printf("MMX: %i, SMP: %i\n", mmx, smp);
 	if (mmx) {
 		dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp_mmx : mix_areas1_mmx;
@@ -164,12 +162,54 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 }
 #endif
 
+#ifdef __x86_64__
+#define ADD_AND_SATURATE
+
+#define MIX_AREAS1 mix_areas1
+#define MIX_AREAS2 mix_areas2
+#define LOCK_PREFIX ""
+#include "pcm_dmix_x86_64.h"
+#undef MIX_AREAS1
+#undef MIX_AREAS2
+#undef LOCK_PREFIX
+
+#define MIX_AREAS1 mix_areas1_smp
+#define MIX_AREAS2 mix_areas2_smp
+#define LOCK_PREFIX "lock ; "
+#include "pcm_dmix_x86_64.h"
+#undef MIX_AREAS1
+#undef MIX_AREAS2
+#undef LOCK_PREFIX
+/* Select the lock-prefixed (SMP) or plain mixers from the "processor" count in /proc/cpuinfo. */
+static void mix_select_callbacks(snd_pcm_direct_t *dmix)
+{
+	FILE *in;
+	char line[255];
+	int smp = 0;
+	
+	/* try to determine, if we have SMP */
+	in = fopen("/proc/cpuinfo", "r");
+	if (in) {
+		while (fgets(line, sizeof(line), in) != NULL) {
+			/* fgets() returns NULL at EOF/error, so no stale line is re-parsed */
+			if (!strncmp(line, "processor", 9))
+				smp++;
+		}
+		fclose(in);
+	}
+	// printf("SMP: %i\n", smp);
+	dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp : mix_areas1;
+	dmix->u.dmix.mix_areas2 = smp > 1 ? mix_areas2_smp : mix_areas2;
+}
+#endif
+
+
 #ifndef ADD_AND_SATURATE
 #warning Please, recode mix_areas1() routine to your architecture...
static void mix_areas1(unsigned int size, volatile signed short *dst, signed short *src, - volatile signed int *sum, unsigned int dst_step, - unsigned int src_step, unsigned int sum_step) + volatile signed int *sum, size_t dst_step, + size_t src_step, size_t sum_step) { register signed int sample, old_sample; @@ -198,8 +238,8 @@ static void mix_areas1(unsigned int size, #warning Please, recode mix_areas2() routine to your architecture... static void mix_areas2(unsigned int size, volatile signed int *dst, signed int *src, - volatile signed int *sum, unsigned int dst_step, - unsigned int src_step, unsigned int sum_step) + volatile signed int *sum, size_t dst_step, + size_t src_step, size_t sum_step) { register signed int sample, old_sample; diff --git a/src/pcm/pcm_dmix_i386.h b/src/pcm/pcm_dmix_i386.h index 81cfcf4f..15a8e714 100644 --- a/src/pcm/pcm_dmix_i386.h +++ b/src/pcm/pcm_dmix_i386.h @@ -31,8 +31,8 @@ */ static void MIX_AREAS1(unsigned int size, volatile signed short *dst, signed short *src, - volatile signed int *sum, unsigned int dst_step, - unsigned int src_step, unsigned int sum_step) + volatile signed int *sum, size_t dst_step, + size_t src_step, size_t sum_step) { /* * ESI - src @@ -55,7 +55,7 @@ static void MIX_AREAS1(unsigned int size, /* * while (size-- > 0) { */ - "\tcmp $0, %0\n" + "\tcmpl $0, %0\n" "jz 6f\n" "\t.p2align 4,,15\n" @@ -154,8 +154,8 @@ static void MIX_AREAS1(unsigned int size, */ static void MIX_AREAS1_MMX(unsigned int size, volatile signed short *dst, signed short *src, - volatile signed int *sum, unsigned int dst_step, - unsigned int src_step, unsigned int sum_step) + volatile signed int *sum, size_t dst_step, + size_t src_step, size_t sum_step) { /* * ESI - src @@ -178,7 +178,7 @@ static void MIX_AREAS1_MMX(unsigned int size, /* * while (size-- > 0) { */ - "\tcmp $0, %0\n" + "\tcmpl $0, %0\n" "jz 6f\n" "\t.p2align 4,,15\n" @@ -244,8 +244,8 @@ static void MIX_AREAS1_MMX(unsigned int size, */ static void MIX_AREAS2(unsigned int 
size, volatile signed int *dst, signed int *src, - volatile signed int *sum, unsigned int dst_step, - unsigned int src_step, unsigned int sum_step) + volatile signed int *sum, size_t dst_step, + size_t src_step, size_t sum_step) { /* * ESI - src @@ -268,7 +268,7 @@ static void MIX_AREAS2(unsigned int size, /* * while (size-- > 0) { */ - "\tcmp $0, %0\n" + "\tcmpl $0, %0\n" "jz 6f\n" "\t.p2align 4,,15\n" diff --git a/src/pcm/pcm_dmix_x86_64.h b/src/pcm/pcm_dmix_x86_64.h new file mode 100644 index 00000000..f4f5ba34 --- /dev/null +++ b/src/pcm/pcm_dmix_x86_64.h @@ -0,0 +1,228 @@ +/** + * \file pcm/pcm_dmix_x86_64.h + * \ingroup PCM_Plugins + * \brief PCM Direct Stream Mixing (dmix) Plugin Interface - X86-64 assembler code + * \author Takashi Iwai + * \date 2003 + */ +/* + * PCM - Direct Stream Mixing + * Copyright (c) 2003 by Jaroslav Kysela + * Takashi Iwai + * + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+/*
+ * 16-bit version, MMX optimized: adds size samples of src into sum, stores the saturated sum in dst
+ */
+static void MIX_AREAS1(unsigned int size,
+		       volatile signed short *dst, signed short *src,
+		       volatile signed int *sum, size_t dst_step,
+		       size_t src_step, size_t sum_step)
+{
+	/*
+	 *  RSI - src
+	 *  RDI - dst
+	 *  RBX - sum
+	 *  ECX - old sample
+	 *  EAX - sample / temporary
+	 *  EDX - temporary
+	 */
+	__asm__ __volatile__ (
+		"\n"
+
+		/*
+		 *  initialization, load RSI, RDI, RBX registers
+		 */
+		"\tmovq %1, %%rdi\n"
+		"\tmovq %2, %%rsi\n"
+		"\tmovq %3, %%rbx\n"
+
+		/*
+		 * while (size-- > 0) {
+		 */
+		"\tcmpl $0, %0\n"
+		"jz 6f\n"
+
+		"\t.p2align 4,,15\n"
+
+		"1:"
+
+		/*
+		 *   sample = *src;
+		 *   sum_sample = *sum;
+		 *   if (cmpxchg(*dst, 0, 1) == 0)
+		 *     sample -= sum_sample;
+		 *   xadd(*sum, sample);
+		 */
+		"\tmovw $0, %%ax\n"
+		"\tmovw $1, %%cx\n"
+		"\tmovl (%%rbx), %%edx\n"
+		"\t" LOCK_PREFIX "cmpxchgw %%cx, (%%rdi)\n"
+		"\tmovswl (%%rsi), %%ecx\n"
+		"\tjnz 2f\n"
+		"\tsubl %%edx, %%ecx\n"
+		"2:"
+		"\t" LOCK_PREFIX "addl %%ecx, (%%rbx)\n"
+
+		/*
+		 *   do {
+		 *     sample = old_sample = *sum;
+		 *     saturate(sample);	(packssdw clamps to 16 bits)
+		 *     *dst = sample;
+		 *   } while (old_sample != *sum);
+		 */
+
+		"3:"
+		"\tmovl (%%rbx), %%ecx\n"
+		"\tmovd %%ecx, %%mm0\n"
+		"\tpackssdw %%mm1, %%mm0\n"
+		"\tmovd %%mm0, %%eax\n"
+		"\tmovw %%ax, (%%rdi)\n"
+		"\tcmpl %%ecx, (%%rbx)\n"
+		"\tjnz 3b\n"
+
+		/*
+		 * while (size-- > 0); the steps are added to the raw pointers, i.e. in bytes
+		 */
+		"\tadd %4, %%rdi\n"
+		"\tadd %5, %%rsi\n"
+		"\tadd %6, %%rbx\n"
+		"\tdecl %0\n"
+		"\tjnz 1b\n"
+		// "\tjmp 6f\n"
+
+		"6:"
+
+		"\temms\n"
+
+		: "+m" (size)	/* %0 is decremented by the loop, so it must be read-write */
+		: "m" (dst), "m" (src), "m" (sum), "m" (dst_step), "m" (src_step), "m" (sum_step)
+		: "rsi", "rdi", "edx", "ecx", "rbx", "eax", "memory", "cc"
+	);
+}
+
+/*
+ * 32-bit version (24-bit resolution): samples are shifted right by 8 before mixing
+ */
+static void MIX_AREAS2(unsigned int size,
+		       volatile signed int *dst, signed int *src,
+		       volatile signed int *sum, size_t dst_step,
+		       size_t src_step, size_t sum_step)
+{
+	/*
+	 *  RSI - src
+	 *  RDI - dst
+	 *  RBX - sum
+	 *  ECX - old sample
+	 *  EAX - sample / temporary
+	 *  EDX - temporary
+	 */
+	__asm__ __volatile__ (
+		"\n"
+
+		/*
+		 *  initialization, load RSI, RDI, RBX registers
+		 */
+		"\tmovq %1, %%rdi\n"
+		"\tmovq %2, %%rsi\n"
+		"\tmovq %3, %%rbx\n"
+
+		/*
+		 * while (size-- > 0) {
+		 */
+		"\tcmpl $0, %0\n"
+		"jz 6f\n"
+
+		"\t.p2align 4,,15\n"
+
+		"1:"
+
+		/*
+		 *   sample = *src;
+		 *   sum_sample = *sum;
+		 *   if (cmpxchg(*dst, 0, 1) == 0)
+		 *     sample -= sum_sample;
+		 *   xadd(*sum, sample);
+		 */
+		"\tmovl $0, %%eax\n"
+		"\tmovl $1, %%ecx\n"
+		"\tmovl (%%rbx), %%edx\n"
+		"\t" LOCK_PREFIX "cmpxchgl %%ecx, (%%rdi)\n"
+		"\tjnz 2f\n"
+		"\tmovl (%%rsi), %%ecx\n"
+		/* sample >>= 8 */
+		"\tsarl $8, %%ecx\n"
+		"\tsubl %%edx, %%ecx\n"
+		"\tjmp 21f\n"
+		"2:"
+		"\tmovl (%%rsi), %%ecx\n"
+		/* sample >>= 8 */
+		"\tsarl $8, %%ecx\n"
+		"21:"
+		"\t" LOCK_PREFIX "addl %%ecx, (%%rbx)\n"
+
+		/*
+		 *   do {
+		 *     sample = old_sample = *sum;
+		 *     saturate(sample);	(clamped to the 24-bit range below)
+		 *     *dst = sample;
+		 *   } while (old_sample != *sum);
+		 */
+
+		"3:"
+		"\tmovl (%%rbx), %%ecx\n"
+		/*
+		 *  if (sample > 0x7fffff)
+		 */
+		"\tmovl $0x7fffff, %%eax\n"
+		"\tcmpl %%eax, %%ecx\n"
+		"\tjg 4f\n"
+		/*
+		 *  if (sample < -0x800000)
+		 */
+		"\tmovl $-0x800000, %%eax\n"
+		"\tcmpl %%eax, %%ecx\n"
+		"\tjl 4f\n"
+		"\tmovl %%ecx, %%eax\n"
+		"4:"
+		/*
+		 *  sample <<= 8;
+		 */
+		"\tsall $8, %%eax\n"
+		"\tmovl %%eax, (%%rdi)\n"
+		"\tcmpl %%ecx, (%%rbx)\n"
+		"\tjnz 3b\n"
+
+		/*
+		 * while (size-- > 0); the steps are added to the raw pointers, i.e. in bytes
+		 */
+		"\tadd %4, %%rdi\n"
+		"\tadd %5, %%rsi\n"
+		"\tadd %6, %%rbx\n"
+		"\tdecl %0\n"
+		"\tjnz 1b\n"
+		// "\tjmp 6f\n"
+
+		"6:"
+		: "+m" (size)	/* %0 is decremented by the loop, so it must be read-write */
+		: "m" (dst), "m" (src), "m" (sum), "m" (dst_step), "m" (src_step), "m" (sum_step)
+		: "rsi", "rdi", "edx", "ecx", "rbx", "eax", "memory", "cc"
+	);
+}
+
-- 
2.47.1