From: Jaroslav Kysela
Date: Thu, 20 Feb 2003 17:10:22 +0000 (+0000)
Subject: Moved asm i386 code to pcm_dmix_i386.h; UP optimization
X-Git-Tag: v1.0.3~259
X-Git-Url: https://git.alsa-project.org/?a=commitdiff_plain;h=1c908a75c166aa70e302556aecad223e6fd349df;p=alsa-lib.git

Moved asm i386 code to pcm_dmix_i386.h; UP optimization
---

diff --git a/src/pcm/pcm_dmix.c b/src/pcm/pcm_dmix.c
index d45f1d5a..5ab09a7a 100644
--- a/src/pcm/pcm_dmix.c
+++ b/src/pcm/pcm_dmix.c
@@ -571,241 +571,50 @@ static int check_interleave(snd_pcm_dmix_t *dmix)
 
 #define ADD_AND_SATURATE
 
-/*
- * for plain i386
- */
-static void mix_areas1(unsigned int size,
-		       volatile signed short *dst, signed short *src,
-		       volatile signed int *sum, unsigned int dst_step,
-		       unsigned int src_step, unsigned int sum_step)
-{
-	/*
-	 *  ESI - src
-	 *  EDI - dst
-	 *  EBX - sum
-	 *  ECX - old sample
-	 *  EAX - sample / temporary
-	 *  EDX - size
-	 */
-	__asm__ __volatile__ (
-		"\n"
-
-		/*
-		 *  initialization, load EDX, ESI, EDI, EBX registers
-		 */
-		"\tmovl %0, %%edx\n"
-		"\tmovl %1, %%edi\n"
-		"\tmovl %2, %%esi\n"
-		"\tmovl %3, %%ebx\n"
-
-		/*
-		 * while (size-- > 0) {
-		 */
-		"\tcmp $0, %%edx\n"
-		"jz 6f\n"
-
-		"\t.p2align 4,,15\n"
-
-		"1:"
-
-		/*
-		 *   sample = *src;
-		 *   if (cmpxchg(*dst, 0, 1) == 0)
-		 *     sample -= *sum;
-		 *   xadd(*sum, sample);
-		 */
-		"\tmovw $0, %%ax\n"
-		"\tmovw $1, %%cx\n"
-		"\tlock; cmpxchgw %%cx, (%%edi)\n"
-		"\tmovswl (%%esi), %%ecx\n"
-		"\tjnz 2f\n"
-		"\tsubl (%%ebx), %%ecx\n"
-		"2:"
-		"\tlock; addl %%ecx, (%%ebx)\n"
-
-		/*
-		 *   do {
-		 *     sample = old_sample = *sum;
-		 *     saturate(v);
-		 *     *dst = sample;
-		 *   } while (v != *sum);
-		 */
-
-		"3:"
-		"\tmovl (%%ebx), %%ecx\n"
-		"\tcmpl $0x7fff,%%ecx\n"
-		"\tjg 4f\n"
-		"\tcmpl $-0x8000,%%ecx\n"
-		"\tjl 5f\n"
-		"\tmovw %%cx, (%%edi)\n"
-		"\tcmpl %%ecx, (%%ebx)\n"
-		"\tjnz 3b\n"
-
-		/*
-		 * while (size-- > 0)
-		 */
-		"\tadd %4, %%edi\n"
-		"\tadd %5, %%esi\n"
-		"\tadd %6, %%ebx\n"
-		"\tdecl %%edx\n"
-		"\tjnz 1b\n"
-		"\tjmp 6f\n"
-
-		/*
-		 *  sample > 0x7fff
-		 */
-
-		"\t.p2align 4,,15\n"
-
-		"4:"
-		"\tmovw $0x7fff, %%ax\n"
-		"\tmovw %%ax, (%%edi)\n"
-		"\tcmpl %%ecx,(%%ebx)\n"
-		"\tjnz 3b\n"
-		"\tadd %4, %%edi\n"
-		"\tadd %5, %%esi\n"
-		"\tadd %6, %%ebx\n"
-		"\tdecl %%edx\n"
-		"\tjnz 1b\n"
-		"\tjmp 6f\n"
-
-		/*
-		 *  sample < -0x8000
-		 */
-
-		"\t.p2align 4,,15\n"
-
-		"5:"
-		"\tmovw $-0x8000, %%ax\n"
-		"\tmovw %%ax, (%%edi)\n"
-		"\tcmpl %%ecx, (%%ebx)\n"
-		"\tjnz 3b\n"
-		"\tadd %4, %%edi\n"
-		"\tadd %5, %%esi\n"
-		"\tadd %6, %%ebx\n"
-		"\tdecl %%edx\n"
-		"\tjnz 1b\n"
-		// "\tjmp 6f\n"
-
-		"6:"
-
-		: /* no output regs */
-		: "m" (size), "m" (dst), "m" (src), "m" (sum), "m" (dst_step), "m" (src_step), "m" (sum_step)
-		: "esi", "edi", "edx", "ecx", "ebx", "eax"
-	);
-}
-
-/*
- * MMX optimized
- */
-static void mix_areas1_mmx(unsigned int size,
-			   volatile signed short *dst, signed short *src,
-			   volatile signed int *sum, unsigned int dst_step,
-			   unsigned int src_step, unsigned int sum_step)
-{
-	/*
-	 *  ESI - src
-	 *  EDI - dst
-	 *  EBX - sum
-	 *  ECX - old sample
-	 *  EAX - sample / temporary
-	 *  EDX - size
-	 */
-	__asm__ __volatile__ (
-		"\n"
-
-		/*
-		 *  initialization, load EDX, ESI, EDI, EBX registers
-		 */
-		"\tmovl %0, %%edx\n"
-		"\tmovl %1, %%edi\n"
-		"\tmovl %2, %%esi\n"
-		"\tmovl %3, %%ebx\n"
-
-		/*
-		 * while (size-- > 0) {
-		 */
-		"\tcmp $0, %%edx\n"
-		"jz 6f\n"
-
-		"\t.p2align 4,,15\n"
-
-		"1:"
-
-		/*
-		 *   sample = *src;
-		 *   if (cmpxchg(*dst, 0, 1) == 0)
-		 *     sample -= *sum;
-		 *   xadd(*sum, sample);
-		 */
-		"\tmovw $0, %%ax\n"
-		"\tmovw $1, %%cx\n"
-		"\tlock; cmpxchgw %%cx, (%%edi)\n"
-		"\tmovswl (%%esi), %%ecx\n"
-		"\tjnz 2f\n"
-		"\tsubl (%%ebx), %%ecx\n"
-		"2:"
-		"\tlock; addl %%ecx, (%%ebx)\n"
-
-		/*
-		 *   do {
-		 *     sample = old_sample = *sum;
-		 *     saturate(v);
-		 *     *dst = sample;
-		 *   } while (v != *sum);
-		 */
-
-		"3:"
-		"\tmovl (%%ebx), %%ecx\n"
-		"\tmovd %%ecx, %%mm0\n"
-		"\tpackssdw %%mm1, %%mm0\n"
-		"\tmovd %%mm0, %%eax\n"
-		"\tmovw %%ax, (%%edi)\n"
-		"\tcmpl %%ecx, (%%ebx)\n"
-		"\tjnz 3b\n"
-
-		/*
-		 * while (size-- > 0)
-		 */
-		"\tadd %4, %%edi\n"
-		"\tadd %5, %%esi\n"
-		"\tadd %6, %%ebx\n"
-		"\tdecl %%edx\n"
-		"\tjnz 1b\n"
-		"\tjmp 6f\n"
-
-		"6:"
-
-		"\temms\n"
-
-		: /* no output regs */
-		: "m" (size), "m" (dst), "m" (src), "m" (sum), "m" (dst_step), "m" (src_step), "m" (sum_step)
-		: "esi", "edi", "edx", "ecx", "ebx", "eax"
-	);
-}
+#define MIX_AREAS1 mix_areas1
+#define MIX_AREAS1_MMX mix_areas1_mmx
+#define LOCK_PREFIX ""
+#include "pcm_dmix_i386.h"
+#undef MIX_AREAS1
+#undef MIX_AREAS1_MMX
+#undef LOCK_PREFIX
+
+#define MIX_AREAS1 mix_areas1_smp
+#define MIX_AREAS1_MMX mix_areas1_smp_mmx
+#define LOCK_PREFIX "lock ; "
+#include "pcm_dmix_i386.h"
+#undef MIX_AREAS1
+#undef MIX_AREAS1_MMX
+#undef LOCK_PREFIX
 
 static void mix_select_callbacks(snd_pcm_dmix_t *dmix)
 {
 	FILE *in;
 	char line[255];
+	int smp = 0, mmx = 0;
 
 	/* safe settings for all i386 CPUs */
-	dmix->mix_areas1 = mix_areas1;
+	dmix->mix_areas1 = mix_areas1_smp;
 	/* try to determine, if we have a MMX capable CPU */
 	in = fopen("/proc/cpuinfo", "r");
 	if (in == NULL)
 		return;
 	while (!feof(in)) {
 		fgets(line, sizeof(line), in);
-		if (!strncmp(line, "flags", 5)) {
-			fclose(in);
-			if (strstr(line, " mmx")) {
-				// printf("Selecting MMX mix_areas1\n");
-				dmix->mix_areas1 = mix_areas1_mmx;
-			}
-			return;
+		if (!strncmp(line, "processor", 9))
+			smp++;
+		else if (!strncmp(line, "flags", 5)) {
+			if (strstr(line, " mmx"))
+				mmx = 1;
 		}
 	}
+	fclose(in);
+	printf("MMX: %i, SMP: %i\n", mmx, smp);
+	if (mmx) {
+		dmix->mix_areas1 = smp > 1 ? mix_areas1_smp_mmx : mix_areas1_mmx;
+	} else {
+		dmix->mix_areas1 = smp > 1 ? mix_areas1_smp : mix_areas1;
+	}
 }
 #endif
 
diff --git a/src/pcm/pcm_dmix_i386.h b/src/pcm/pcm_dmix_i386.h
new file mode 100644
index 00000000..d860f81a
--- /dev/null
+++ b/src/pcm/pcm_dmix_i386.h
@@ -0,0 +1,240 @@
+/**
+ * \file pcm/pcm_dmix_i386.h
+ * \ingroup PCM_Plugins
+ * \brief PCM Direct Stream Mixing (dmix) Plugin Interface - I386 assembler code
+ * \author Jaroslav Kysela
+ * \date 2002
+ */
+/*
+ *  PCM - Direct Stream Mixing
+ *  Copyright (c) 2000 by Jaroslav Kysela
+ *
+ *
+ *   This library is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU Lesser General Public License as
+ *   published by the Free Software Foundation; either version 2.1 of
+ *   the License, or (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU Lesser General Public License for more details.
+ *
+ *   You should have received a copy of the GNU Lesser General Public
+ *   License along with this library; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+/*
+ * for plain i386
+ */
+static void MIX_AREAS1(unsigned int size,
+		       volatile signed short *dst, signed short *src,
+		       volatile signed int *sum, unsigned int dst_step,
+		       unsigned int src_step, unsigned int sum_step)
+{
+	/*
+	 *  ESI - src
+	 *  EDI - dst
+	 *  EBX - sum
+	 *  ECX - old sample
+	 *  EAX - sample / temporary
+	 *  EDX - size
+	 */
+	__asm__ __volatile__ (
+		"\n"
+
+		/*
+		 *  initialization, load EDX, ESI, EDI, EBX registers
+		 */
+		"\tmovl %0, %%edx\n"
+		"\tmovl %1, %%edi\n"
+		"\tmovl %2, %%esi\n"
+		"\tmovl %3, %%ebx\n"
+
+		/*
+		 * while (size-- > 0) {
+		 */
+		"\tcmp $0, %%edx\n"
+		"jz 6f\n"
+
+		"\t.p2align 4,,15\n"
+
+		"1:"
+
+		/*
+		 *   sample = *src;
+		 *   if (cmpxchg(*dst, 0, 1) == 0)
+		 *     sample -= *sum;
+		 *   xadd(*sum, sample);
+		 */
+		"\tmovw $0, %%ax\n"
+		"\tmovw $1, %%cx\n"
+		"\t" LOCK_PREFIX "cmpxchgw %%cx, (%%edi)\n"
+		"\tmovswl (%%esi), %%ecx\n"
+		"\tjnz 2f\n"
+		"\tsubl (%%ebx), %%ecx\n"
+		"2:"
+		"\t" LOCK_PREFIX "addl %%ecx, (%%ebx)\n"
+
+		/*
+		 *   do {
+		 *     sample = old_sample = *sum;
+		 *     saturate(sample);
+		 *     *dst = sample;
+		 *   } while (old_sample != *sum);
+		 */
+
+		"3:"
+		"\tmovl (%%ebx), %%ecx\n"
+		"\tcmpl $0x7fff,%%ecx\n"
+		"\tjg 4f\n"
+		"\tcmpl $-0x8000,%%ecx\n"
+		"\tjl 5f\n"
+		"\tmovw %%cx, (%%edi)\n"
+		"\tcmpl %%ecx, (%%ebx)\n"
+		"\tjnz 3b\n"
+
+		/*
+		 * while (size-- > 0)
+		 */
+		"\tadd %4, %%edi\n"
+		"\tadd %5, %%esi\n"
+		"\tadd %6, %%ebx\n"
+		"\tdecl %%edx\n"
+		"\tjnz 1b\n"
+		"\tjmp 6f\n"
+
+		/*
+		 *  sample > 0x7fff
+		 */
+
+		"\t.p2align 4,,15\n"
+
+		"4:"
+		"\tmovw $0x7fff, %%ax\n"
+		"\tmovw %%ax, (%%edi)\n"
+		"\tcmpl %%ecx,(%%ebx)\n"
+		"\tjnz 3b\n"
+		"\tadd %4, %%edi\n"
+		"\tadd %5, %%esi\n"
+		"\tadd %6, %%ebx\n"
+		"\tdecl %%edx\n"
+		"\tjnz 1b\n"
+		"\tjmp 6f\n"
+
+		/*
+		 *  sample < -0x8000
+		 */
+
+		"\t.p2align 4,,15\n"
+
+		"5:"
+		"\tmovw $-0x8000, %%ax\n"
+		"\tmovw %%ax, (%%edi)\n"
+		"\tcmpl %%ecx, (%%ebx)\n"
+		"\tjnz 3b\n"
+		"\tadd %4, %%edi\n"
+		"\tadd %5, %%esi\n"
+		"\tadd %6, %%ebx\n"
+		"\tdecl %%edx\n"
+		"\tjnz 1b\n"
+		// "\tjmp 6f\n"
+
+		"6:"
+
+		: /* no output regs */
+		: "m" (size), "m" (dst), "m" (src), "m" (sum), "m" (dst_step), "m" (src_step), "m" (sum_step)
+		: "esi", "edi", "edx", "ecx", "ebx", "eax"
+	);
+}
+
+/*
+ * MMX optimized
+ */
+static void MIX_AREAS1_MMX(unsigned int size,
+			   volatile signed short *dst, signed short *src,
+			   volatile signed int *sum, unsigned int dst_step,
+			   unsigned int src_step, unsigned int sum_step)
+{
+	/*
+	 *  ESI - src
+	 *  EDI - dst
+	 *  EBX - sum
+	 *  ECX - old sample
+	 *  EAX - sample / temporary
+	 *  EDX - size
+	 */
+	__asm__ __volatile__ (
+		"\n"
+
+		/*
+		 *  initialization, load EDX, ESI, EDI, EBX registers
+		 */
+		"\tmovl %0, %%edx\n"
+		"\tmovl %1, %%edi\n"
+		"\tmovl %2, %%esi\n"
+		"\tmovl %3, %%ebx\n"
+
+		/*
+		 * while (size-- > 0) {
+		 */
+		"\tcmp $0, %%edx\n"
+		"jz 6f\n"
+
+		"\t.p2align 4,,15\n"
+
+		"1:"
+
+		/*
+		 *   sample = *src;
+		 *   if (cmpxchg(*dst, 0, 1) == 0)
+		 *     sample -= *sum;
+		 *   xadd(*sum, sample);
+		 */
+		"\tmovw $0, %%ax\n"
+		"\tmovw $1, %%cx\n"
+		"\t" LOCK_PREFIX "cmpxchgw %%cx, (%%edi)\n"
+		"\tmovswl (%%esi), %%ecx\n"
+		"\tjnz 2f\n"
+		"\tsubl (%%ebx), %%ecx\n"
+		"2:"
+		"\t" LOCK_PREFIX "addl %%ecx, (%%ebx)\n"
+
+		/*
+		 *   do {
+		 *     sample = old_sample = *sum;
+		 *     saturate(sample);
+		 *     *dst = sample;
+		 *   } while (old_sample != *sum);
+		 */
+
+		"3:"
+		"\tmovl (%%ebx), %%ecx\n"
+		"\tmovd %%ecx, %%mm0\n"
+		"\tpackssdw %%mm1, %%mm0\n"
+		"\tmovd %%mm0, %%eax\n"
+		"\tmovw %%ax, (%%edi)\n"
+		"\tcmpl %%ecx, (%%ebx)\n"
+		"\tjnz 3b\n"
+
+		/*
+		 * while (size-- > 0)
+		 */
+		"\tadd %4, %%edi\n"
+		"\tadd %5, %%esi\n"
+		"\tadd %6, %%ebx\n"
+		"\tdecl %%edx\n"
+		"\tjnz 1b\n"
+		"\tjmp 6f\n"
+
+		"6:"
+
+		"\temms\n"
+
+		: /* no output regs */
+		: "m" (size), "m" (dst), "m" (src), "m" (sum), "m" (dst_step), "m" (src_step), "m" (sum_step)
+		: "esi", "edi", "edx", "ecx", "ebx", "eax"
+	);
+}