]> git.alsa-project.org Git - alsa-lib.git/commitdiff
- support of dmix asm code for x86-64.
authorTakashi Iwai <tiwai@suse.de>
Wed, 17 Sep 2003 17:09:45 +0000 (17:09 +0000)
committerTakashi Iwai <tiwai@suse.de>
Wed, 17 Sep 2003 17:09:45 +0000 (17:09 +0000)
src/pcm/Makefile.am
src/pcm/pcm_direct.h
src/pcm/pcm_dmix.c
src/pcm/pcm_dmix_i386.h
src/pcm/pcm_dmix_x86_64.h [new file with mode: 0644]

index 6d7281b8425f344e75f29818bf726a4a9eb50973..c66b8560d2488508d680627f87f8a566cf8a0aea 100644 (file)
@@ -14,7 +14,7 @@ libpcm_la_SOURCES = atomic.c mask.c interval.c \
                    pcm_symbols.c
 noinst_HEADERS = pcm_local.h pcm_plugin.h mask.h mask_inline.h \
                 interval.h interval_inline.h plugin_ops.h ladspa.h \
-                pcm_direct.h pcm_dmix_i386.h
+                pcm_direct.h pcm_dmix_i386.h pcm_dmix_x86_64.h
 
 alsadir = $(datadir)/alsa
 
index 48f18998a60274d05871b5c04c3e43a783f3edf1..39c4349df7558156fe9cc23bfd3f868f64a53f26 100644 (file)
 
 typedef void (mix_areas1_t)(unsigned int size,
                        volatile signed short *dst, signed short *src,
-                       volatile signed int *sum, unsigned int dst_step,
-                       unsigned int src_step, unsigned int sum_step);
+                       volatile signed int *sum, size_t dst_step,
+                       size_t src_step, size_t sum_step);
 
 typedef void (mix_areas2_t)(unsigned int size,
                        volatile signed int *dst, signed int *src,
-                       volatile signed int *sum, unsigned int dst_step,
-                       unsigned int src_step, unsigned int sum_step);
+                       volatile signed int *sum, size_t dst_step,
+                       size_t src_step, size_t sum_step);
 
 struct slave_params {
        snd_pcm_format_t format;
index f57b3ab36e4370ca666ede2561f783686ddf6ece..cda70440195a8310e3c6fc4037bbf346e97d26c7 100644 (file)
@@ -138,22 +138,20 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
        char line[255];
        int smp = 0, mmx = 0;
        
-       /* safe settings for all i386 CPUs */
-       dmix->u.dmix.mix_areas1 = mix_areas1_smp;
        /* try to determine, if we have a MMX capable CPU */
        in = fopen("/proc/cpuinfo", "r");
-       if (in == NULL)
-               return;
-       while (!feof(in)) {
-               fgets(line, sizeof(line), in);
-               if (!strncmp(line, "processor", 9))
-                       smp++;
-               else if (!strncmp(line, "flags", 5)) {
-                       if (strstr(line, " mmx"))
-                               mmx = 1;
+       if (in) {
+               while (!feof(in)) {
+                       fgets(line, sizeof(line), in);
+                       if (!strncmp(line, "processor", 9))
+                               smp++;
+                       else if (!strncmp(line, "flags", 5)) {
+                               if (strstr(line, " mmx"))
+                                       mmx = 1;
+                       }
                }
+               fclose(in);
        }
-       fclose(in);
        // printf("MMX: %i, SMP: %i\n", mmx, smp);
        if (mmx) {
                dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp_mmx : mix_areas1_mmx;
@@ -164,12 +162,54 @@ static void mix_select_callbacks(snd_pcm_direct_t *dmix)
 }
 #endif
 
+#ifdef __x86_64__
+#define ADD_AND_SATURATE /* tested by the "#ifndef ADD_AND_SATURATE" generic C code further down */
+
+#define MIX_AREAS1 mix_areas1 /* first inclusion: plain function names */
+#define MIX_AREAS2 mix_areas2
+#define LOCK_PREFIX "" /* empty string: the asm instructions get no lock prefix */
+#include "pcm_dmix_x86_64.h"
+#undef MIX_AREAS1
+#undef MIX_AREAS2
+#undef LOCK_PREFIX
+
+#define MIX_AREAS1 mix_areas1_smp /* second inclusion: same code under the _smp names */
+#define MIX_AREAS2 mix_areas2_smp
+#define LOCK_PREFIX "lock ; " /* pasted in front of the cmpxchg/add instructions in the header */
+#include "pcm_dmix_x86_64.h"
+#undef MIX_AREAS1
+#undef MIX_AREAS2
+#undef LOCK_PREFIX
+/*
+ * Select the x86-64 mixing callbacks for this dmix instance.
+ *
+ * Counts the lines of /proc/cpuinfo that begin with "processor".  When
+ * more than one is found the lock-prefixed (_smp) routines are stored in
+ * dmix->u.dmix.mix_areas1/mix_areas2, otherwise the unlocked ones.
+ *
+ * NOTE(review): if /proc/cpuinfo cannot be opened the count stays 0 and
+ * the unlocked routines are selected -- confirm that this is the intended
+ * fallback on a machine whose CPU count is unknown.
+ */
+static void mix_select_callbacks(snd_pcm_direct_t *dmix)
+{
+       FILE *in;
+       char line[255];
+       int smp = 0;
+
+       /* try to determine, if we have SMP */
+       in = fopen("/proc/cpuinfo", "r");
+       if (in) {
+               /*
+                * Loop on the fgets() result instead of feof(): feof() only
+                * becomes true after a read has already failed, so testing
+                * it up front re-examines a stale (or, for an empty file,
+                * uninitialized) buffer and never terminates on a read error.
+                */
+               while (fgets(line, sizeof(line), in) != NULL) {
+                       if (!strncmp(line, "processor", 9))
+                               smp++;
+               }
+               fclose(in);
+       }
+       dmix->u.dmix.mix_areas1 = smp > 1 ? mix_areas1_smp : mix_areas1;
+       dmix->u.dmix.mix_areas2 = smp > 1 ? mix_areas2_smp : mix_areas2;
+}
+#endif
+
+
 #ifndef ADD_AND_SATURATE
 #warning Please, recode mix_areas1() routine to your architecture...
 static void mix_areas1(unsigned int size,
                       volatile signed short *dst, signed short *src,
-                      volatile signed int *sum, unsigned int dst_step,
-                      unsigned int src_step, unsigned int sum_step)
+                      volatile signed int *sum, size_t dst_step,
+                      size_t src_step, size_t sum_step)
 {
        register signed int sample, old_sample;
 
@@ -198,8 +238,8 @@ static void mix_areas1(unsigned int size,
 #warning Please, recode mix_areas2() routine to your architecture...
 static void mix_areas2(unsigned int size,
                       volatile signed int *dst, signed int *src,
-                      volatile signed int *sum, unsigned int dst_step,
-                      unsigned int src_step, unsigned int sum_step)
+                      volatile signed int *sum, size_t dst_step,
+                      size_t src_step, size_t sum_step)
 {
        register signed int sample, old_sample;
 
index 81cfcf4f71c59abdce6955af40d6ab30d35069a8..15a8e714daf8d87987234dc3754c20e684fb8a68 100644 (file)
@@ -31,8 +31,8 @@
  */
 static void MIX_AREAS1(unsigned int size,
                       volatile signed short *dst, signed short *src,
-                      volatile signed int *sum, unsigned int dst_step,
-                      unsigned int src_step, unsigned int sum_step)
+                      volatile signed int *sum, size_t dst_step,
+                      size_t src_step, size_t sum_step)
 {
        /*
         *  ESI - src
@@ -55,7 +55,7 @@ static void MIX_AREAS1(unsigned int size,
                /*
                 * while (size-- > 0) {
                 */
-               "\tcmp $0, %0\n"
+               "\tcmpl $0, %0\n"
                "jz 6f\n"
 
                "\t.p2align 4,,15\n"
@@ -154,8 +154,8 @@ static void MIX_AREAS1(unsigned int size,
  */
 static void MIX_AREAS1_MMX(unsigned int size,
                           volatile signed short *dst, signed short *src,
-                          volatile signed int *sum, unsigned int dst_step,
-                          unsigned int src_step, unsigned int sum_step)
+                          volatile signed int *sum, size_t dst_step,
+                          size_t src_step, size_t sum_step)
 {
        /*
         *  ESI - src
@@ -178,7 +178,7 @@ static void MIX_AREAS1_MMX(unsigned int size,
                /*
                 * while (size-- > 0) {
                 */
-               "\tcmp $0, %0\n"
+               "\tcmpl $0, %0\n"
                "jz 6f\n"
 
                "\t.p2align 4,,15\n"
@@ -244,8 +244,8 @@ static void MIX_AREAS1_MMX(unsigned int size,
  */
 static void MIX_AREAS2(unsigned int size,
                       volatile signed int *dst, signed int *src,
-                      volatile signed int *sum, unsigned int dst_step,
-                      unsigned int src_step, unsigned int sum_step)
+                      volatile signed int *sum, size_t dst_step,
+                      size_t src_step, size_t sum_step)
 {
        /*
         *  ESI - src
@@ -268,7 +268,7 @@ static void MIX_AREAS2(unsigned int size,
                /*
                 * while (size-- > 0) {
                 */
-               "\tcmp $0, %0\n"
+               "\tcmpl $0, %0\n"
                "jz 6f\n"
 
                "\t.p2align 4,,15\n"
diff --git a/src/pcm/pcm_dmix_x86_64.h b/src/pcm/pcm_dmix_x86_64.h
new file mode 100644 (file)
index 0000000..f4f5ba3
--- /dev/null
@@ -0,0 +1,228 @@
+/**
+ * \file pcm/pcm_dmix_x86_64.h
+ * \ingroup PCM_Plugins
+ * \brief PCM Direct Stream Mixing (dmix) Plugin Interface - X86-64 assembler code
+ * \author Takashi Iwai <tiwai@suse.de>
+ * \date 2003
+ */
+/*
+ *  PCM - Direct Stream Mixing
+ *  Copyright (c) 2003 by Jaroslav Kysela <perex@suse.cz>
+ *                        Takashi Iwai <tiwai@suse.de>
+ *
+ *
+ *   This library is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU Lesser General Public License as
+ *   published by the Free Software Foundation; either version 2.1 of
+ *   the License, or (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU Lesser General Public License for more details.
+ *
+ *   You should have received a copy of the GNU Lesser General Public
+ *   License along with this library; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ */
+
+/*
+ *  16-bit mixing routine; the saturation step uses MMX (packssdw) and
+ *  emms is executed before the asm block ends.
+ *
+ *  MIX_AREAS1 and LOCK_PREFIX are macros supplied by the including file.
+ *  LOCK_PREFIX is a string pasted in front of the cmpxchgw and addl
+ *  instructions below.
+ *
+ *  size     - number of samples to process; the loop is skipped when 0
+ *  dst      - 16-bit output samples
+ *  src      - 16-bit input samples
+ *  sum      - 32-bit running sums, advanced in step with dst
+ *  dst_step, src_step, sum_step - added to the raw dst/src/sum
+ *             addresses after every sample, i.e. offsets in bytes
+ */
+static void MIX_AREAS1(unsigned int size,
+                      volatile signed short *dst, signed short *src,
+                      volatile signed int *sum, size_t dst_step,
+                      size_t src_step, size_t sum_step)
+{
+       /*
+        *  RSI - src
+        *  RDI - dst
+        *  RBX - sum
+        *  ECX - 1 for the cmpxchg, then sample, then old_sample
+        *  EAX - 0 for the cmpxchg, then the saturated sample
+        *  EDX - sum_sample (*sum as read before the cmpxchg)
+        */
+       __asm__ __volatile__ (
+               "\n"
+
+               /*
+                *  initialization, load RSI, RDI, RBX registers
+                */
+               "\tmovq %1, %%rdi\n"
+               "\tmovq %2, %%rsi\n"
+               "\tmovq %3, %%rbx\n"
+
+               /*
+                * if (size == 0) skip everything up to label 6;
+                * do {
+                */
+               "\tcmpl $0, %0\n"
+               "jz 6f\n"
+
+               "\t.p2align 4,,15\n"
+
+               "1:"
+
+               /*
+                *   sum_sample = *sum;
+                *   first = (cmpxchg(*dst, 0, 1) == 0);
+                *   sample = *src;
+                *   if (first)
+                *     sample -= sum_sample;
+                *   *sum += sample;
+                *
+                *   movswl leaves the flags untouched, so the jnz still
+                *   tests the outcome of the cmpxchgw.
+                */
+               "\tmovw $0, %%ax\n"
+               "\tmovw $1, %%cx\n"
+               "\tmovl (%%rbx), %%edx\n"
+               "\t" LOCK_PREFIX "cmpxchgw %%cx, (%%rdi)\n"
+               "\tmovswl (%%rsi), %%ecx\n"
+               "\tjnz 2f\n"
+               "\tsubl %%edx, %%ecx\n"
+               "2:"
+               "\t" LOCK_PREFIX "addl %%ecx, (%%rbx)\n"
+
+               /*
+                *   do {
+                *     sample = old_sample = *sum;
+                *     saturate(sample);     (packssdw: signed 16-bit)
+                *     *dst = sample;
+                *   } while (old_sample != *sum);
+                *
+                *   Only the low 16 bits of the packed result are stored,
+                *   so the contents of %mm1 do not influence *dst.
+                */
+
+               "3:"
+               "\tmovl (%%rbx), %%ecx\n"
+               "\tmovd %%ecx, %%mm0\n"
+               "\tpackssdw %%mm1, %%mm0\n"
+               "\tmovd %%mm0, %%eax\n"
+               "\tmovw %%ax, (%%rdi)\n"
+               "\tcmpl %%ecx, (%%rbx)\n"
+               "\tjnz 3b\n"
+
+               /*
+                *   advance dst, src and sum by their steps;
+                * } while (--size != 0);
+                *
+                * The jmp to label 6 targets the very next instruction.
+                */
+               "\tadd %4, %%rdi\n"
+               "\tadd %5, %%rsi\n"
+               "\tadd %6, %%rbx\n"
+               "\tdecl %0\n"
+               "\tjnz 1b\n"
+               "\tjmp 6f\n"
+
+               "6:"
+               
+               "\temms\n"
+
+               : /* no output regs
+                  * NOTE(review): "decl %0" writes to size although it is only
+                  * listed as an input, and the clobber list has no "memory"
+                  * entry for the stores through dst and sum -- confirm.
+                  */
+               : "m" (size), "m" (dst), "m" (src), "m" (sum), "m" (dst_step), "m" (src_step), "m" (sum_step)
+               : "rsi", "rdi", "edx", "ecx", "rbx", "eax"
+       );
+}
+
+/*
+ *  32-bit version (24-bit resolution)
+ *
+ *  Every source sample is shifted right by 8 (arithmetic) before it is
+ *  added to *sum; the sum is clamped to [-0x800000, 0x7fffff] and shifted
+ *  left by 8 again before it is written to *dst.
+ *
+ *  MIX_AREAS2 and LOCK_PREFIX are macros supplied by the including file.
+ *  LOCK_PREFIX is a string pasted in front of the cmpxchgl and addl
+ *  instructions below.
+ *
+ *  size     - number of samples to process; the loop is skipped when 0
+ *  dst      - 32-bit output samples
+ *  src      - 32-bit input samples
+ *  sum      - 32-bit running sums, advanced in step with dst
+ *  dst_step, src_step, sum_step - added to the raw dst/src/sum
+ *             addresses after every sample, i.e. offsets in bytes
+ */
+static void MIX_AREAS2(unsigned int size,
+                      volatile signed int *dst, signed int *src,
+                      volatile signed int *sum, size_t dst_step,
+                      size_t src_step, size_t sum_step)
+{
+       /*
+        *  RSI - src
+        *  RDI - dst
+        *  RBX - sum
+        *  ECX - 1 for the cmpxchg, then sample, then old_sample
+        *  EAX - 0 for the cmpxchg, then the clamped sample
+        *  EDX - sum_sample (*sum as read before the cmpxchg)
+        */
+       __asm__ __volatile__ (
+               "\n"
+
+               /*
+                *  initialization, load RSI, RDI, RBX registers
+                */
+               "\tmovq %1, %%rdi\n"
+               "\tmovq %2, %%rsi\n"
+               "\tmovq %3, %%rbx\n"
+
+               /*
+                * if (size == 0) skip everything up to label 6;
+                * do {
+                */
+               "\tcmpl $0, %0\n"
+               "jz 6f\n"
+
+               "\t.p2align 4,,15\n"
+
+               "1:"
+
+               /*
+                *   sum_sample = *sum;
+                *   if (cmpxchg(*dst, 0, 1) == 0)
+                *     sample = (*src >> 8) - sum_sample;
+                *   else
+                *     sample = *src >> 8;          (label 2)
+                *   *sum += sample;                (label 21)
+                */
+               "\tmovl $0, %%eax\n"
+               "\tmovl $1, %%ecx\n"
+               "\tmovl (%%rbx), %%edx\n"
+               "\t" LOCK_PREFIX "cmpxchgl %%ecx, (%%rdi)\n"
+               "\tjnz 2f\n"
+               "\tmovl (%%rsi), %%ecx\n"
+               /* sample >>= 8 */
+               "\tsarl $8, %%ecx\n"
+               "\tsubl %%edx, %%ecx\n"
+               "\tjmp 21f\n"
+               "2:"
+               "\tmovl (%%rsi), %%ecx\n"
+               /* sample >>= 8 */
+               "\tsarl $8, %%ecx\n"
+               "21:"
+               "\t" LOCK_PREFIX "addl %%ecx, (%%rbx)\n"
+
+               /*
+                *   do {
+                *     sample = old_sample = *sum;
+                *     saturate(sample);     (to the signed 24-bit range)
+                *     *dst = sample << 8;
+                *   } while (old_sample != *sum);
+                */
+
+               "3:"
+               "\tmovl (%%rbx), %%ecx\n"
+               /*
+                *  if (sample > 0x7fffff)
+                *    sample = 0x7fffff;
+                */
+               "\tmovl $0x7fffff, %%eax\n"
+               "\tcmpl %%eax, %%ecx\n"
+               "\tjg 4f\n"
+               /*
+                *  if (sample < -0x800000)
+                *    sample = -0x800000;
+                */
+               "\tmovl $-0x800000, %%eax\n"
+               "\tcmpl %%eax, %%ecx\n"
+               "\tjl 4f\n"
+               "\tmovl %%ecx, %%eax\n"
+               "4:"
+               /*
+                *  sample <<= 8;
+                */
+               "\tsall $8, %%eax\n"
+               "\tmovl %%eax, (%%rdi)\n"
+               "\tcmpl %%ecx, (%%rbx)\n"
+               "\tjnz 3b\n"
+
+               /*
+                *   advance dst, src and sum by their steps;
+                * } while (--size != 0);
+                */
+               "\tadd %4, %%rdi\n"
+               "\tadd %5, %%rsi\n"
+               "\tadd %6, %%rbx\n"
+               "\tdecl %0\n"
+               "\tjnz 1b\n"
+               // "\tjmp 6f\n"
+               
+               "6:"
+               : /* no output regs
+                  * NOTE(review): "decl %0" writes to size although it is only
+                  * listed as an input, and the clobber list has no "memory"
+                  * entry for the stores through dst and sum -- confirm.
+                  */
+               : "m" (size), "m" (dst), "m" (src), "m" (sum), "m" (dst_step), "m" (src_step), "m" (sum_step)
+               : "rsi", "rdi", "edx", "ecx", "rbx", "eax"
+       );
+}
+