git.alsa-project.org Git - alsa-lib.git/commitdiff
Added code.c to measure times for critical part of dmix plugin
author: Jaroslav Kysela <perex@perex.cz>
Wed, 19 Feb 2003 21:00:45 +0000 (21:00 +0000)
committer: Jaroslav Kysela <perex@perex.cz>
Wed, 19 Feb 2003 21:00:45 +0000 (21:00 +0000)
test/code.c [new file with mode: 0644]

diff --git a/test/code.c b/test/code.c
new file mode 100644 (file)
index 0000000..1d4e0ac
--- /dev/null
@@ -0,0 +1,352 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Read the CPU time-stamp counter into the 64-bit lvalue val.
+ *
+ * RDTSC returns the counter in EDX:EAX.  The two halves are fetched through
+ * separate "=a"/"=d" outputs and combined in C, because the "=A" constraint
+ * only denotes the EDX:EAX pair on 32-bit x86; on x86-64 it selects a single
+ * register and half of the value would be lost.
+ */
+#define rdtscll(val) \
+     do { \
+             unsigned int tsc_lo_, tsc_hi_; \
+             __asm__ __volatile__("rdtsc" : "=a" (tsc_lo_), "=d" (tsc_hi_)); \
+             (val) = ((unsigned long long)tsc_hi_ << 32) | tsc_lo_; \
+     } while (0)
+
+/*
+ * Branch-prediction hints: hand the condition to GCC's __builtin_expect()
+ * with an expected value of 1 (likely) or 0 (unlikely).
+ */
+#define likely(x)       __builtin_expect((x),1)
+#define unlikely(x)     __builtin_expect((x),0)
+
+/*
+ * s16 is the type of the dst/src samples, s32 the type of the sum buffer
+ * and of intermediate results.  The names assume short is 16 bits and int
+ * is 32 bits on the target.
+ */
+typedef short int s16;
+typedef int s32;
+
+/*
+ * CONFIG_SMP is defined unconditionally here, so LOCK_PREFIX always expands
+ * to the x86 "lock" instruction prefix; the empty variant only takes effect
+ * if the CONFIG_SMP definition is removed.
+ */
+#define CONFIG_SMP
+
+#ifdef CONFIG_SMP
+#define LOCK_PREFIX "lock ; "
+#else
+#define LOCK_PREFIX ""
+#endif
+
+/*
+ * __xg() casts a pointer to a pointer to a large dummy structure so that
+ * __cmpxchg() can name the target memory as an "m" operand; ptr is a void
+ * pointer there and cannot be dereferenced directly.
+ */
+struct __xchg_dummy { unsigned long a[100]; };
+#define __xg(x) ((struct __xchg_dummy *)(x))
+
+/*
+ * Compare-and-exchange on the object at ptr using the x86 cmpxchg
+ * instruction (with LOCK_PREFIX).
+ *
+ * ptr  - address of the object to update
+ * old  - value the object is expected to hold (loaded into EAX)
+ * new  - value to store if the object equals old
+ * size - width of the object in bytes; 1, 2 and 4 are handled
+ *
+ * Returns the contents of EAX after the instruction, i.e. the value that
+ * was found at ptr; the exchange took place when this equals old.
+ *
+ * NOTE(review): for any other size nothing is executed and old is
+ * returned, which a caller cannot tell apart from a successful exchange.
+ */
+static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+                                     unsigned long new, int size)
+{
+       unsigned long prev;
+       switch (size) {
+       case 1:
+               /* 8-bit variant: %b1 selects the byte register of operand 1 */
+               __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
+                                    : "=a"(prev)
+                                    : "q"(new), "m"(*__xg(ptr)), "0"(old)
+                                    : "memory");
+               return prev;
+       case 2:
+               /* 16-bit variant: %w1 selects the word register of operand 1 */
+               __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
+                                    : "=a"(prev)
+                                    : "q"(new), "m"(*__xg(ptr)), "0"(old)
+                                    : "memory");
+               return prev;
+       case 4:
+               /* 32-bit variant */
+               __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
+                                    : "=a"(prev)
+                                    : "q"(new), "m"(*__xg(ptr)), "0"(old)
+                                    : "memory");
+               return prev;
+       }
+       /* unsupported size: memory is left untouched */
+       return old;
+}
+
+/*
+ * Type-generic front end for __cmpxchg(): the operand width is taken from
+ * sizeof(*(ptr)) and the result is cast back to the type of *(ptr).
+ */
+#define cmpxchg(ptr,o,n)\
+       ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
+                                       (unsigned long)(n),sizeof(*(ptr))))
+
+/*
+ * Add v to *dst with a single (LOCK_PREFIX-ed) x86 addl instruction.
+ *
+ * dst - address of the 32-bit value to update
+ * v   - amount to add
+ *
+ * *dst is a read-write ("+m") operand because addl both reads and writes
+ * the memory location; with a write-only ("=m") constraint the compiler is
+ * allowed to treat the previous contents as dead.  The instruction also
+ * changes the flags, hence the "cc" clobber.
+ */
+static inline void atomic_add(volatile int *dst, int v)
+{
+       __asm__ __volatile__(
+               LOCK_PREFIX "addl %1,%0"
+               : "+m" (*dst)
+               : "ir" (v)
+               : "cc");
+}
+
+/*
+ * Plain (non-atomic) reference mixer: adds each source sample to the
+ * destination sample and stores the result saturated to the s16 range.
+ *
+ * size     - number of samples to process
+ * dst      - destination samples, read and updated in place
+ * src      - source samples
+ * sum      - unused, present so all mix_areas* variants share one signature
+ * dst_step - distance in bytes between consecutive dst samples
+ * src_step - distance in bytes between consecutive src samples
+ * sum_step - unused
+ */
+void mix_areas0(unsigned int size,
+               volatile s16 *dst, s16 *src,
+               volatile s32 *sum,
+               unsigned int dst_step,
+               unsigned int src_step,
+               unsigned int sum_step)
+{
+       (void)sum;
+       (void)sum_step;
+       while (size-- > 0) {
+               s32 sample = *dst + *src;
+               /*
+                * Compare against the real limits: a bit-mask test on the
+                * upper half also fires for every in-range negative value.
+                */
+               if (unlikely(sample > 0x7fff))
+                       *dst = 0x7fff;
+               else if (unlikely(sample < -0x8000))
+                       *dst = -0x8000;
+               else
+                       *dst = (s16)sample;
+               /* advance by bytes; a cast expression is not an lvalue in C */
+               dst = (volatile s16 *)((volatile char *)dst + dst_step);
+               src = (s16 *)((char *)src + src_step);
+       }
+}
+
+/*
+ * Hand-written 32-bit x86 assembly version of the mixing loop implemented
+ * in C by mix_areas2(): the source sample is added to the shared sum
+ * buffer and the saturated sum is written to the destination.
+ *
+ * size     - number of samples to process
+ * dst      - destination samples (s16)
+ * src      - source samples (s16)
+ * sum      - 32-bit sum buffer
+ * dst_step - distance in bytes between consecutive dst samples
+ * src_step - distance in bytes between consecutive src samples
+ * sum_step - distance in bytes between consecutive sum entries
+ *
+ * The pointers are loaded with movl into 32-bit registers, so this code is
+ * only valid where pointers are 32 bits wide.
+ */
+void mix_areas1(unsigned int size,
+               volatile s16 *dst, s16 *src,
+               volatile s32 *sum, unsigned int dst_step,
+               unsigned int src_step, unsigned int sum_step)
+{
+       /*
+        *  ESI - src
+        *  EDI - dst
+        *  EBX - sum
+        *  ECX - sample / value read from *sum
+        *  EAX - cmpxchg comparand / temporary
+        *  EDX - size
+        */
+       __asm__ __volatile__ (
+               "\n"
+
+               /*
+                *  initialization, load EDX, ESI, EDI, EBX registers
+                */
+               "\tmovl %0, %%edx\n"
+               "\tmovl %1, %%edi\n"
+               "\tmovl %2, %%esi\n"
+               "\tmovl %3, %%ebx\n"
+
+               /*
+                * while (size-- > 0) {
+                */
+               "\tcmp $0, %%edx\n"
+               "jz 6f\n"
+
+               "1:"
+
+               /*
+                *   sample = *src;
+                *   if (cmpxchg(dst, 0, 1) == 0)
+                *     sample -= *sum;
+                *   atomic_add(sum, sample);
+                *
+                * movswl does not modify the flags, so the jnz below still
+                * tests the result of the cmpxchgw.
+                */
+               "\tmovw $0, %%ax\n"
+               "\tmovw $1, %%cx\n"
+               "\tlock; cmpxchgw %%cx, (%%edi)\n"
+               "\tmovswl (%%esi), %%ecx\n"
+               "\tjnz 2f\n"
+               "\tsubl (%%ebx), %%ecx\n"
+               "2:"
+               "\tlock; addl %%ecx, (%%ebx)\n"
+
+               /*
+                *   do {
+                *     sample = *sum;
+                *     *dst = saturate(sample);
+                *   } while (sample != *sum);
+                */
+
+               "3:"
+               "\tmovl (%%ebx), %%ecx\n"
+               "\tcmpl $0x7fff,%%ecx\n"
+               "\tjg 4f\n"
+               "\tcmpl $-0x8000,%%ecx\n"
+               "\tjl 5f\n"
+               "\tmovw %%cx, (%%edi)\n"
+               "\tcmpl %%ecx, (%%ebx)\n"
+               "\tjnz 3b\n"
+
+               /*
+                * advance the pointers; while (size-- > 0)
+                */
+               "\tadd %4, %%edi\n"
+               "\tadd %5, %%esi\n"
+               "\tadd %6, %%ebx\n"
+               "\tdecl %%edx\n"
+               "\tjnz 1b\n"
+               "\tjmp 6f\n"
+
+               /*
+                *  sample > 0x7fff: store the upper limit, then the same
+                *  re-check and loop tail as above
+                */
+
+               "4:"
+               "\tmovw $0x7fff, %%ax\n"
+               "\tmovw %%ax, (%%edi)\n"
+               "\tcmpl %%ecx,(%%ebx)\n"
+               "\tjnz 3b\n"
+               "\tadd %4, %%edi\n"
+               "\tadd %5, %%esi\n"
+               "\tadd %6, %%ebx\n"
+               "\tdecl %%edx\n"
+               "\tjnz 1b\n"
+               "\tjmp 6f\n"
+
+               /*
+                *  sample < -0x8000: store the lower limit, then the same
+                *  re-check and loop tail as above
+                */
+
+               "5:"
+               "\tmovw $-0x8000, %%ax\n"
+               "\tmovw %%ax, (%%edi)\n"
+               "\tcmpl %%ecx, (%%ebx)\n"
+               "\tjnz 3b\n"
+               "\tadd %4, %%edi\n"
+               "\tadd %5, %%esi\n"
+               "\tadd %6, %%ebx\n"
+               "\tdecl %%edx\n"
+               "\tjnz 1b\n"
+               /* falls through to the exit label */
+
+               "6:"
+
+               : /* no output regs */
+               : "m" (size), "m" (dst), "m" (src), "m" (sum), "m" (dst_step), "m" (src_step), "m" (sum_step)
+               /*
+                * "memory": the code stores through dst and sum, which are
+                * not listed as outputs; "cc": the flags are modified.
+                */
+               : "esi", "edi", "edx", "ecx", "ebx", "eax", "memory", "cc"
+       );
+}
+
+
+/*
+ * Variant of mix_areas1() that saturates with the MMX packssdw instruction
+ * instead of compare-and-branch sequences.
+ *
+ * size     - number of samples to process
+ * dst      - destination samples (s16)
+ * src      - source samples (s16)
+ * sum      - 32-bit sum buffer
+ * dst_step - distance in bytes between consecutive dst samples
+ * src_step - distance in bytes between consecutive src samples
+ * sum_step - distance in bytes between consecutive sum entries
+ *
+ * The pointers are loaded with movl into 32-bit registers, so this code is
+ * only valid where pointers are 32 bits wide.
+ *
+ * NOTE(review): MM0 is overwritten and emms is executed, but no MMX/x87
+ * register is listed as clobbered - confirm this is safe for the callers.
+ */
+void mix_areas1_mmx(unsigned int size,
+                   volatile s16 *dst, s16 *src,
+                   volatile s32 *sum, unsigned int dst_step,
+                   unsigned int src_step, unsigned int sum_step)
+{
+       /*
+        *  ESI - src
+        *  EDI - dst
+        *  EBX - sum
+        *  ECX - sample / value read from *sum
+        *  EAX - cmpxchg comparand / saturated sample
+        *  EDX - size
+        */
+       __asm__ __volatile__ (
+               "\n"
+
+               /*
+                *  initialization, load EDX, ESI, EDI, EBX registers
+                */
+               "\tmovl %0, %%edx\n"
+               "\tmovl %1, %%edi\n"
+               "\tmovl %2, %%esi\n"
+               "\tmovl %3, %%ebx\n"
+
+               /*
+                * while (size-- > 0) {
+                */
+               "\tcmp $0, %%edx\n"
+               "jz 6f\n"
+
+               "1:"
+
+               /*
+                *   sample = *src;
+                *   if (cmpxchg(dst, 0, 1) == 0)
+                *     sample -= *sum;
+                *   atomic_add(sum, sample);
+                *
+                * movswl does not modify the flags, so the jnz below still
+                * tests the result of the cmpxchgw.
+                */
+               "\tmovw $0, %%ax\n"
+               "\tmovw $1, %%cx\n"
+               "\tlock; cmpxchgw %%cx, (%%edi)\n"
+               "\tmovswl (%%esi), %%ecx\n"
+               "\tjnz 2f\n"
+               "\tsubl (%%ebx), %%ecx\n"
+               "2:"
+               "\tlock; addl %%ecx, (%%ebx)\n"
+
+               /*
+                *   do {
+                *     sample = *sum;
+                *     *dst = saturate(sample);
+                *   } while (sample != *sum);
+                *
+                * packssdw converts the dword in MM0 to a signed-saturated
+                * word.  MM1 is never initialized; it only feeds the upper
+                * half of the packed result, and just the low 16 bits (AX)
+                * are stored.
+                */
+
+               "3:"
+               "\tmovl (%%ebx), %%ecx\n"
+               "\tmovd %%ecx, %%mm0\n"
+               "\tpackssdw %%mm1, %%mm0\n"
+               "\tmovd %%mm0, %%eax\n"
+               "\tmovw %%ax, (%%edi)\n"
+               "\tcmpl %%ecx, (%%ebx)\n"
+               "\tjnz 3b\n"
+
+               /*
+                * advance the pointers; while (size-- > 0)
+                */
+               "\tadd %4, %%edi\n"
+               "\tadd %5, %%esi\n"
+               "\tadd %6, %%ebx\n"
+               "\tdecl %%edx\n"
+               "\tjnz 1b\n"
+               "\tjmp 6f\n"
+
+               "6:"
+
+               /* leave MMX state before returning to C code */
+               "\temms\n"
+
+               : /* no output regs */
+               : "m" (size), "m" (dst), "m" (src), "m" (sum), "m" (dst_step), "m" (src_step), "m" (sum_step)
+               /*
+                * "memory": the code stores through dst and sum, which are
+                * not listed as outputs; "cc": the flags are modified.
+                */
+               : "esi", "edi", "edx", "ecx", "ebx", "eax", "memory", "cc"
+       );
+}
+
+
+/*
+ * C version of the mixing loop, built on cmpxchg() and atomic_add().
+ *
+ * For every sample: if *dst was 0 (cmpxchg replaces it with 1), the current
+ * *sum is subtracted from the source sample before the sample is added to
+ * *sum, so *sum ends up holding just this source sample.  Afterwards the sum
+ * is saturated to the s16 range and written to *dst, repeating until *sum
+ * no longer changes between the read and the re-check.
+ *
+ * size     - number of samples to process
+ * dst      - destination samples
+ * src      - source samples
+ * sum      - 32-bit sum buffer
+ * dst_step - distance in bytes between consecutive dst samples
+ * src_step - distance in bytes between consecutive src samples
+ * sum_step - distance in bytes between consecutive sum entries
+ */
+void mix_areas2(unsigned int size,
+               volatile s16 *dst, s16 *src,
+               volatile s32 *sum,
+               unsigned int dst_step,
+               unsigned int src_step,
+               unsigned int sum_step)
+{
+       while (size-- > 0) {
+               s32 sample = *src;
+               if (cmpxchg(dst, 0, 1) == 0)
+                       sample -= *sum;
+               atomic_add(sum, sample);
+               do {
+                       s16 s;
+                       sample = *sum;
+                       /*
+                        * Compare against the real limits: a bit-mask test
+                        * fires for in-range negative values and misses
+                        * 0x8000..0xffff.
+                        */
+                       if (unlikely(sample > 0x7fff))
+                               s = 0x7fff;
+                       else if (unlikely(sample < -0x8000))
+                               s = -0x8000;
+                       else
+                               s = (s16)sample;
+                       *dst = s;
+               } while (unlikely(sample != *sum));
+               /* advance by bytes; a cast expression is not an lvalue in C */
+               sum = (volatile s32 *)((volatile char *)sum + sum_step);
+               dst = (volatile s16 *)((volatile char *)dst + dst_step);
+               src = (s16 *)((char *)src + src_step);
+       }
+}
+
+/*
+ * Benchmark driver.
+ *
+ * Usage: code <size> <n> <max>
+ *   size - number of samples per buffer
+ *   n    - number of source buffers mixed into the destination
+ *   max  - source samples are random values in [-max, max - 1]
+ *
+ * Each mix_areas* variant is run over all n sources and the elapsed
+ * time-stamp-counter ticks are printed.  The dst and sum buffers are shared
+ * by all runs and are not reset in between.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE on bad arguments or when memory
+ * cannot be allocated.
+ */
+int main(int argc, char **argv)
+{
+       int size, n, max;
+       int i;
+       int ret = EXIT_FAILURE;
+       unsigned long long begin, end;
+       s16 *dst = NULL;
+       s32 *sum = NULL;
+       s16 **srcs = NULL;
+
+       if (argc < 4) {
+               fprintf(stderr, "Usage: %s <size> <n> <max>\n",
+                       argc > 0 ? argv[0] : "code");
+               return EXIT_FAILURE;
+       }
+       size = atoi(argv[1]);
+       n = atoi(argv[2]);
+       max = atoi(argv[3]);
+       /*
+        * max == 0 would divide by zero below; values above 0x8000 do not
+        * fit into an s16 sample.
+        */
+       if (size <= 0 || n <= 0 || max <= 0 || max > 0x8000) {
+               fprintf(stderr, "size and n must be positive, max must be in 1..32768\n");
+               return EXIT_FAILURE;
+       }
+
+       /* zero-filled: mix_areas0() reads dst before it is ever written */
+       dst = calloc((size_t)size, sizeof(*dst));
+       sum = calloc((size_t)size, sizeof(*sum));
+       /* zero-filled so the cleanup loop may free every slot */
+       srcs = calloc((size_t)n, sizeof(*srcs));
+       if (dst == NULL || sum == NULL || srcs == NULL) {
+               fprintf(stderr, "out of memory\n");
+               goto out;
+       }
+       for (i = 0; i < n; i++) {
+               int k;
+               s16 *s;
+               srcs[i] = s = malloc(sizeof(*s) * (size_t)size);
+               if (s == NULL) {
+                       fprintf(stderr, "out of memory\n");
+                       goto out;
+               }
+               for (k = 0; k < size; ++k, ++s) {
+                       *s = (rand() % (max * 2)) - max;
+               }
+       }
+
+       rdtscll(begin);
+       for (i = 0; i < n; i++) {
+               mix_areas0(size, dst, srcs[i], sum, 2, 2, 4);
+       }
+       rdtscll(end);
+       printf("mix_areas0    : %llu\n", end - begin);
+       rdtscll(begin);
+       for (i = 0; i < n; i++) {
+               mix_areas1(size, dst, srcs[i], sum, 2, 2, 4);
+       }
+       rdtscll(end);
+       printf("mix_areas1    : %llu\n", end - begin);
+       rdtscll(begin);
+       for (i = 0; i < n; i++) {
+               mix_areas1_mmx(size, dst, srcs[i], sum, 2, 2, 4);
+       }
+       rdtscll(end);
+       printf("mix_areas1_mmx: %llu\n", end - begin);
+       rdtscll(begin);
+       for (i = 0; i < n; i++) {
+               mix_areas2(size, dst, srcs[i], sum, 2, 2, 4);
+       }
+       rdtscll(end);
+       printf("mix_areas2    : %llu\n", end - begin);
+       ret = EXIT_SUCCESS;
+
+out:
+       if (srcs != NULL) {
+               for (i = 0; i < n; i++)
+                       free(srcs[i]);
+       }
+       free(srcs);
+       free(sum);
+       free(dst);
+       return ret;
+}