From: Jaroslav Kysela Date: Wed, 19 Feb 2003 21:00:45 +0000 (+0000) Subject: Added code.c to measure times for critical part of dmix plugin X-Git-Tag: v1.0.3~269 X-Git-Url: https://git.alsa-project.org/?a=commitdiff_plain;h=2322f9fec3995a217f2c327cecdfe54e44494c9f;p=alsa-lib.git Added code.c to measure times for critical part of dmix plugin --- diff --git a/test/code.c b/test/code.c new file mode 100644 index 00000000..1d4e0acc --- /dev/null +++ b/test/code.c @@ -0,0 +1,352 @@ +#include +#include +#include + +#define rdtscll(val) \ + __asm__ __volatile__("rdtsc" : "=A" (val)) + +#define likely(x) __builtin_expect((x),1) +#define unlikely(x) __builtin_expect((x),0) + +typedef short int s16; +typedef int s32; + +#define CONFIG_SMP + +#ifdef CONFIG_SMP +#define LOCK_PREFIX "lock ; " +#else +#define LOCK_PREFIX "" +#endif + +struct __xchg_dummy { unsigned long a[100]; }; +#define __xg(x) ((struct __xchg_dummy *)(x)) + +static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: + __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + } + return old; +} + +#define cmpxchg(ptr,o,n)\ + ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ + (unsigned long)(n),sizeof(*(ptr)))) + +static inline void atomic_add(volatile int *dst, int v) +{ + __asm__ __volatile__( + LOCK_PREFIX "addl %1,%0" + :"=m" (*dst) + :"ir" (v)); +} + +void mix_areas0(unsigned int size, + volatile s16 *dst, s16 *src, + volatile s32 *sum, + unsigned int dst_step, + unsigned int src_step, + unsigned int sum_step) +{ + while (size-- > 0) { + s32 sample = *dst + *src; + if (unlikely(sample & 0xffff0000)) + *dst = sample > 0 ? 0x7fff : -0x8000; + else + *dst = sample; + ((char *)dst) += dst_step; + ((char *)src) += src_step; + } +} + +void mix_areas1(unsigned int size, + volatile s16 *dst, s16 *src, + volatile s32 *sum, unsigned int dst_step, + unsigned int src_step, unsigned int sum_step) +{ + /* + * ESI - src + * EDI - dst + * EBX - sum + * ECX - old sample + * EAX - sample / temporary + * EDX - size + */ + __asm__ __volatile__ ( + "\n" + + /* + * initialization, load EDX, ESI, EDI, EBX registers + */ + "\tmovl %0, %%edx\n" + "\tmovl %1, %%edi\n" + "\tmovl %2, %%esi\n" + "\tmovl %3, %%ebx\n" + + /* + * while (size-- > 0) { + */ + "\tcmp $0, %%edx\n" + "jz 6f\n" + + "1:" + + /* + * sample = *src; + * if (cmpxchg(*dst, 0, 1) == 0) + * sample -= *sum; + * xadd(*sum, sample); + */ + "\tmovw $0, %%ax\n" + "\tmovw $1, %%cx\n" + "\tlock; cmpxchgw %%cx, (%%edi)\n" + "\tmovswl (%%esi), %%ecx\n" + "\tjnz 2f\n" + "\tsubl (%%ebx), %%ecx\n" + "2:" + "\tlock; addl %%ecx, (%%ebx)\n" + + /* + * do { + * sample = old_sample = *sum; + * saturate(v); + * *dst = sample; + * } while (v != *sum); + */ + + "3:" + "\tmovl (%%ebx), %%ecx\n" + "\tcmpl $0x7fff,%%ecx\n" + "\tjg 4f\n" + "\tcmpl $-0x8000,%%ecx\n" + "\tjl 5f\n" + "\tmovw %%cx, (%%edi)\n" + "\tcmpl %%ecx, (%%ebx)\n" + "\tjnz 3b\n" + + /* + * while (size-- > 0) + */ + "\tadd %4, %%edi\n" + "\tadd %5, %%esi\n" + "\tadd %6, %%ebx\n" + "\tdecl %%edx\n" + "\tjnz 1b\n" + "\tjmp 6f\n" + + /* + * sample > 0x7fff + */ + + "4:" + "\tmovw $0x7fff, %%ax\n" + "\tmovw %%ax, (%%edi)\n" + "\tcmpl %%ecx,(%%ebx)\n" + "\tjnz 3b\n" + "\tadd %4, %%edi\n" + "\tadd %5, %%esi\n" + "\tadd %6, %%ebx\n" + "\tdecl %%edx\n" + "\tjnz 1b\n" + "\tjmp 6f\n" + + /* + * sample < -0x8000 + */ + + "5:" + "\tmovw $-0x8000, %%ax\n" + "\tmovw %%ax, (%%edi)\n" + "\tcmpl %%ecx, (%%ebx)\n" + "\tjnz 3b\n" + "\tadd %4, %%edi\n" + "\tadd %5, %%esi\n" + "\tadd %6, %%ebx\n" + "\tdecl %%edx\n" + "\tjnz 1b\n" + // "\tjmp 6f\n" + + "6:" + + : /* no output regs */ + : "m" (size), "m" (dst), "m" (src), "m" (sum), "m" (dst_step), "m" (src_step), "m" (sum_step) + : "esi", "edi", "edx", "ecx", "ebx", "eax" + ); +} + + +void mix_areas1_mmx(unsigned int size, + volatile s16 *dst, s16 *src, + volatile s32 *sum, unsigned int dst_step, + unsigned int src_step, unsigned int sum_step) +{ + /* + * ESI - src + * EDI - dst + * EBX - sum + * ECX - old sample + * EAX - sample / temporary + * EDX - size + */ + __asm__ __volatile__ ( + "\n" + + /* + * initialization, load EDX, ESI, EDI, EBX registers + */ + "\tmovl %0, %%edx\n" + "\tmovl %1, %%edi\n" + "\tmovl %2, %%esi\n" + "\tmovl %3, %%ebx\n" + + /* + * while (size-- > 0) { + */ + "\tcmp $0, %%edx\n" + "jz 6f\n" + + "1:" + + /* + * sample = *src; + * if (cmpxchg(*dst, 0, 1) == 0) + * sample -= *sum; + * xadd(*sum, sample); + */ + "\tmovw $0, %%ax\n" + "\tmovw $1, %%cx\n" + "\tlock; cmpxchgw %%cx, (%%edi)\n" + "\tmovswl (%%esi), %%ecx\n" + "\tjnz 2f\n" + "\tsubl (%%ebx), %%ecx\n" + "2:" + "\tlock; addl %%ecx, (%%ebx)\n" + + /* + * do { + * sample = old_sample = *sum; + * saturate(v); + * *dst = sample; + * } while (v != *sum); + */ + + "3:" + "\tmovl (%%ebx), %%ecx\n" + "\tmovd %%ecx, %%mm0\n" + "\tpackssdw %%mm1, %%mm0\n" + "\tmovd %%mm0, %%eax\n" + "\tmovw %%ax, (%%edi)\n" + "\tcmpl %%ecx, (%%ebx)\n" + "\tjnz 3b\n" + + /* + * while (size-- > 0) + */ + "\tadd %4, %%edi\n" + "\tadd %5, %%esi\n" + "\tadd %6, %%ebx\n" + "\tdecl %%edx\n" + "\tjnz 1b\n" + "\tjmp 6f\n" + + "6:" + + "\temms\n" + + : /* no output regs */ + : "m" (size), "m" (dst), "m" (src), "m" (sum), "m" (dst_step), "m" (src_step), "m" (sum_step) + : "esi", "edi", "edx", "ecx", "ebx", "eax" + ); +} + + +void mix_areas2(unsigned int size, + volatile s16 *dst, s16 *src, + volatile s32 *sum, + unsigned int dst_step, + unsigned int src_step, + unsigned int sum_step) +{ + while (size-- > 0) { + s32 sample = *src; + if (cmpxchg(dst, 0, 1) == 0) + sample -= *sum; + atomic_add(sum, sample); + do { + sample = *sum; + s16 s; + if (unlikely(sample & 0x7fff0000)) + s = sample > 0 ? 0x7fff : -0x8000; + else + s = sample; + *dst = s; + } while (unlikely(sample != *sum)); + ((char *)sum) += sum_step; + ((char *)dst) += dst_step; + ((char *)src) += src_step; + } +} + +int main(int argc, char **argv) +{ + int size = atoi(argv[1]); + int n = atoi(argv[2]); + int max = atoi(argv[3]); + int i; + unsigned long long begin, end; + s16 *dst = malloc(sizeof(*dst) * size); + s32 *sum = calloc(size, sizeof(*sum)); + s16 **srcs = malloc(sizeof(*srcs) * n); + for (i = 0; i < n; i++) { + int k; + s16 *s; + srcs[i] = s = malloc(sizeof(s16) * size); + for (k = 0; k < size; ++k, ++s) { + *s = (rand() % (max * 2)) - max; + } + } + rdtscll(begin); + for (i = 0; i < n; i++) { + mix_areas0(size, dst, srcs[i], sum, 2, 2, 4); + } + rdtscll(end); + printf("mix_areas0 : %lld\n", end - begin); + rdtscll(begin); + for (i = 0; i < n; i++) { + mix_areas1(size, dst, srcs[i], sum, 2, 2, 4); + } + rdtscll(end); + printf("mix_areas1 : %lld\n", end - begin); + rdtscll(begin); + for (i = 0; i < n; i++) { + mix_areas1_mmx(size, dst, srcs[i], sum, 2, 2, 4); + } + rdtscll(end); + printf("mix_areas1_mmx: %lld\n", end - begin); + rdtscll(begin); + for (i = 0; i < n; i++) { + mix_areas2(size, dst, srcs[i], sum, 2, 2, 4); + } + rdtscll(end); + printf("mix_areas2 : %lld\n", end - begin); +}