#include <stdlib.h>
#include <stdio.h>
#include <string.h>
+#include <unistd.h>
+#include <sched.h>
+#include <sys/time.h>
#define rdtscll(val) \
__asm__ __volatile__("rdtsc" : "=A" (val))
typedef short int s16;
typedef int s32;
+#if 0
#define CONFIG_SMP
+#endif
#ifdef CONFIG_SMP
#define LOCK_PREFIX "lock ; "
:"ir" (v));
}
+static double detect_cpu_clock()
+{
+ struct timeval tm_begin, tm_end;
+ unsigned long long tsc_begin, tsc_end;
+
+ /* Warm cache */
+ gettimeofday(&tm_begin, 0);
+
+ rdtscll(tsc_begin);
+ gettimeofday(&tm_begin, 0);
+
+ usleep(1000000);
+
+ rdtscll(tsc_end);
+ gettimeofday(&tm_end, 0);
+
+ return (tsc_end - tsc_begin) / (tm_end.tv_sec - tm_begin.tv_sec + (tm_end.tv_usec - tm_begin.tv_usec) / 1e6);
+}
+
void mix_areas0(unsigned int size,
volatile s16 *dst, s16 *src,
volatile s32 *sum,
{
while (size-- > 0) {
s32 sample = *dst + *src;
- if (unlikely(sample & 0xffff0000))
- *dst = sample > 0 ? 0x7fff : -0x8000;
+ if (unlikely(sample < -0x8000))
+ *dst = -0x8000;
+ else if (unlikely(sample > 0x7fff))
+ *dst = 0x7fff;
else
*dst = sample;
((char *)dst) += dst_step;
((char *)src) += src_step;
+ ((char *)sum) += sum_step;
}
}
"\tcmp $0, %%edx\n"
"jz 6f\n"
+ "\t.p2align 4,,15\n"
+
"1:"
/*
*/
"\tmovw $0, %%ax\n"
"\tmovw $1, %%cx\n"
- "\tlock; cmpxchgw %%cx, (%%edi)\n"
+ "\t" LOCK_PREFIX "cmpxchgw %%cx, (%%edi)\n"
"\tmovswl (%%esi), %%ecx\n"
"\tjnz 2f\n"
"\tsubl (%%ebx), %%ecx\n"
"2:"
- "\tlock; addl %%ecx, (%%ebx)\n"
+ "\t" LOCK_PREFIX "addl %%ecx, (%%ebx)\n"
/*
* do {
* sample > 0x7fff
*/
+ "\t.p2align 4,,15\n"
+
"4:"
- "\tmovw $0x7fff, %%ax\n"
- "\tmovw %%ax, (%%edi)\n"
+ "\tmovw $0x7fff, (%%edi)\n"
"\tcmpl %%ecx,(%%ebx)\n"
"\tjnz 3b\n"
"\tadd %4, %%edi\n"
* sample < -0x8000
*/
+ "\t.p2align 4,,15\n"
+
"5:"
- "\tmovw $-0x8000, %%ax\n"
- "\tmovw %%ax, (%%edi)\n"
+ "\tmovw $-0x8000, (%%edi)\n"
"\tcmpl %%ecx, (%%ebx)\n"
"\tjnz 3b\n"
"\tadd %4, %%edi\n"
* while (size-- > 0) {
*/
"\tcmp $0, %%edx\n"
- "jz 6f\n"
+ "\tjz 6f\n"
+
+ "\t.p2align 4,,15\n"
"1:"
*/
"\tmovw $0, %%ax\n"
"\tmovw $1, %%cx\n"
- "\tlock; cmpxchgw %%cx, (%%edi)\n"
+ "\t" LOCK_PREFIX "cmpxchgw %%cx, (%%edi)\n"
"\tmovswl (%%esi), %%ecx\n"
"\tjnz 2f\n"
"\tsubl (%%ebx), %%ecx\n"
"2:"
- "\tlock; addl %%ecx, (%%ebx)\n"
+ "\t" LOCK_PREFIX "addl %%ecx, (%%ebx)\n"
/*
* do {
void mix_areas2(unsigned int size,
- volatile s16 *dst, s16 *src,
+ volatile s16 *dst, const s16 *src,
volatile s32 *sum,
unsigned int dst_step,
- unsigned int src_step,
- unsigned int sum_step)
+ unsigned int src_step)
{
while (size-- > 0) {
s32 sample = *src;
atomic_add(sum, sample);
do {
sample = *sum;
- s16 s;
- if (unlikely(sample & 0x7fff0000))
- s = sample > 0 ? 0x7fff : -0x8000;
+ if (unlikely(sample < -0x8000))
+ *dst = -0x8000;
+ else if (unlikely(sample > 0x7fff))
+ *dst = 0x7fff;
else
- s = sample;
- *dst = s;
+ *dst = sample;
} while (unlikely(sample != *sum));
- ((char *)sum) += sum_step;
+ sum++;
((char *)dst) += dst_step;
((char *)src) += src_step;
}
}
-int main(int argc, char **argv)
+void setscheduler(void)
{
- int size = 2048, n = 4, max = 0x7fff;
- int i;
- unsigned long long begin, end;
+ struct sched_param sched_param;
+ if (sched_getparam(0, &sched_param) < 0) {
+ printf("Scheduler getparam failed...\n");
+ return;
+ }
+ sched_param.sched_priority = sched_get_priority_max(SCHED_RR);
+ if (!sched_setscheduler(0, SCHED_RR, &sched_param)) {
+ printf("Scheduler set to Round Robin with priority %i...\n", sched_param.sched_priority);
+ fflush(stdout);
+ return;
+ }
+ printf("!!!Scheduler set to Round Robin with priority %i FAILED!!!\n", sched_param.sched_priority);
+}
+
+#define CACHE_SIZE (1024*1024)
+
+void init(s16 *dst, s32 *sum, int size)
+{
+ int count;
+ char *a;
+
+ for (count = size - 1; count >= 0; count--)
+ *sum++ = 0;
+ for (count = size - 1; count >= 0; count--)
+ *dst++ = 0;
+ a = malloc(CACHE_SIZE);
+ for (count = CACHE_SIZE - 1; count >= 0; count--) {
+ a[count] = count & 0xff;
+ a[count] ^= 0x55;
+ a[count] ^= 0xaa;
+ }
+ free(a);
+}
+
+int main(int argc, char **argv)
+{
+ int size = 2048, n = 4, max = 32267;
+ int LOOP = 30;
+ int i, t;
+ unsigned long long begin, end, diff, diff0, diff1, diff1_mmx, diff2;
+ double cpu_clock = detect_cpu_clock();
+
+ setscheduler();
+ printf("CPU clock: %fMhz\n\n", cpu_clock / 10e5);
if (argc == 4) {
size = atoi(argv[1]);
n = atoi(argv[2]);
*s = (rand() % (max * 2)) - max;
}
}
- rdtscll(begin);
- for (i = 0; i < n; i++) {
- mix_areas0(size, dst, srcs[i], sum, 2, 2, 4);
+
+ for (t = 0, diff0 = -1; t < LOOP; t++) {
+ init(dst, sum, size);
+ rdtscll(begin);
+ for (i = 0; i < n; i++) {
+ mix_areas0(size, dst, srcs[i], sum, 2, 2, 4);
+ }
+ rdtscll(end);
+ diff = end - begin;
+ if (diff < diff0)
+ diff0 = diff;
+ printf("mix_areas0 : %lld \r", diff); fflush(stdout);
}
- rdtscll(end);
- printf("mix_areas0 : %lld\n", end - begin);
- rdtscll(begin);
- for (i = 0; i < n; i++) {
- mix_areas1(size, dst, srcs[i], sum, 2, 2, 4);
+
+ for (t = 0, diff1 = -1; t < LOOP; t++) {
+ init(dst, sum, size);
+ rdtscll(begin);
+ for (i = 0; i < n; i++) {
+ mix_areas1(size, dst, srcs[i], sum, 2, 2, 4);
+ }
+ rdtscll(end);
+ diff = end - begin;
+ if (diff < diff1)
+ diff1 = diff;
+ printf("mix_areas1 : %lld \r", diff); fflush(stdout);
}
- rdtscll(end);
- printf("mix_areas1 : %lld\n", end - begin);
- rdtscll(begin);
- for (i = 0; i < n; i++) {
- mix_areas1_mmx(size, dst, srcs[i], sum, 2, 2, 4);
+
+ for (t = 0, diff1_mmx = -1; t < LOOP; t++) {
+ init(dst, sum, size);
+ rdtscll(begin);
+ for (i = 0; i < n; i++) {
+ mix_areas1_mmx(size, dst, srcs[i], sum, 2, 2, 4);
+ }
+ rdtscll(end);
+ diff = end - begin;
+ if (diff < diff1_mmx)
+ diff1_mmx = diff;
+ printf("mix_areas1_mmx: %lld \r", diff); fflush(stdout);
}
- rdtscll(end);
- printf("mix_areas1_mmx: %lld\n", end - begin);
- rdtscll(begin);
- for (i = 0; i < n; i++) {
- mix_areas2(size, dst, srcs[i], sum, 2, 2, 4);
+
+ for (t = 0, diff2 = -1; t < LOOP; t++) {
+ init(dst, sum, size);
+ rdtscll(begin);
+ for (i = 0; i < n; i++) {
+ mix_areas2(size, dst, srcs[i], sum, 2, 2);
+ }
+ rdtscll(end);
+ diff = end - begin;
+ if (diff < diff2)
+ diff2 = diff;
+ printf("mix_areas2 : %lld \r", diff); fflush(stdout);
}
- rdtscll(end);
- printf("mix_areas2 : %lld\n", end - begin);
+
+ printf(" \r");
+ printf("Summary (the best times):\n");
+ printf("mix_areas0 : %lld\n", diff0);
+ printf("mix_areas1 : %lld\n", diff1);
+ printf("mix_areas1_mmx: %lld\n", diff1_mmx);
+ printf("mix_areas2 : %lld\n", diff2);
+
return 0;
}