blob: 9705c65636da9e78aa32b9819e6cd994af6a1d00 [file] [log] [blame]
Duane Sandd4a80982012-10-12 14:25:19 -07001#include <stdio.h>
2#include <stdlib.h>
3#include <assert.h>
4#include <cutils/memory.h>
5#include <time.h>
6
/*
 * All systems must implement or emulate the rdhwr instruction to read
 * the userlocal register. Systems that emulate it also return the count
 * register when accessing register $2, so this should work on most systems.
 */
/*
 * Selects the timing source used by get_count(): when defined, the counter is
 * read with the rdhwr instruction; when undefined, the clock_gettime() based
 * variant in the #else branch is compiled instead.
 */
#define USE_RDHWR

#ifdef USE_RDHWR
#define UNITS "cycles" /* unit label pasted into the printed results */
#define SCALE 2 /* Most CPU's; timeone() multiplies raw counter deltas by this */
/*
 * Read hardware register $2 with rdhwr and return its 32-bit value.
 *
 * Callers take the unsigned difference of two readings to time the code that
 * runs in between.
 */
static inline uint32_t
get_count(void)
{
    uint32_t res;
    /*
     * .set push/.set pop temporarily switch the assembler to mips32r2 so the
     * rdhwr mnemonic is accepted whatever ISA level the file is built for.
     * volatile plus the "memory" clobber stop the compiler from dropping the
     * read or moving memory accesses across it.
     */
    asm volatile (".set push; .set mips32r2; rdhwr %[res],$2; .set pop" : [res] "=r" (res) : : "memory");
    return res;
}
24#else
25#define UNITS "ns"
26#define SCALE 1
27static inline uint32_t
28get_count(void)
29{
30 struct timespec now;
31 uint32_t res;
32 clock_gettime(CLOCK_REALTIME, &now);
33 res = (uint32_t)(now.tv_sec * 1000000000LL + now.tv_nsec);
34 // printf ("now=%d.%09d res=%d\n", (int)now.tv_sec, (int)now.tv_nsec, res);
35 return res;
36}
37#endif
38
39uint32_t overhead;
40void
41measure_overhead(void)
42{
43 int i;
44 uint32_t start, stop, delta;
45 for (i = 0; i < 32; i++) {
46 start = get_count();
47 stop = get_count();
48 delta = stop - start;
49 if (overhead == 0 || delta < overhead)
50 overhead = delta;
51 }
52 printf("overhead is %d"UNITS"\n", overhead);
53}
54
55uint32_t
56timeone(void (*fn)(), void *d, uint32_t val, uint32_t bytes)
57{
58 uint32_t start, stop, delta;
59 start = get_count();
60 (*fn)(d, val, bytes);
61 stop = get_count();
62 delta = stop - start - overhead;
63 // printf ("start=0x%08x stop=0x%08x delta=0x%08x\n", start, stop, delta);
64 return delta * SCALE;
65}
66
67/* define VERIFY to check that memset only touches the bytes it's supposed to */
68/*#define VERIFY*/
69
70/*
71 * Using a big arena means that memset will most likely miss in the cache
72 * NB Enabling verification effectively warms up the cache...
73 */
/* Size in bytes (16 MiB) of the region in which testone() places each fill. */
#define ARENASIZE 0x1000000
#ifdef VERIFY
/*
 * Eight extra bytes: testone() shifts every offset up by 4 to leave a guard
 * word in front of the filled area and checks 4 more bytes behind it.
 */
char arena[ARENASIZE+8]; /* Allow space for guard words */
#else
char arena[ARENASIZE];
#endif
80
81void
82testone(char *tag, void (*fn)(), int trials, int minbytes, int maxbytes, int size, int threshold)
83{
84 int offset;
85 void *d;
86 void *p;
87 uint32_t v, notv = 0;
88 uint32_t n;
89 int i, units;
90 int totalunits = 0, totalbytes = 0, samples = 0;
91
92 /* Reset RNG to ensure each test uses same random values */
93 srand(0); /* FIXME should be able to use some other seed than 0 */
94
95 for (i = 0; i < trials; i++) {
96 n = minbytes + (rand() % (maxbytes-minbytes)); /* How many bytes to do */
97 offset = ((rand() % (ARENASIZE-n))); /* Where to start */
98
99#ifdef VERIFY
100 offset += 4; /* Allow space for guard word at beginning */
101#endif
102 v = rand();
103
104 /* Adjust alignment and sizes based on transfer size */
105 switch (size) {
106 case 1:
107 v &= 0xff;
108 notv = ~v & 0xff;
109 break;
110 case 2:
111 v &= 0xffff;
112 notv = ~v & 0xffff;
113 offset &= ~1;
114 n &= ~1;
115 break;
116 case 4:
117 notv = ~v;
118 offset &= ~3;
119 n &= ~3;
120 break;
121 }
122
123 d = &arena[offset];
124
125#ifdef VERIFY
126 /* Initialise the area and guard words */
127 for (p = &arena[offset-4]; p < (void *)&arena[offset+n+4]; p = (void *)((uint32_t)p + size)) {
128 if (size == 1)
129 *(uint8_t *)p = notv;
130 else if (size == 2)
131 *(uint16_t *)p = notv;
132 else if (size == 4)
133 *(uint32_t *)p = notv;
134 }
135#endif
136 units = timeone(fn, d, v, n);
137#ifdef VERIFY
138 /* Check the area and guard words */
139 for (p = &arena[offset-4]; p < (void *)&arena[offset+n+4]; p = (void *)((uint32_t)p + size)) {
140 uint32_t got = 0;
141 if (size == 1)
142 got = *(uint8_t *)p;
143 else if (size == 2)
144 got = *(uint16_t *)p;
145 else if (size == 4)
146 got = *(uint32_t *)p;
147 if (p < (void *)&arena[offset]) {
148 if (got != notv)
149 printf ("%s: verify failure: preguard:%p d=%p v=%08x got=%08x n=%d\n", tag, p, d, v, got, n);
150 }
151 else if (p < (void *)&arena[offset+n]) {
152 if (got != v)
153 printf ("%s: verify failure: arena:%p d=%p v=%08x got=%08x n=%d\n", tag, p, d, v, n);
154 }
155 else {
156 if (got != notv)
157 printf ("%s: verify failure: postguard:%p d=%p v=%08x got=%08x n=%d\n", tag, p, d, v, n);
158 }
159 }
160#endif
161
162 /* If the cycle count looks reasonable include it in the statistics */
163 if (units < threshold) {
164 totalbytes += n;
165 totalunits += units;
166 samples++;
167 }
168 }
169
170 printf("%s: samples=%d avglen=%d avg" UNITS "=%d bp"UNITS"=%g\n",
171 tag, samples, totalbytes/samples, totalunits/samples, (double)totalbytes/(double)totalunits);
172}
173
/*
 * Fill routines under test whose definitions are not in this file. main()
 * casts each one to void (*)() and stores it in its function table.
 *
 * NOTE(review): the two memset16 variants are declared with a uint32_t*
 * destination although their value parameter is uint16_t - confirm against
 * the definitions before relying on these prototypes.
 */
extern void android_memset32_dumb(uint32_t* dst, uint32_t value, size_t size);
extern void android_memset16_dumb(uint32_t* dst, uint16_t value, size_t size);
extern void android_memset32_test(uint32_t* dst, uint32_t value, size_t size);
extern void android_memset16_test(uint32_t* dst, uint16_t value, size_t size);
extern void memset_cmips(void* dst, int value, size_t size);
extern void memset_omips(void* dst, int value, size_t size);
180
181int
182main(int argc, char **argv)
183{
184 int i;
185 struct {
186 char *type;
187 int trials;
188 int minbytes, maxbytes;
189 } *pp, params[] = {
190 {"small", 10000, 0, 64},
191 {"medium", 10000, 64, 512},
192 {"large", 10000, 512, 1280},
193 {"varied", 10000, 0, 1280},
194 };
195#define NPARAMS (sizeof(params)/sizeof(params[0]))
196 struct {
197 char *name;
198 void (*fn)();
199 int size;
200 } *fp, functions[] = {
201 {"dmemset16", (void (*)())android_memset16_dumb, 2},
202 {"tmemset16", (void (*)())android_memset16_test, 2},
203 {"lmemset16", (void (*)())android_memset16, 2},
204
205 {"dmemset32", (void (*)())android_memset32_dumb, 4},
206 {"tmemset32", (void (*)())android_memset32_test, 4},
207 {"lmemset32", (void (*)())android_memset32, 4},
208
209 {"cmemset", (void (*)())memset_cmips, 1},
210 {"omemset", (void (*)())memset_omips, 1},
211 {"lmemset", (void (*)())memset, 1},
212 };
213#define NFUNCTIONS (sizeof(functions)/sizeof(functions[0]))
214 char tag[40];
215 int threshold;
216
217 measure_overhead();
218
219 /* Warm up the page cache */
220 memset(arena, 0xff, ARENASIZE); /* use 0xff now to avoid COW later */
221
222 for (fp = functions; fp < &functions[NFUNCTIONS]; fp++) {
223 (fp->fn)(arena, 0xffffffff, ARENASIZE); /* one call to get the code into Icache */
224 for (pp = params; pp < &params[NPARAMS]; pp++) {
225 sprintf(tag, "%10s: %7s %4d-%4d", fp->name, pp->type, pp->minbytes, pp->maxbytes);
226
227 /* Set the cycle threshold */
228 threshold = pp->maxbytes * 4 * 10; /* reasonable for cycles and ns */
229 testone(tag, fp->fn, pp->trials, pp->minbytes, pp->maxbytes, fp->size, threshold);
230 }
231 printf ("\n");
232 }
233
234 return 0;
235}