Windows2003-3790/sdktools/vmtests/color.c
2020-09-30 16:53:55 +02:00

317 lines
10 KiB
C

/* memtest.c, Robert Nix, December, 1993
* nix@vliw.enet.dec.com
* based on:
* cbash.c
* kirk johnson @ MIT
* february 1993
*
* RCS $Id: cbash.c,v 1.2 1993/08/12 15:30:17 tuna Exp $
*
* Usage: memtest <machname> <iterations> [<max-mem>]
* machname - a short identifier for the machine being tested.
* iterations - target number of iterations to run for stable timing.
* max-mem - maximum working set size to test.
*
* Iterations and max-mem can be specified with a "k" or "m" suffix
* for kilo or mega iterations/mem.
*
* Example: Test of a Gateway 60 Mhz Pentium system
* Command Line: memtest gp560 8m 4m
* Output:
*
--------------------------------------------------------------------------------
* 4k 8k 16k 32k 64k 128k 256k 512k 1m 2m 4m
* L gp560 4 68 68 86 86 86 93 104 111 111 111 122
* L gp560 8 68 68 107 107 107 114 139 165 154 154 154
* L gp560 16 89 68 143 143 143 161 204 232 240 243 243
* L gp560 32 68 68 172 168 168 207 290 347 365 365 365
* L gp560 64 68 72 168 168 168 207 290 350 368 368 368
* L gp560 128 72 75 168 168 168 211 293 358 379 418 379
* L gp560 256 75 79 168 168 168 207 293 379 397 401 401
* L gp560 512 86 86 172 168 168 215 297 418 440 443 494
* L gp560 1k 100 104 175 172 168 218 304 501 522 529 529
* L gp560 2k 136 139 179 172 172 222 322 665 687 755 701
* L gp560 4k 132 243 232 225 222 286 401 991 1016 1094 1048
* L gp560 8k 132 136 243 232 225 290 350 923 973 1034 1109
* L gp560 16k 132 136 132 243 232 225 333 937 908 994 1041
* L gp560 32k 136 132 136 136 243 232 304 833 919 930 1012
*
--------------------------------------------------------------------------------
* Explanation of output.
*
* There are three kinds of tests.
*
* L - Load latency test.
* Measures the average repetition rate, in ns, of a latency-oriented load
* loop. The two main variables are:
*
* (1) working set, or the amount of memory touched by the loop. This
* varies across the columns in the output above, from a low of 4k
* bytes to a high of max-mem (16m bytes by default).
*
* (2) stride, or the number of bytes separating successive loads.
* This is the number in the 3rd column of each of the "L" rows
* in the output above, and varies from 4 bytes to 32k bytes.
*
* Interpreting the results. This is easiest on a 3d chart in Excel.
* Two strides are always particularly interesting:
*
* - The cache line or block size stride (32 bytes above).
* Big changes in latencies across the columns show the sizes
* and basic performance of the load side of the cache hierarchy.
*
* If you don't know the cache line size: (a) look across the first row
* for the first column that takes a big jump up in latency (the jump
* from 68ns to 86ns between the 8k column and 16k column above), then (b)
* scan down the rows of that column for the first relatively stable value
* (172ns in the 32 byte stride row above). The stride of the row containing
* that stable value is probably the cache line size.
*
* Look across the cache line size row. Access time jumps at 16K --
* so the L1 cache is 8K -- and then jumps again at 512K -- so the L2
* cache is 256K. The slope between 64K and 512K could be caused
* by a thrash in the L2 cache; page coloring could remove this thrash.
*
* - The page size stride (4k above).
* Big changes in latencies across the columns expose the TB size and the cost
* of a TB refill.
*
* Scan the 4k line. It takes a big jump in latency at the 512K working
* set (and actually starts to thrash at the 256K working set). This test says the TB
* can map somewhere in the neighborhood of 64 4K pages. The TB fill time
* looks to be somewhere around 650-700 ns (subtract large working set entries
* in the 32-byte stride line from corresponding entries in the 4k stride line).
*
* The output always contains a little noise:
*
* - Boost the "iterations" command line parameter to remove timing jitter.
*
* - All entries contain some loop overhead. It's fair to normalize results by subtracting
* out the difference between the reported times and the known latency to the fastest level
* of the memory hierarchy.
*
* - The entries in the lower-left hand corner of the table (large
* strides in small memory) are dominated by loop overhead; ignore them.
*
* - Implement a good page coloring algorithm to remove jitter caused by cache
* thrashing. Look at the cache-line sized stride to see the frequency of thrashing.
*
*/
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#define DEF_MAXMEM 16777216
#define MINMEM 4096
#define ITYPE signed int
signed long max_mem;
char *mach_name;
#if defined(_WIN64)
typedef unsigned __int64 ULONG_PTR;
#else
typedef unsigned long ULONG_PTR;
#endif
#define MAXSTRIDE 32768
#define MINSTRIDE 4
char *version_string = "1.0 (20 Dec 1993)";
extern ITYPE arg_to_int(char *);
extern double bash(char *, long, long, long);
extern int bash_loop(char *, long, long, long);
extern void allocate_memory(char *, long);
extern void usage(char *);
/*
 * main - entry point: parse arguments, allocate the test region, and print
 * the load-latency table.
 *
 *   argv[1]  machine name echoed at the start of every row, or "-v" to
 *            print the version string and exit
 *   argv[2]  target number of loop iterations per measurement
 *   argv[3]  optional maximum working-set size; DEF_MAXMEM when omitted
 *
 * Prints one header line of working-set sizes, then one "L" row per stride
 * (MINSTRIDE..MAXSTRIDE, doubling) holding the value bash() returns for each
 * working-set size (MINMEM..max_mem, doubling).
 *
 * Returns 0 on success; exits with status 1 on bad usage or when the region
 * cannot be allocated.
 */
int __cdecl main(
    int argc,
    char *argv[]
    )
{
    /* The region start is rounded up to this boundary, so the allocation
     * carries this much slack. */
    enum { REGION_ALIGN = 128 * 1024 };
    ITYPE nbytes;
    ITYPE stride;
    ITYPE iters;
    char *raw;
    char *region;

    if ((argc > 1) && (strcmp(argv[1], "-v") == 0)) {
        fprintf(stderr, "This is memtest version %s.\n", version_string);
        exit(1);
    }
    if (argc < 3)
        usage(argv[0]);

    mach_name = argv[1];
    iters = arg_to_int(argv[2]);
    if (argc < 4) {
        max_mem = DEF_MAXMEM;
    } else {
        max_mem = arg_to_int(argv[3]);
    }

    /* Size the request in size_t so adding the slack cannot overflow a
     * signed long, and test the pointer malloc actually returned before
     * doing any arithmetic on it. */
    raw = (char *) malloc((size_t) max_mem + REGION_ALIGN);
    if (raw == NULL) {
        perror("malloc failed");
        exit(1);
    }
    region = (char *) ((((ULONG_PTR) raw) + (REGION_ALIGN - 1))
                       & ~((ULONG_PTR) REGION_ALIGN - 1));

    /* Header line: one column label per working-set size. */
    printf(" %8s", "");
    printf("%8s", "");
    for (nbytes = MINMEM; nbytes <= max_mem; nbytes += nbytes) {
        if (nbytes >= (1024 * 1024))
            printf("%4dm", nbytes / (1024 * 1024));
        else if (nbytes >= 1024)
            printf("%4dk", nbytes / 1024);
        else
            printf("%5d", nbytes);
        /* Stop before a doubling that would exceed max_mem; for large
         * max_mem that doubling would overflow the signed counter. */
        if (nbytes > max_mem / 2)
            break;
    }
    printf("\n");

    /* One row per stride, one measurement per working-set size. */
    for (stride = MINSTRIDE; stride <= MAXSTRIDE; stride += stride) {
        printf("L %-8s", mach_name);
        if (stride >= (1024 * 1024))
            printf("%7dm", stride / (1024 * 1024));
        else if (stride >= 1024)
            printf("%7dk", stride / 1024);
        else
            printf("%8d", stride);
        for (nbytes = MINMEM; nbytes <= max_mem; nbytes += nbytes) {
            double ns_ref = bash(region, nbytes, stride, iters);
            printf("%5.0f", ns_ref);
            fflush(stdout);  /* show each result as soon as it is measured */
            if (nbytes > max_mem / 2)
                break;       /* same overflow guard as the header loop */
        }
        printf("\n");
    }

    free(raw);
    return 0;
}
ITYPE
arg_to_int(char *arg)
{
ITYPE rslt = 0;
ITYPE mult = 1;
switch (arg[strlen(arg) - 1]) {
case 'k':
case 'K':
mult = 1024;
break;
case 'm':
case 'M':
mult = 1024 * 1024;
break;
default:
mult = 1;
break;
}
if (!((arg[0] >= '0') && arg[0] <= '9')) {
fprintf(stderr, "Argument %s not a number\n", arg);
usage("memtest");
exit(1);
}
if (sscanf(arg, "%ld", &rslt) != 1) {
fprintf(stderr, "Argument %s not a number\n", arg);
usage("memtest");
exit(1);
}
rslt *= mult;
return rslt;
}
/*
 * bash - time a strided load loop over a region and return the average time
 * per load in nanoseconds, as measured with clock().
 *
 * The region is written first (memset and allocate_memory), then
 * bash_loop() runs once untimed as a warm-up, then the timed run follows.
 * The requested iteration count is rounded up to a whole number of passes
 * over the region, and the result is divided by that rounded count.
 *
 * Writes "trip count problem" to stderr and exits with status 1 when
 * stride is not positive or nbytes is too small to hold a single int.
 */
double
bash(
    char *region,
    long nbytes,    /* size of region to bash (bytes) */
    long stride,    /* stride through region (bytes) */
    long iters      /* target # of loop iterations */
    )
{
    const long word = (long) sizeof(int);
    /* The loop results are stored here so the compiler has to keep the
     * loads that are being timed. */
    volatile int sink;
    long count;
    long reps;
    clock_t start, stop;
    double elapsed;

    /* Checked up front, in signed arithmetic: with these guarantees the
     * last load at (count - 1) * stride always ends inside nbytes. The
     * previous "nbytes - sizeof(int)" was unsigned and wrapped for small
     * nbytes, and a zero stride divided by zero. */
    if (stride <= 0 || nbytes < word) {
        fprintf(stderr, "trip count problem\n");
        exit(1);
    }
    count = ((nbytes - word) / stride) + 1;

    /* Round the target up to whole passes; always make at least one. */
    if (iters <= 0)
        reps = 1;
    else
        reps = iters / count + ((iters % count) != 0);
    iters = reps * count;

    /* make sure the memory is allocated */
    memset(region, 0, (size_t) nbytes);
    memset(region, 1, (size_t) nbytes);
    allocate_memory(region, nbytes);
    memset(region, 0, (size_t) nbytes);

    /* warm up the cache */
    sink = bash_loop(region, count, stride, 1L);

    /* run the bash loop */
    start = clock();
    sink = bash_loop(region, count, stride, reps);
    stop = clock();
    (void) sink;

    elapsed = (double) (stop - start) / CLOCKS_PER_SEC;
    return 1e9 * (elapsed / iters);
}
/*
 * Spacing, in bytes, between the locations allocate_memory() writes.
 * Your virtual memory pagesize must be at least this big.
 */
#define MIN_PAGESIZE 256

/*
 * allocate_memory - write zeros into the region at MIN_PAGESIZE intervals.
 *
 * At each offset 0, MIN_PAGESIZE, 2 * MIN_PAGESIZE, ... below nbytes, up to
 * sizeof(int) bytes are set to zero. Does nothing when nbytes <= 0.
 */
void
allocate_memory(
    char *region,   /* memory region to be bashed */
    long nbytes)    /* size of region (bytes) */
{
    long offset;

    for (offset = 0; offset < nbytes; offset += MIN_PAGESIZE) {
        long remaining = nbytes - offset;
        size_t width = sizeof(int);

        /* Clip the last write so it never runs past nbytes; memset also
         * avoids requiring region to be int-aligned. */
        if (remaining < (long) sizeof(int))
            width = (size_t) remaining;
        memset(region + offset, 0, width);
    }
}
/*
 * bash_loop - make reps passes over the region, loading one int at each of
 * count locations spaced stride bytes apart, starting at region.
 *
 * Returns the XOR of every value loaded; 0 when reps or count is not
 * positive.
 */
int
bash_loop(
    char *region,   /* memory region to be bashed */
    long count,     /* number of locations to bash */
    long stride,    /* stride between locations (bytes) */
    long reps       /* number of passes through region */
    )
{
    int acc = 0;
    long pass;

    for (pass = reps; pass > 0; pass--) {
        char *cursor = region;   /* every pass restarts at the region base */
        long left = count;

        while (left > 0) {
            acc ^= *((int *) cursor);
            cursor += stride;
            left--;
        }
    }
    return acc;
}
/*
 * usage - write the command-line synopsis to stderr and exit with status 1.
 *
 * progname is the name shown in the synopsis line. Does not return.
 */
void
usage(char *progname)
{
    static const char *const details[] = {
        " <machname> machine name\n",
        " <iters> target # of accesses\n",
        " <maxmem> maximum amount of mem to touch (def 16 Mb)\n"
    };
    size_t i;

    fprintf(stderr, "usage: %s <machname> <iters> [<maxmem>]\n", progname);
    for (i = 0; i < sizeof details / sizeof details[0]; i++)
        fputs(details[i], stderr);
    exit(1);
}