Commit 7cbbc53a authored by David Schleef

Add cache information

parent eb89bd7b
......@@ -59,6 +59,9 @@
#define USE_I386_CPUID
#endif
int orc_x86_family_id;
int orc_x86_model_id;
int orc_x86_stepping;
#ifdef USE_I386_CPUINFO
static unsigned int
......@@ -178,29 +181,31 @@ get_cpuid (orc_uint32 op, orc_uint32 *a, orc_uint32 *b, orc_uint32 *c, orc_uint3
#ifdef __i386__
/*
 * Execute the CPUID instruction (i386 build).
 *
 * op        value loaded into EAX before the instruction
 * init_ecx  value loaded into ECX before the instruction
 * a, b, c, d  receive EAX, EBX, ECX and EDX as left by the instruction
 *
 * EBX is pushed before and popped after CPUID, and its result is copied
 * to ESI in between, so the value is returned through the "S" operand
 * and EBX itself is unchanged when the asm statement ends.
 * NOTE(review): presumably done because EBX can be reserved (e.g. as the
 * PIC register) on i386 -- confirm against the build flags.
 */
static void
get_cpuid (orc_uint32 op, orc_uint32 init_ecx, orc_uint32 *a, orc_uint32 *b,
    orc_uint32 *c, orc_uint32 *d)
{
  /* "+a" and "+c" are read-write operands, so seed them through the
   * output locations before entering the asm statement. */
  *a = op;
  *c = init_ecx;
  __asm__ (
      "  pushl %%ebx\n"
      "  cpuid\n"
      "  mov %%ebx, %%esi\n"
      "  popl %%ebx\n"
      : "+a" (*a), "=S" (*b), "+c" (*c), "=d" (*d));
}
#endif
#ifdef __amd64__
/*
 * Execute the CPUID instruction (amd64 build).
 *
 * op        value loaded into EAX before the instruction
 * init_ecx  value loaded into ECX before the instruction
 * a, b, c, d  receive EAX, EBX, ECX and EDX as left by the instruction
 *
 * The instruction is issued directly with EBX as an ordinary output
 * operand ("=b"), so the compiler is responsible for preserving RBX if
 * it needs to.  No manual push/pop of RBX and no scratch register are
 * used: restoring RBX inside the asm would overwrite the very value the
 * "=b" operand is supposed to return.
 */
static void
get_cpuid (orc_uint32 op, orc_uint32 init_ecx, orc_uint32 *a, orc_uint32 *b,
    orc_uint32 *c, orc_uint32 *d)
{
  /* "+a" and "+c" are read-write operands, so seed them through the
   * output locations before entering the asm statement. */
  *a = op;
  *c = init_ecx;
  __asm__ (
      "  cpuid\n"
      : "+a" (*a), "=b" (*b), "+c" (*c), "=d" (*d));
  /* Debugging aid, disabled:
   * ORC_ERROR("cpuid %08x %08x -> %08x %08x %08x %08x", op, init_ecx, *a, *b, *c, *d);
   */
}
#endif
......@@ -214,12 +219,96 @@ test_cpuid (void *ignored)
}
#endif
/* One row of the cache descriptor lookup table consulted by
 * handle_cache_descriptor(). */
struct desc_struct {
/* Descriptor value compared against the byte given to
 * handle_cache_descriptor(). */
int desc;
/* Cache level (1, 2 or 3); selects which _orc_data_cache_size_levelN
 * variable is written. */
int level;
/* Cache size stored into that variable; table rows write it as a
 * multiple of 1024. */
int size;
};
/*
 * Lookup table from a cache descriptor byte to { descriptor, level, size }.
 * handle_cache_descriptor() scans it linearly for each descriptor byte
 * extracted from the registers returned by CPUID leaf 2 and stores the
 * size into the matching _orc_data_cache_size_levelN variable.
 * Rows are listed in ascending descriptor order.
 */
struct desc_struct cache_descriptors[] = {
{ 0x0a, 1, 8*1024 },
{ 0x0c, 1, 16*1024 },
{ 0x0d, 1, 16*1024 },
{ 0x0e, 1, 24*1024 },
{ 0x21, 2, 256*1024 },
{ 0x22, 3, 512*1024 },
{ 0x23, 3, 1024*1024 },
{ 0x25, 3, 2*1024*1024 },
{ 0x29, 3, 4*1024*1024 },
{ 0x2c, 1, 32*1024 },
{ 0x41, 2, 128*1024 },
{ 0x42, 2, 256*1024 },
{ 0x43, 2, 512*1024 },
{ 0x44, 2, 1*1024*1024 },
{ 0x45, 2, 2*1024*1024 },
{ 0x46, 3, 4*1024*1024 },
{ 0x47, 3, 8*1024*1024 },
{ 0x48, 2, 3*1024*1024 },
{ 0x49, 2, 4*1024*1024 }, /* special case: handle_cache_descriptor() reports this as level 3 when family is 0xf and model is 0x6 */
{ 0x4a, 3, 6*1024*1024 },
{ 0x4b, 3, 8*1024*1024 },
{ 0x4c, 3, 12*1024*1024 },
{ 0x4d, 3, 16*1024*1024 },
{ 0x4e, 2, 6*1024*1024 },
{ 0x60, 1, 16*1024 },
{ 0x66, 1, 8*1024 },
{ 0x67, 1, 16*1024 },
{ 0x68, 1, 32*1024 },
{ 0x78, 2, 1*1024*1024 },
{ 0x79, 2, 128*1024 },
{ 0x7a, 2, 256*1024 },
{ 0x7b, 2, 512*1024 },
{ 0x7c, 2, 1*1024*1024 },
{ 0x7d, 2, 2*1024*1024 },
{ 0x7f, 2, 512*1024 },
{ 0x80, 2, 512*1024 },
{ 0x82, 2, 256*1024 },
{ 0x83, 2, 512*1024 },
{ 0x84, 2, 1*1024*1024 },
{ 0x85, 2, 2*1024*1024 },
{ 0x86, 2, 512*1024 },
{ 0x87, 2, 1*1024*1024 },
{ 0xe4, 3, 8*1024*1024 }
};
static void
handle_cache_descriptor (unsigned int desc)
{
int i;
if (desc == 0) return;
/* special case */
if (desc == 0x49 && orc_x86_family_id == 0xf && orc_x86_model_id == 0x6) {
ORC_DEBUG("level %d size %d", 3, 4*1024*1024);
_orc_data_cache_size_level3 = 4*1024*1024;
return;
}
for(i=0;i<sizeof(cache_descriptors)/sizeof(cache_descriptors[0]);i++){
if (desc == cache_descriptors[i].desc) {
ORC_DEBUG("level %d size %d", cache_descriptors[i].level,
cache_descriptors[i].size);
switch (cache_descriptors[i].level) {
case 1:
_orc_data_cache_size_level1 = cache_descriptors[i].size;
break;
case 2:
_orc_data_cache_size_level2 = cache_descriptors[i].size;
break;
case 3:
_orc_data_cache_size_level3 = cache_descriptors[i].size;
break;
}
}
}
}
static unsigned int
orc_sse_detect_cpuid (void)
{
orc_uint32 eax, ebx, ecx, edx;
orc_uint32 level;
char vendor[13] = { 0 };
orc_uint32 vendor;
unsigned int sse_flags = 0;
#if 0
int ret;
......@@ -233,42 +322,125 @@ orc_sse_detect_cpuid (void)
}
#endif
get_cpuid (0x00000000, &level, (orc_uint32 *)(vendor+0),
(orc_uint32 *)(vendor+8), (orc_uint32 *)(vendor+4));
get_cpuid (0x00000000, 0, &level, &vendor, &ecx, &edx);
ORC_DEBUG("cpuid %d %s", level, vendor);
ORC_DEBUG("cpuid %d %08x %08x %08x", level, vendor, ecx, edx);
if (level < 1) {
return 0;
}
if (level >= 1) {
get_cpuid (0x00000001, 0, &eax, &ebx, &ecx, &edx);
/* generic flags */
if (edx & (1<<26)) {
sse_flags |= ORC_TARGET_SSE_SSE2;
}
if (ecx & (1<<0)) {
sse_flags |= ORC_TARGET_SSE_SSE3;
}
if (ecx & (1<<9)) {
sse_flags |= ORC_TARGET_SSE_SSSE3;
}
if (ecx & (1<<19)) {
sse_flags |= ORC_TARGET_SSE_SSE4_1;
}
if (ecx & (1<<20)) {
sse_flags |= ORC_TARGET_SSE_SSE4_2;
}
get_cpuid (0x00000001, &eax, &ebx, &ecx, &edx);
if (vendor == (('G'<<0)|('e'<<8)|('n'<<16)|('u'<<24))) {
int family_id = (eax>>8)&0xf;
int model_id = (eax>>4)&0xf;
int ext_family_id = (eax>>20)&0xff;
int ext_model_id = (eax>>16)&0xf;
#if 0
if (edx & (1<<4)) {
_orc_profile_stamp = orc_profile_stamp_rdtsc;
}
#endif
if (family_id == 0xf) {
orc_x86_family_id = family_id + ext_family_id;
} else {
orc_x86_family_id = family_id;
}
/* Intel flags */
if (edx & (1<<26)) {
sse_flags |= ORC_TARGET_SSE_SSE2;
}
if (ecx & (1<<0)) {
sse_flags |= ORC_TARGET_SSE_SSE3;
}
if (ecx & (1<<9)) {
sse_flags |= ORC_TARGET_SSE_SSSE3;
if (model_id == 0xf) {
orc_x86_model_id = model_id + ext_model_id;
} else {
orc_x86_model_id = model_id;
}
orc_x86_stepping = eax&0xf;
ORC_DEBUG("family_id %d model_id %d stepping %d",
orc_x86_family_id, orc_x86_model_id, orc_x86_stepping);
}
}
if (ecx & (1<<19)) {
sse_flags |= ORC_TARGET_SSE_SSE4_1;
if (level >= 2 && vendor == (('G'<<0)|('e'<<8)|('n'<<16)|('u'<<24))) {
get_cpuid (0x00000002, 0, &eax, &ebx, &ecx, &edx);
if ((eax&0x80000000) == 0) {
handle_cache_descriptor ((eax>>8)&0xff);
handle_cache_descriptor ((eax>>16)&0xff);
handle_cache_descriptor ((eax>>24)&0xff);
}
if ((ebx&0x80000000) == 0) {
handle_cache_descriptor (ebx&0xff);
handle_cache_descriptor ((ebx>>8)&0xff);
handle_cache_descriptor ((ebx>>16)&0xff);
handle_cache_descriptor ((ebx>>24)&0xff);
}
if ((ecx&0x80000000) == 0) {
handle_cache_descriptor (ecx&0xff);
handle_cache_descriptor ((ecx>>8)&0xff);
handle_cache_descriptor ((ecx>>16)&0xff);
handle_cache_descriptor ((ecx>>24)&0xff);
}
if ((edx&0x80000000) == 0) {
handle_cache_descriptor (edx&0xff);
handle_cache_descriptor ((edx>>8)&0xff);
handle_cache_descriptor ((edx>>16)&0xff);
handle_cache_descriptor ((edx>>24)&0xff);
}
}
if (ecx & (1<<20)) {
sse_flags |= ORC_TARGET_SSE_SSE4_2;
if (level >= 4 && vendor == (('G'<<0)|('e'<<8)|('n'<<16)|('u'<<24))) {
int i;
for(i=0;i<10;i++){
int type;
int level;
int l;
int p;
int w;
int s;
get_cpuid (0x00000004, i, &eax, &ebx, &ecx, &edx);
type = eax&0xf;
if (type == 0) break;
level = (eax>>5)&0x7;
l = ((ebx>>0)&0xfff)+1;
p = ((ebx>>12)&0x3ff)+1;
w = ((ebx>>22)&0x3ff)+1;
s = ecx + 1;
ORC_DEBUG("type %d level %d line size %d partitions %d ways %d sets %d",
type, level, l, p, w, s);
if (type == 1 || type == 3) {
switch (level) {
case 1:
_orc_data_cache_size_level1 = l*p*w*s;
break;
case 2:
_orc_data_cache_size_level2 = l*p*w*s;
break;
case 3:
_orc_data_cache_size_level3 = l*p*w*s;
break;
}
}
}
}
if (memcmp (vendor, "AuthenticAMD", 12) == 0) {
get_cpuid (0x80000001, &eax, &ebx, &ecx, &edx);
if (level >= 1 && vendor == (('A'<<0)|('u'<<8)|('t'<<16)|('h'<<24))) {
get_cpuid (0x80000001, 0, &eax, &ebx, &ecx, &edx);
/* AMD flags */
if (ecx & (1<<6)) {
......@@ -278,18 +450,17 @@ orc_sse_detect_cpuid (void)
sse_flags |= ORC_TARGET_SSE_SSE5;
}
#if 0
get_cpuid (0x80000005, &eax, &ebx, &ecx, &edx);
ORC_INFO("L1 D-cache: %d kbytes, %d-way, %d lines/tag, %d line size",
get_cpuid (0x80000005, 0, &eax, &ebx, &ecx, &edx);
_orc_data_cache_size_level1 = ((ecx>>24)&0xff) * 1024;
ORC_DEBUG ("L1 D-cache: %d kbytes, %d-way, %d lines/tag, %d line size",
(ecx>>24)&0xff, (ecx>>16)&0xff, (ecx>>8)&0xff, ecx&0xff);
ORC_INFO("L1 I-cache: %d kbytes, %d-way, %d lines/tag, %d line size",
ORC_DEBUG ("L1 I-cache: %d kbytes, %d-way, %d lines/tag, %d line size",
(edx>>24)&0xff, (edx>>16)&0xff, (edx>>8)&0xff, edx&0xff);
get_cpuid (0x80000006, &eax, &ebx, &ecx, &edx);
ORC_INFO("L2 cache: %d kbytes, %d assoc, %d lines/tag, %d line size",
get_cpuid (0x80000006, 0, &eax, &ebx, &ecx, &edx);
_orc_data_cache_size_level2 = ((ecx>>16)&0xffff) * 1024;
ORC_DEBUG ("L2 cache: %d kbytes, %d assoc, %d lines/tag, %d line size",
(ecx>>16)&0xffff, (ecx>>12)&0xf, (ecx>>8)&0xf, ecx&0xff);
#endif
}
if (orc_compiler_flag_check ("-sse2")) {
......@@ -325,7 +496,7 @@ orc_mmx_detect_cpuid (void)
char vendor[13] = { 0 };
unsigned int mmx_flags = 0;
get_cpuid (0x00000000, &level, (orc_uint32 *)(vendor+0),
get_cpuid (0x00000000, 0, &level, (orc_uint32 *)(vendor+0),
(orc_uint32 *)(vendor+8), (orc_uint32 *)(vendor+4));
ORC_DEBUG("cpuid %d %s", level, vendor);
......@@ -334,7 +505,7 @@ orc_mmx_detect_cpuid (void)
return 0;
}
get_cpuid (0x00000001, &eax, &ebx, &ecx, &edx);
get_cpuid (0x00000001, 0, &eax, &ebx, &ecx, &edx);
/* Intel flags */
if (edx & (1<<23)) {
......@@ -345,7 +516,7 @@ orc_mmx_detect_cpuid (void)
}
if (memcmp (vendor, "AuthenticAMD", 12) == 0) {
get_cpuid (0x80000001, &eax, &ebx, &ecx, &edx);
get_cpuid (0x80000001, 0, &eax, &ebx, &ecx, &edx);
/* AMD flags */
if (edx & (1<<22)) {
......
......@@ -54,6 +54,11 @@ static int _orc_compiler_flag_debug;
void
orc_sse_init (void)
{
#if defined(HAVE_AMD64) || defined(HAVE_I386)
/* initializes cache information */
orc_sse_get_cpu_flags ();
#endif
#if defined(HAVE_I386)
if (!(orc_sse_get_cpu_flags () & ORC_TARGET_SSE_SSE2)) {
sse_target.executable = FALSE;
......
......@@ -700,3 +700,22 @@ orc_program_get_max_accumulator_size (OrcProgram *program)
return max;
}
/* Data cache sizes per level, as written by the CPU detection code.
 * Being file-scope objects without initialisers they start out as 0. */
int _orc_data_cache_size_level1;
int _orc_data_cache_size_level2;
int _orc_data_cache_size_level3;

/* Write value through dest unless the caller passed a null pointer. */
static void
orc_store_cache_size (int *dest, int value)
{
  if (dest) {
    *dest = value;
  }
}

/*
 * Report the currently recorded data cache sizes.
 *
 * level1, level2, level3  output locations for the level 1, 2 and 3
 *                         sizes; any of them may be null, in which case
 *                         that level is simply not reported.
 */
void
orc_get_data_cache_sizes (int *level1, int *level2, int *level3)
{
  orc_store_cache_size (level1, _orc_data_cache_size_level1);
  orc_store_cache_size (level2, _orc_data_cache_size_level2);
  orc_store_cache_size (level3, _orc_data_cache_size_level3);
}
......@@ -595,10 +595,16 @@ int orc_program_get_max_var_size (OrcProgram *program);
int orc_program_get_max_array_size (OrcProgram *program);
int orc_program_get_max_accumulator_size (OrcProgram *program);
void orc_get_data_cache_sizes (int *level1, int *level2, int *level3);
#ifdef ORC_ENABLE_UNSTABLE_API
int orc_compiler_flag_check (const char *flag);
extern int _orc_data_cache_size_level1;
extern int _orc_data_cache_size_level2;
extern int _orc_data_cache_size_level3;
#endif
#endif
......
......@@ -42,6 +42,8 @@ main(int argc, char *argv[])
double cpufreq;
int unalign;
OrcProgram *p;
int level1, level2, level3;
int max;
//const uint8_t zero = 0;
orc_init ();
......@@ -82,7 +84,16 @@ main(int argc, char *argv[])
result = orc_program_compile (p);
}
for(i=0;i<160;i++){
/* Choose how far the size sweep below runs.  Step i uses a buffer of
 * 2^(6.0 + 0.1*i) bytes, so (log2(cache) - 6.0) * 10 steps reach the
 * cache size and the extra 20 steps carry on to about four times that.
 * Prefer the level 3 size, fall back to level 2, and use a fixed 200
 * steps when neither is known. */
orc_get_data_cache_sizes (&level1, &level2, &level3);
if (level3 > 0) {
  max = (log(level3)/M_LN2 - 6.0) * 10 + 20;
} else if (level2 > 0) {
  max = (log(level2)/M_LN2 - 6.0) * 10 + 20;
} else {
  max = 200;
}
for(i=0;i<max;i++){
double x = i*0.1 + 6.0;
int size = pow(2.0, x);
......
......@@ -59,6 +59,14 @@ main (int argc, char *argv[])
printf("Orc " VERSION " - integrated testing tool\n");
{
int level1, level2, level3;
orc_get_data_cache_sizes(&level1, &level2, &level3);
printf("L1 cache: %d\n", level1);
printf("L2 cache: %d\n", level2);
printf("L3 cache: %d\n", level3);
}
if (filename) {
int n;
int ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment