Commit 42fa8127 authored by Brian Paul's avatar Brian Paul

x86-64 transform optimizations (Mikko T.)

parent e3f684b7
......@@ -108,6 +108,7 @@ linux-x86 \
linux-x86-debug \
linux-x86-32 \
linux-x86-64 \
linux-x86-64-debug \
linux-x86-64-static \
linux-x86-glide \
linux-x86-static \
......
......@@ -8,14 +8,14 @@ CONFIG_NAME = linux-x86-64
CC = gcc
CXX = g++
CFLAGS = -m64 -Wall -O3 -ansi -pedantic -fPIC -D_POSIX_SOURCE -D_POSIX_C_SOURCE=199309L -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_XSHM -DPTHREADS -I/usr/X11R6/include
CFLAGS = -m64 -Wall -O3 -std=c99 -pedantic -fPIC -D_REENTRANT -D_POSIX_SOURCE -D_POSIX_C_SOURCE=199309L -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_XSHM -DPTHREADS -I/usr/X11R6/include -DUSE_X86_64_ASM
CXXFLAGS = -m64 -Wall -O3 -ansi -pedantic -fPIC -D_POSIX_SOURCE -D_POSIX_C_SOURCE=199309L -D_SVID_SOURCE -D_BSD_SOURCE
# C++ flags.  -std=c99 selects a C dialect and is not a valid option for C++
# sources compiled by g++, so keep -ansi here (the linux-x86-64-debug config
# uses -ansi for CXXFLAGS as well).
CXXFLAGS = -m64 -Wall -O3 -ansi -pedantic -fPIC -D_REENTRANT -D_POSIX_SOURCE -D_POSIX_C_SOURCE=199309L -D_SVID_SOURCE -D_BSD_SOURCE
GLUT_CFLAGS = -fexceptions
#ASM_SOURCES = $(X86_SOURCES)
ASM_SOURCES = $(X86-64_SOURCES)
LIB_DIR = $(TOP)/lib64
......
# Configuration for Linux for 64-bit X86 (Opteron)
# Debug variant: builds with -g and defines DEBUG, MESA_DEBUG and
# RUN_DEBUG_BENCHMARK in addition to the x86-64 assembly support.
include $(TOP)/configs/default
CONFIG_NAME = linux-x86-64-debug
# Compiler and flags
CC = gcc
CXX = g++
# -DUSE_X86_64_ASM compiles in the code guarded by #ifdef USE_X86_64_ASM;
# -DDEBUG additionally makes _mesa_init_all_x86_64_transform_asm() run the
# math self-tests after hooking in the assembly routines.
# NOTE(review): the effect of -DRUN_DEBUG_BENCHMARK is not visible here -- confirm.
CFLAGS = -g -m64 -Wall -O3 -std=c99 -pedantic -fPIC -D_REENTRANT -D_POSIX_SOURCE -D_POSIX_C_SOURCE=199309L -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_XSHM -DPTHREADS -I/usr/X11R6/include -DUSE_X86_64_ASM -DDEBUG -DMESA_DEBUG -DRUN_DEBUG_BENCHMARK
# C++ sources keep -ansi; -std=c99 is a C-only dialect option.
CXXFLAGS = -g -m64 -Wall -O3 -ansi -pedantic -fPIC -D_REENTRANT -D_POSIX_SOURCE -D_POSIX_C_SOURCE=199309L -D_SVID_SOURCE -D_BSD_SOURCE -DDEBUG -DMESA_DEBUG -DRUN_DEBUG_BENCHMARK
GLUT_CFLAGS = -fexceptions
# Assembly sources to build: the x86-64 list (x86-64/xform4.S).
ASM_SOURCES = $(X86-64_SOURCES)
# Library output directory for this configuration.
LIB_DIR = $(TOP)/lib64
# Library/program dependencies
# All X11 libraries are looked up in /usr/X11R6/lib64.
GL_LIB_DEPS = -L/usr/X11R6/lib64 -lX11 -lXext -lm -lpthread
GLUT_LIB_DEPS = -L$(LIB_DIR) -l$(GLU_LIB) -l$(GL_LIB) -L/usr/X11R6/lib64 -lX11 -lXmu -lXt -lXi -lm
GLW_LIB_DEPS = -L$(LIB_DIR) -l$(GL_LIB) -L/usr/X11R6/lib64 -lXt -lX11
APP_LIB_DEPS = -L$(LIB_DIR) -l$(GLUT_LIB) -l$(GLU_LIB) -l$(GL_LIB) -lm
......@@ -146,6 +146,7 @@ osmesa-only: depend subdirs $(LIB_DIR)/$(OSMESA_LIB_NAME)
subdirs:
@ (cd x86 ; $(MAKE))
@ (cd x86-64 ; $(MAKE))
# Make the GL library
$(LIB_DIR)/$(GL_LIB_NAME): $(STAND_ALONE_OBJECTS)
......@@ -223,5 +224,6 @@ clean:
-rm -f drivers/*/*.o
(cd drivers/dri ; $(MAKE) clean)
(cd x86 ; $(MAKE) clean)
(cd x86-64 ; $(MAKE) clean)
include depend
......@@ -185,6 +185,44 @@ extern char *mesa_profile;
#endif
#elif defined(__amd64__)
/*
 * Read the CPU time-stamp counter into val.
 *
 * The rdtsc instruction delivers its result in two 32-bit halves, bound here
 * to the "a" and "d" register constraints; the halves are combined into one
 * unsigned long value (low half | high half << 32).
 *
 * The temporaries carry a macro-private name so that an argument called
 * a or d (e.g. rdtscll(a)) is not captured by them: with plain a/d
 * temporaries such a call would assign to the temporary and leave the
 * caller's variable untouched.
 */
#define rdtscll(val) do {                                                  \
   unsigned int rdtscll_lo_, rdtscll_hi_;                                  \
   __asm__ volatile("rdtsc" : "=a" (rdtscll_lo_), "=d" (rdtscll_hi_));     \
   (val) = ((unsigned long)rdtscll_lo_) |                                  \
           (((unsigned long)rdtscll_hi_)<<32);                             \
} while(0)
/*
 * Copied from i386 PIII version.
 *
 * INIT_COUNTER() estimates the cost of the timing harness itself: it takes
 * 16 pairs of back-to-back rdtscll() readings and leaves the smallest
 * difference between the two readings of a pair in counter_overhead.
 */
#define INIT_COUNTER()                                                     \
   do {                                                                    \
      int cycle_pass = 16;                                                 \
      counter_overhead = LONG_MAX;                                         \
      while ( cycle_pass-- > 0 ) {                                         \
         unsigned long cycle_start, cycle_stop, cycle_delta;               \
         rdtscll(cycle_start);                                             \
         rdtscll(cycle_stop);                                              \
         cycle_delta = cycle_stop - cycle_start;                           \
         if ( counter_overhead > cycle_delta ) {                           \
            counter_overhead = cycle_delta;                                \
         }                                                                 \
      }                                                                    \
   } while (0)
/*
 * BEGIN_RACE(x) ... END_RACE(x) bracket the statements to be timed.
 *
 * Together they expand to a loop that executes the enclosed statements 10
 * times, reads the cycle counter with rdtscll() before and after each run,
 * keeps the smallest elapsed count in x and finally subtracts
 * counter_overhead from it.  BEGIN_RACE opens the loop body and END_RACE
 * closes it, so the two must always be used as a pair with the same x.
 * The enclosing scope has to provide an int named cycle_i as well as
 * counter_overhead.
 *
 * The last line of BEGIN_RACE intentionally carries no trailing backslash:
 * a continuation there would splice the "#define END_RACE" line that
 * follows into BEGIN_RACE's replacement text.
 */
#define BEGIN_RACE(x)                                                      \
   (x) = LONG_MAX;                                                         \
   for ( cycle_i = 0 ; cycle_i < 10 ; cycle_i++ ) {                        \
      unsigned long cycle_tmp1, cycle_tmp2;                                \
      rdtscll(cycle_tmp1);

#define END_RACE(x)                                                        \
      rdtscll(cycle_tmp2);                                                 \
      if ( (x) > (cycle_tmp2 - cycle_tmp1) ) {                             \
         (x) = cycle_tmp2 - cycle_tmp1;                                    \
      }                                                                    \
   }                                                                       \
   (x) -= counter_overhead;
#elif defined(__sparc__)
#define INIT_COUNTER() \
......
......@@ -166,7 +166,7 @@ ALIGN16(static GLfloat, d[TEST_COUNT][4]);
ALIGN16(static GLfloat, r[TEST_COUNT][4]);
static int test_transform_function( transform_func func, int psize,
int mtype, long *cycles )
int mtype, unsigned long *cycles )
{
GLvector4f source[1], dest[1], ref[1];
GLmatrix mat[1];
......@@ -187,7 +187,7 @@ static int test_transform_function( transform_func func, int psize,
mat->type = mtypes[mtype];
m = mat->m;
ASSERT( ((GLuint)m & 15) == 0 );
ASSERT( ((long)m & 15) == 0 );
init_matrix( m );
......@@ -279,7 +279,7 @@ static int test_transform_function( transform_func func, int psize,
void _math_test_all_transform_functions( char *description )
{
int psize, mtype;
long benchmark_tab[4][7];
unsigned long benchmark_tab[4][7];
static int first_time = 1;
if ( first_time ) {
......@@ -291,7 +291,7 @@ void _math_test_all_transform_functions( char *description )
if ( mesa_profile ) {
if ( !counter_overhead ) {
INIT_COUNTER();
_mesa_printf("counter overhead: %ld cycles\n\n", counter_overhead );
_mesa_printf("counter overhead: %lu cycles\n\n", counter_overhead );
}
_mesa_printf("transform results after hooking in %s functions:\n", description );
}
......@@ -310,7 +310,7 @@ void _math_test_all_transform_functions( char *description )
for ( mtype = 0 ; mtype < 7 ; mtype++ ) {
for ( psize = 1 ; psize <= 4 ; psize++ ) {
transform_func func = _mesa_transform_tab[psize][mtypes[mtype]];
long *cycles = &(benchmark_tab[psize-1][mtype]);
unsigned long *cycles = &(benchmark_tab[psize-1][mtype]);
if ( test_transform_function( func, psize, mtype, cycles ) == 0 ) {
char buf[100];
......
......@@ -51,6 +51,10 @@
#include "x86/common_x86_asm.h"
#endif
#ifdef USE_X86_64_ASM
#include "x86-64/x86-64.h"
#endif
#ifdef USE_SPARC_ASM
#include "sparc/sparc.h"
#endif
......@@ -212,6 +216,8 @@ _math_init_transformation( void )
_mesa_init_all_sparc_transform_asm();
#elif defined( USE_PPC_ASM )
_mesa_init_all_ppc_transform_asm();
#elif defined( USE_X86_64_ASM )
_mesa_init_all_x86_64_transform_asm();
#endif
}
......
......@@ -197,7 +197,8 @@ ASM_C_SOURCES = \
x86/3dnow.c \
x86/sse.c \
sparc/sparc.c \
ppc/common_ppc.c
ppc/common_ppc.c \
x86-64/x86-64.c
X86_SOURCES = \
x86/common_x86_asm.S \
......@@ -222,6 +223,9 @@ X86_SOURCES = \
X86_API = \
x86/glapi_x86.S
X86-64_SOURCES = \
x86-64/xform4.S
SPARC_SOURCES = \
sparc/clip.S \
sparc/norm.S \
......
# src/mesa/x86-64/Makefile
#
# Generates matypes.h, the header of structure offsets that xform4.o is
# declared to depend on, by running the gen_matypes tool from ../x86.

TOP = ../../..
include $(TOP)/configs/current

INCLUDE_DIRS = \
	-I$(TOP)/include/GL \
	-I$(TOP)/include \
	-I.. \
	-I../main \
	-I../math \
	-I../glapi \
	-I../tnl

# "default" and "clean" name actions, not files.  Declaring them phony keeps
# a stray file with either name from suppressing the rule.
.PHONY: default clean

default: matypes.h

clean:
	rm -f matypes.h

# need some special rules here, unfortunately
# gen_matypes writes the definitions to stdout; grep drops its
# '#include "assyntax.h"' line before the result is stored in matypes.h.
matypes.h: ../main/mtypes.h ../tnl/t_context.h ../x86/gen_matypes
	../x86/gen_matypes | grep -v '#include "assyntax.h' > matypes.h

xform4.o: matypes.h
Register Usage
rax temporary register; with variable arguments passes information
about the number of SSE registers used; 1st return register
rbx* callee-saved register; optionally used as base pointer
rcx used to pass 4th integer argument to functions
rdx used to pass 3rd argument to functions; 2nd return register
rsp* stack pointer
rbp* callee-saved register; optionally used as frame pointer
rsi used to pass 2nd argument to functions
rdi used to pass 1st argument to functions
r8 used to pass 5th argument to functions
r9 used to pass 6th argument to functions
r10 temporary register, used for passing a function's static chain pointer
r11 temporary register
r12-15* callee-saved registers
xmm0-1 used to pass and return floating point arguments
xmm2-7 used to pass floating point arguments
xmm8-15 temporary registers
mmx0-7 temporary registers
st0 temporary register; used to return long double arguments
st1 temporary register; used to return long double arguments
st2-7 temporary registers
fs Reserved for system use (as thread specific data register)
*) must be preserved across function calls
Integer arguments from list: rdi,rsi,rdx,rcx,r8,r9,stack
Floating point arguments from list: xmm0-xmm7
\ No newline at end of file
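/*
* This file is automatically generated from the Mesa internal type
* definitions (the matypes.h rule in src/mesa/x86-64/Makefile runs
* ../x86/gen_matypes to produce it). Do not edit directly.
*
* NOTE(review): several offsets below are only 4 bytes apart where the names
* suggest pointer members (e.g. V4F_DATA/V4F_START, MATRIX_DATA/MATRIX_INV,
* LIGHT_NEXT/LIGHT_PREV), which looks like output from a 32-bit build --
* confirm the file is regenerated on the x86-64 target before relying on it.
*/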
#ifndef __ASM_TYPES_H__
#define __ASM_TYPES_H__
/* =============================================================
* Offsets for GLcontext
*/
#define CTX_DRIVER_CTX 904
#define CTX_LIGHT_ENABLED 38592
#define CTX_LIGHT_SHADE_MODEL 38596
#define CTX_LIGHT_COLOR_MAT_FACE 38600
#define CTX_LIGHT_COLOR_MAT_MODE 38604
#define CTX_LIGHT_COLOR_MAT_MASK 38608
#define CTX_LIGHT_COLOR_MAT_ENABLED 38612
#define CTX_LIGHT_ENABLED_LIST 38616
#define CTX_LIGHT_NEED_VERTS 42973
#define CTX_LIGHT_FLAGS 42976
#define CTX_LIGHT_BASE_COLOR 42980
/* =============================================================
* Offsets for struct vertex_buffer
*/
#define VB_SIZE 0
#define VB_COUNT 4
#define VB_ELTS 8
#define VB_OBJ_PTR 12
#define VB_EYE_PTR 16
#define VB_CLIP_PTR 20
#define VB_PROJ_CLIP_PTR 24
#define VB_CLIP_OR_MASK 28
#define VB_CLIP_MASK 32
#define VB_NORMAL_PTR 36
#define VB_EDGE_FLAG 44
#define VB_TEX0_COORD_PTR 48
#define VB_TEX1_COORD_PTR 52
#define VB_TEX2_COORD_PTR 56
#define VB_TEX3_COORD_PTR 60
#define VB_INDEX_PTR 80
#define VB_COLOR_PTR 88
#define VB_SECONDARY_COLOR_PTR 96
#define VB_FOG_COORD_PTR 108
#define VB_POINT_SIZE_PTR 104
#define VB_PRIMITIVE 112
#define VB_LAST_CLIPPED 244
/*
* Flags for struct vertex_buffer
*/
#define VERT_BIT_OBJ 0x1
#define VERT_BIT_NORM 0x4
#define VERT_BIT_RGBA 0x8
#define VERT_BIT_SPEC_RGB 0x10
#define VERT_BIT_FOG_COORD 0x20
#define VERT_BIT_TEX0 0x100
#define VERT_BIT_TEX1 0x200
#define VERT_BIT_TEX2 0x400
#define VERT_BIT_TEX3 0x800
/* =============================================================
* Offsets for GLvector4f
*/
#define V4F_DATA 0
#define V4F_START 4
#define V4F_COUNT 8
#define V4F_STRIDE 12
#define V4F_SIZE 16
#define V4F_FLAGS 20
/*
* Flags for GLvector4f
*/
#define VEC_MALLOC 0x10
#define VEC_NOT_WRITEABLE 0x40
#define VEC_BAD_STRIDE 0x100
#define VEC_SIZE_1 0x1
#define VEC_SIZE_2 0x3
#define VEC_SIZE_3 0x7
#define VEC_SIZE_4 0xf
/* =============================================================
* Offsets for GLmatrix
*/
#define MATRIX_DATA 0
#define MATRIX_INV 4
#define MATRIX_FLAGS 8
#define MATRIX_TYPE 12
/* =============================================================
* Offsets for struct gl_light
*/
#define LIGHT_NEXT 0
#define LIGHT_PREV 4
#define LIGHT_AMBIENT 8
#define LIGHT_DIFFUSE 24
#define LIGHT_SPECULAR 40
#define LIGHT_EYE_POSITION 56
#define LIGHT_EYE_DIRECTION 72
#define LIGHT_SPOT_EXPONENT 88
#define LIGHT_SPOT_CUTOFF 92
#define LIGHT_COS_CUTOFF 96
#define LIGHT_CONST_ATTEN 100
#define LIGHT_LINEAR_ATTEN 104
#define LIGHT_QUADRATIC_ATTEN 108
#define LIGHT_ENABLED 112
#define LIGHT_FLAGS 116
#define LIGHT_POSITION 120
#define LIGHT_VP_INF_NORM 136
#define LIGHT_H_INF_NORM 148
#define LIGHT_NORM_DIRECTION 160
#define LIGHT_VP_INF_SPOT_ATTEN 176
#define LIGHT_SPOT_EXP_TABLE 180
#define LIGHT_MAT_AMBIENT 4276
#define LIGHT_MAT_DIFFUSE 4300
#define LIGHT_MAT_SPECULAR 4324
#define SIZEOF_GL_LIGHT 4356
/*
* Flags for struct gl_light
*/
#define LIGHT_SPOT 0x1
#define LIGHT_LOCAL_VIEWER 0x2
#define LIGHT_POSITIONAL 0x4
#define LIGHT_NEED_VERTICES 0x6
/* =============================================================
* Offsets for struct gl_lightmodel
*/
#define LIGHT_MODEL_AMBIENT 0
#define LIGHT_MODEL_LOCAL_VIEWER 16
#define LIGHT_MODEL_TWO_SIDE 17
#define LIGHT_MODEL_COLOR_CONTROL 20
#endif /* __ASM_TYPES_H__ */
/* $Id: x86-64.c,v 1.1 2005/05/07 16:59:59 brianp Exp $ */
/*
* Mesa 3-D graphics library
* Version: 6.3
*
* Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* x86-64 optimizations shamelessly converted from x86/sse/3dnow assembly by
* Mikko Tiihonen
*/
#ifdef USE_X86_64_ASM
#include "glheader.h"
#include "context.h"
#include "math/m_xform.h"
#include "tnl/t_context.h"
#include "x86-64.h"
#include "../x86/common_x86_macros.h"
#ifdef DEBUG
#include "math/m_debug.h"
#endif
DECLARE_XFORM_GROUP( x86_64, 4 )
#endif
/*
extern void _mesa_x86_64_transform_points4_general( XFORM_ARGS );
extern void _mesa_x86_64_transform_points4_identity( XFORM_ARGS );
extern void _mesa_x86_64_transform_points4_perspective( XFORM_ARGS );
extern void _mesa_x86_64_transform_points4_3d( XFORM_ARGS );
extern void _mesa_x86_64_transform_points4_3d_no_rot( XFORM_ARGS );
extern void _mesa_x86_64_transform_points4_2d_no_rot( XFORM_ARGS );
extern void _mesa_x86_64_transform_points4_2d( XFORM_ARGS );
*/
#ifdef USE_X86_64_ASM
/*
 * Write msg to stderr when debug output is wanted.
 *
 * In builds compiled with DEBUG defined the message is always printed;
 * otherwise it is printed only when _mesa_getenv( "MESA_DEBUG" ) returns a
 * non-null value.
 */
static void message( const char *msg )
{
#ifdef DEBUG
   const GLboolean verbose = GL_TRUE;
#else
   const GLboolean verbose =
      _mesa_getenv( "MESA_DEBUG" ) ? GL_TRUE : GL_FALSE;
#endif

   if ( !verbose ) {
      return;
   }
   fprintf( stderr, "%s", msg );
}
#endif
/*
 * Hook the x86-64 assembly transform routines into Mesa.
 *
 * Compiles to an empty function unless USE_X86_64_ASM is defined.  With it
 * defined, nothing is installed when _mesa_getenv( "MESA_NO_ASM" ) returns a
 * non-null value; otherwise a message is emitted, ASSIGN_XFORM_GROUP is run
 * for the x86_64 / size-4 group and, in DEBUG builds, the math self-tests
 * are executed afterwards.
 */
void _mesa_init_all_x86_64_transform_asm(void)
{
#ifdef USE_X86_64_ASM
   if ( !_mesa_getenv( "MESA_NO_ASM" ) ) {
      message("Initializing x86-64 optimizations\n");

      /* NOTE(review): presumably this fills the seven
       * _mesa_transform_tab[4][MATRIX_*] slots (GENERAL, IDENTITY, 3D,
       * 3D_NO_ROT, PERSPECTIVE, 2D_NO_ROT, 2D) with the matching
       * _mesa_x86_64_transform_points4_* routines -- confirm against the
       * macro definition.
       */
      ASSIGN_XFORM_GROUP( x86_64, 4 );

#ifdef DEBUG
      _math_test_all_transform_functions("x86_64");
      _math_test_all_cliptest_functions("x86_64");
      _math_test_all_normal_transform_functions("x86_64");
#endif
   }
#endif
}
/* $Id: x86-64.h,v 1.1 2005/05/07 16:59:59 brianp Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef __X86_64_ASM_H__
#define __X86_64_ASM_H__
/*
 * Install the x86-64 optimized transform routines.
 *
 * Called from _math_init_transformation() when USE_X86_64_ASM is defined.
 * The definition in x86-64.c does nothing if USE_X86_64_ASM is not defined
 * or if _mesa_getenv( "MESA_NO_ASM" ) returns a non-null value.
 */
extern void _mesa_init_all_x86_64_transform_asm( void );
#endif /* __X86_64_ASM_H__ */
This diff is collapsed.
......@@ -1730,11 +1730,17 @@ SECTION _DATA public align=16 class=DATA use32 flat
#define TLBL(a) CONCAT(a,$)
#endif
/* hidden symbol visibility support */
/* Hidden symbol visibility support.
* If we build with gcc's -fvisibility=hidden flag, we'll need to change
* the symbol visibility mode to 'default'.
*/
#if defined(GNU_ASSEMBLER) && !defined(__DJGPP__) && !defined(__MINGW32__)
#define HIDDEN(a) .hidden a
# define HIDDEN(x) .hidden x
#elif defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303
# pragma GCC visibility push(default)
# define HIDDEN(x) .hidden x
#else
#define HIDDEN(a)
# define HIDDEN(x)
#endif
#endif /* __ASSYNTAX_H__ */
......@@ -61,7 +61,7 @@ do { \
printf( "\n" ); \
} while (0)
#if defined(__BEOS__)
#if defined(__BEOS__) || defined(_LP64)
#define OFFSET( s, t, m ) \
printf( "#define %s\t%ld\n", s, offsetof( t, m ) );
#else
......@@ -69,7 +69,7 @@ do { \
printf( "#define %s\t%d\n", s, offsetof( t, m ) );
#endif
#if defined(__BEOS__)
#if defined(__BEOS__) || defined(_LP64)
#define SIZEOF( s, t ) \
printf( "#define %s\t%ld\n", s, sizeof(t) );
#else
......
......@@ -29,16 +29,6 @@
#include "assyntax.h"
#include "glapioffsets.h"
/* If we build with gcc's -fvisibility=hidden flag, we'll need to change
* the symbol visibility mode to 'default'.
*/
#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303
# pragma GCC visibility push(default)
# define HIDDEN(x) .hidden x
#else
# define HIDDEN(x)
#endif
#ifndef __WIN32__
#if defined(STDCALL_API)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment