Commit a25093de authored by Tim Rowley

swr/rast: Implement JIT shader caching to disk

Disabled by default; currently doesn't cache shaders (fs,gs,vs).
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
parent 1c33dc77
......@@ -18,6 +18,7 @@
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
import sys
# Python source
KNOBS = [
......@@ -156,11 +157,25 @@ KNOBS = [
['DEBUG_OUTPUT_DIR', {
'type' : 'std::string',
'default' : '/tmp/Rast/DebugOutput',
'default' : r'%TEMP%\Rast\DebugOutput' if sys.platform == 'win32' else '/tmp/Rast/DebugOutput',
'desc' : ['Output directory for debug data.'],
'category' : 'debug',
}],
['JIT_ENABLE_CACHE', {
'type' : 'bool',
'default' : 'false',
'desc' : ['Enables caching of compiled shaders'],
'category' : 'debug',
}],
['JIT_CACHE_DIR', {
'type' : 'std::string',
'default' : r'%TEMP%\SWR\JitCache' if sys.platform == 'win32' else '${HOME}/.swr/jitcache',
'desc' : ['Cache directory for compiled shaders.'],
'category' : 'debug',
}],
['TOSS_DRAW', {
'type' : 'bool',
'default' : 'false',
......
......@@ -34,14 +34,44 @@
#pragma once
#include <string>
// Common base for every Knob<T>.  Supplies the environment-variable expansion
// hook that Knob applies to values before storing them.
struct KnobBase
{
protected:
    // Generic overload: values that are not strings are handed back untouched.
    template <typename T>
    static T expandEnvironmentVariables(T const &input)
    {
        return input;
    }

    // String overload: works on a copy so the caller's string is never
    // modified, and returns the expanded copy.
    static std::string expandEnvironmentVariables(std::string const &input)
    {
        std::string expanded(input);
        autoExpandEnvironmentVariables(expanded);
        return expanded;
    }

private:
    // Expands environment variable references in place (defined out of line).
    static void autoExpandEnvironmentVariables(std::string &text);
};
template <typename T>
struct Knob
struct Knob : KnobBase
{
public:
const T& Value() const { return m_Value; }
const T& Value(const T& newValue) { m_Value = newValue; return Value(); }
const T& Value(T const &newValue)
{
m_Value = expandEnvironmentVariables(newValue);
return Value();
}
protected:
Knob(const T& defaultValue) : m_Value(defaultValue) {}
Knob(T const &defaultValue) :
m_Value(expandEnvironmentVariables(defaultValue))
{
}
private:
T m_Value;
......@@ -102,6 +132,34 @@ extern GlobalKnobs g_GlobalKnobs;
% for inc in includes:
#include <${inc}>
% endfor
#include <regex>
#include <core/utils.h>
//========================================================
// Implementation
//========================================================
// Expands environment variable references inside 'text', in place.
//
// Two reference syntaxes are recognised and processed one after the other:
//   1. POSIX style   - a dollar sign followed by the variable name in braces
//   2. Windows style - the variable name wrapped in percent signs
//
// Each reference is replaced by the result of GetEnv() for that name.
// Every syntax is handled in a single left-to-right pass, and the text that
// was just substituted is not re-scanned for the same syntax.  That keeps the
// loop finite even when a variable's value contains a reference to itself.
//
// The Windows-style name may not contain a percent sign, so each pair of
// delimiters encloses exactly one name when several references share a string.
void KnobBase::autoExpandEnvironmentVariables(std::string &text)
{
    static const std::regex patterns[] = {
        std::regex("\\$\\{([^}]+)\\}"),
        std::regex("\\%([^%]+)\\%"),
    };

    for (const std::regex &pattern : patterns)
    {
        std::string expanded;
        std::string::const_iterator cursor = text.cbegin();
        std::smatch match;

        while (std::regex_search(cursor, text.cend(), match, pattern))
        {
            // Keep the literal text that precedes the reference...
            expanded.append(cursor, match[0].first);
            // ...then substitute the variable's value for the reference.
            expanded += GetEnv(match[1].str());
            cursor = match[0].second;
        }

        // Trailing literal text after the last reference (or the whole string
        // when nothing matched).
        expanded.append(cursor, text.cend());
        text.swap(expanded);
    }
}
//========================================================
// Static Data Members
......
......@@ -1224,4 +1224,30 @@ struct TemplateArgUnroller
}
};
//////////////////////////////////////////////////////////////////////////
/// Helpers used to get / set environment variable
//////////////////////////////////////////////////////////////////////////
/// Returns the value of the environment variable 'variableName', or an empty
/// string when the variable is not set.
static INLINE std::string GetEnv(const std::string& variableName)
{
    std::string output;
#if defined(_WIN32)
    // First call queries the required size (including the terminating null).
    DWORD valueSize = GetEnvironmentVariableA(variableName.c_str(), nullptr, 0);
    if (!valueSize) return output;
    output.resize(valueSize - 1); // valueSize includes null, output.resize() does not
    GetEnvironmentVariableA(variableName.c_str(), &output[0], valueSize);
#else
    // getenv() returns a null pointer for an unset variable; assigning that to
    // a std::string is undefined behaviour, so only assign a real value.
    if (const char* value = getenv(variableName.c_str()))
    {
        output = value;
    }
#endif
    return output;
}
/// Sets the environment variable 'variableName' to 'value', replacing any
/// value it already has.
static INLINE void SetEnv(const std::string& variableName, const std::string& value)
{
    const char* const name     = variableName.c_str();
    const char* const contents = value.c_str();
#if defined(_WIN32)
    SetEnvironmentVariableA(name, contents);
#else
    // Non-zero third argument: overwrite an existing value.
    setenv(name, contents, 1);
#endif
}
......@@ -46,6 +46,15 @@
#include "llvm/IRReader/IRReader.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/MemoryBuffer.h"
#if HAVE_LLVM < 0x400
#include "llvm/Bitcode/ReaderWriter.h"
#else
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/Bitcode/BitcodeReader.h"
#endif
#if LLVM_USE_INTEL_JITEVENTS
#include "llvm/ExecutionEngine/JITEventListener.h"
......@@ -71,6 +80,11 @@
#define JITTER_OUTPUT_DIR SWR_OUTPUT_DIR "\\Jitter"
#endif // _WIN32
#if defined(__APPLE) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)
#include <pwd.h>
#include <sys/stat.h>
#endif
using namespace llvm;
using namespace SwrJit;
......@@ -101,9 +115,7 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
mCore = std::string(core);
std::transform(mCore.begin(), mCore.end(), mCore.begin(), ::tolower);
std::stringstream fnName("JitModule", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
fnName << mJitNumber++;
std::unique_ptr<Module> newModule(new Module(fnName.str(), mContext));
std::unique_ptr<Module> newModule(new Module("", mContext));
mpCurrentModule = newModule.get();
StringRef hostCPUName;
......@@ -123,6 +135,12 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
.setMCPU(hostCPUName)
.create();
if (KNOB_JIT_ENABLE_CACHE)
{
mCache.SetCpu(hostCPUName);
mpExec->setObjectCache(&mCache);
}
#if LLVM_USE_INTEL_JITEVENTS
JITEventListener *vTune = JITEventListener::createIntelJITEventListener();
mpExec->RegisterJITEventListener(vTune);
......@@ -172,9 +190,7 @@ void JitManager::SetupNewModule()
{
SWR_ASSERT(mIsModuleFinalized == true && "Current module is not finalized!");
std::stringstream fnName("JitModule", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
fnName << mJitNumber++;
std::unique_ptr<Module> newModule(new Module(fnName.str(), mContext));
std::unique_ptr<Module> newModule(new Module("", mContext));
mpCurrentModule = newModule.get();
#if defined(_WIN32)
// Needed for MCJIT on windows
......@@ -293,3 +309,194 @@ extern "C"
}
}
}
//////////////////////////////////////////////////////////////////////////
/// JitCache
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
/// JitCacheFileHeader
//////////////////////////////////////////////////////////////////////////
// Fixed-size record written at the start of every cache file.  It identifies
// the file (magic number), the build it belongs to (platform key), the module
// it was produced from (IR CRC, module id, cpu name) and the object payload
// that follows it (size and CRC).
struct JitCacheFileHeader
{
    /// Fills in every field of the header.  The module id and cpu name are
    /// truncated to fit their fixed-size slots and are always null terminated.
    void Init(uint32_t llCRC, uint32_t objCRC, const std::string& moduleID, const std::string& cpu, uint64_t bufferSize)
    {
        m_MagicNumber = JC_MAGIC_NUMBER;
        m_platformKey = JC_PLATFORM_KEY;

        m_llCRC       = llCRC;
        m_objCRC      = objCRC;
        m_BufferSize  = bufferSize;

        StoreName(m_ModuleID, moduleID);
        StoreName(m_Cpu, cpu);
    }

    /// Returns true when this header (typically just read from disk) matches
    /// the expected IR CRC, module id and cpu name, and was written by a
    /// compatible build.
    bool IsValid(uint32_t llCRC, const std::string& moduleID, const std::string& cpu)
    {
        const bool fixedFieldsMatch =
            (m_MagicNumber == JC_MAGIC_NUMBER) &&
            (m_llCRC == llCRC) &&
            (m_platformKey == JC_PLATFORM_KEY);
        if (!fixedFieldsMatch)
        {
            return false;
        }

        if (!NameMatches(m_ModuleID, moduleID))
        {
            return false;
        }

        return NameMatches(m_Cpu, cpu);
    }

    uint64_t GetBufferSize() const { return m_BufferSize; }
    uint64_t GetBufferCRC() const { return m_objCRC; }

private:
    static const uint64_t   JC_MAGIC_NUMBER = 0xfedcba9876543211ULL;
    static const size_t     JC_STR_MAX_LEN = 32;
    static const uint32_t   JC_PLATFORM_KEY =
        (LLVM_VERSION_MAJOR << 24)  |
        (LLVM_VERSION_MINOR << 16)  |
        (LLVM_VERSION_PATCH << 8)   |
        ((sizeof(void*) > sizeof(uint32_t)) ? 1 : 0);

    // Copies 'src' into a JC_STR_MAX_LEN sized slot, truncating if needed and
    // forcing null termination.
    static void StoreName(char* slot, const std::string& src)
    {
        strncpy(slot, src.c_str(), JC_STR_MAX_LEN - 1);
        slot[JC_STR_MAX_LEN - 1] = 0;
    }

    // Forces termination of the stored name (the data may come from an
    // untrusted file) and compares it against 'expected' over the slot width.
    static bool NameMatches(char* slot, const std::string& expected)
    {
        slot[JC_STR_MAX_LEN - 1] = 0;
        return strncmp(expected.c_str(), slot, JC_STR_MAX_LEN - 1) == 0;
    }

    // Data members: order and types define the on-disk layout.
    uint64_t m_MagicNumber;
    uint64_t m_BufferSize;
    uint32_t m_llCRC;
    uint32_t m_platformKey;
    uint32_t m_objCRC;
    char m_ModuleID[JC_STR_MAX_LEN];
    char m_Cpu[JC_STR_MAX_LEN];
};
/// Serializes module M to LLVM bitcode in memory and returns the CRC of the
/// resulting bytes.
static inline uint32_t ComputeModuleCRC(const llvm::Module* M)
{
    std::string serialized;
    {
        raw_string_ostream sink(serialized);
        llvm::WriteBitcodeToFile(M, sink);
        // Make sure everything buffered by the stream has reached 'serialized'.
        sink.flush();
    }

    return ComputeCRC(0, serialized.data(), serialized.size());
}
/// constructor
/// Resolves the cache directory from KNOB_JIT_CACHE_DIR.  On the POSIX-like
/// platforms selected below, a leading "~/" is replaced by the user's home
/// directory: the HOME environment variable if set, otherwise the password
/// database entry.  If no home directory can be determined the knob value is
/// used unchanged.
JitCache::JitCache()
    : mCurrentModuleCRC(0)
{
#if defined(__APPLE) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)
    if (strncmp(KNOB_JIT_CACHE_DIR.c_str(), "~/", 2) == 0)
    {
        const char* homedir = getenv("HOME");
        if (!homedir)
        {
            // getpwuid() returns a null pointer when the uid has no entry or
            // on error, so it must be checked before dereferencing.
            const struct passwd* pwEntry = getpwuid(getuid());
            homedir = pwEntry ? pwEntry->pw_dir : nullptr;
        }

        if (homedir)
        {
            mCacheDir = homedir;
            // Skip the '~' but keep the '/' that follows it.
            mCacheDir += (KNOB_JIT_CACHE_DIR.c_str() + 1);
            return;
        }
    }
#endif

    mCacheDir = KNOB_JIT_CACHE_DIR;
}
/// notifyObjectCompiled - Provides a pointer to compiled code for Module M.
/// Writes the object to <cache dir>/<module identifier>, preceded by a
/// JitCacheFileHeader describing it.  Modules without an identifier are not
/// cached.  Caching is best effort: any I/O failure leaves the compile result
/// untouched and simply skips the cache write.
void JitCache::notifyObjectCompiled(const llvm::Module *M, llvm::MemoryBufferRef Obj)
{
    const std::string& moduleID = M->getModuleIdentifier();
    if (moduleID.empty())
    {
        return;
    }

    if (!llvm::sys::fs::exists(mCacheDir.str()) &&
        llvm::sys::fs::create_directories(mCacheDir.str()))
    {
        SWR_INVALID("Unable to create directory: %s", mCacheDir.c_str());
        return;
    }

    llvm::SmallString<MAX_PATH> filePath = mCacheDir;
    llvm::sys::path::append(filePath, moduleID);

    std::error_code err;
    llvm::raw_fd_ostream fileObj(filePath.c_str(), err, llvm::sys::fs::F_None);
    if (err)
    {
        // Without this check every write below would hit an invalid stream.
        SWR_TRACE("Unable to open object cache file for writing: %s", filePath.c_str());
        return;
    }

    uint32_t objcrc = ComputeCRC(0, Obj.getBufferStart(), Obj.getBufferSize());

    // Value-initialize so that padding bytes written to disk are zero rather
    // than indeterminate.
    JitCacheFileHeader header{};
    // mCurrentModuleCRC is the IR CRC recorded by getObject() for this module.
    header.Init(mCurrentModuleCRC, objcrc, moduleID, mCpu, Obj.getBufferSize());

    fileObj.write(reinterpret_cast<const char*>(&header), sizeof(header));
    fileObj << Obj.getBuffer();
    fileObj.flush();

    if (fileObj.has_error())
    {
        // raw_fd_ostream reports a fatal error on destruction if a write error
        // is still pending.  A truncated cache file is rejected on load, so
        // clear the error and carry on.
        fileObj.clear_error();
        SWR_TRACE("Failed writing object cache file: %s", filePath.c_str());
    }
}
/// Returns a pointer to a newly allocated MemoryBuffer that contains the
/// object which corresponds with Module M, or nullptr if an object is not
/// available.
///
/// The CRC of M is always stored in mCurrentModuleCRC, even on a miss, because
/// notifyObjectCompiled() puts it in the header of the cache file it writes.
/// A cached file is accepted only if its header matches the module (CRC, id,
/// cpu, platform key), the payload size it declares fits inside the file, and
/// the payload CRC matches.
std::unique_ptr<llvm::MemoryBuffer> JitCache::getObject(const llvm::Module* M)
{
    const std::string& moduleID = M->getModuleIdentifier();
    mCurrentModuleCRC = ComputeModuleCRC(M);

    if (moduleID.empty())
    {
        return nullptr;
    }

    if (!llvm::sys::fs::exists(mCacheDir))
    {
        return nullptr;
    }

    llvm::SmallString<MAX_PATH> filePath = mCacheDir;
    llvm::sys::path::append(filePath, moduleID);

    // The file size bounds the payload size declared in the header, so a
    // corrupt header cannot trigger an arbitrarily large allocation.
    uint64_t fileSize = 0;
    if (llvm::sys::fs::file_size(filePath.str(), fileSize) ||
        fileSize < sizeof(JitCacheFileHeader))
    {
        return nullptr;
    }

    // The file is closed automatically on every return path.
    std::unique_ptr<FILE, int (*)(FILE*)> fpIn(fopen(filePath.c_str(), "rb"), &fclose);
    if (!fpIn)
    {
        return nullptr;
    }

    JitCacheFileHeader header;
    if (fread(&header, sizeof(header), 1, fpIn.get()) != 1)
    {
        return nullptr;
    }

    if (!header.IsValid(mCurrentModuleCRC, moduleID, mCpu))
    {
        return nullptr;
    }

    const uint64_t declaredSize = header.GetBufferSize();
    const size_t   objSize      = static_cast<size_t>(declaredSize);
    if (declaredSize == 0 ||
        declaredSize > fileSize - sizeof(header) ||
        static_cast<uint64_t>(objSize) != declaredSize) // does not fit in size_t
    {
        return nullptr;
    }

    std::unique_ptr<llvm::MemoryBuffer> pBuf = llvm::MemoryBuffer::getNewUninitMemBuffer(objSize);
    if (!pBuf)
    {
        return nullptr;
    }

    if (fread(const_cast<char*>(pBuf->getBufferStart()), objSize, 1, fpIn.get()) != 1)
    {
        return nullptr;
    }

    if (header.GetBufferCRC() != ComputeCRC(0, pBuf->getBufferStart(), pBuf->getBufferSize()))
    {
        SWR_TRACE("Invalid object cache file, ignoring: %s", filePath.c_str());
        return nullptr;
    }

    return pBuf;
}
......@@ -44,6 +44,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/ExecutionEngine/ObjectCache.h"
#include "llvm/Config/llvm-config.h"
#ifndef LLVM_VERSION_MAJOR
......@@ -78,6 +79,8 @@ using PassManager = llvm::legacy::PassManager;
#include "common/os.h"
#include "common/isa.hpp"
#include <mutex>
#pragma pop_macro("DEBUG")
//////////////////////////////////////////////////////////////////////////
......@@ -133,6 +136,31 @@ struct JitLLVMContext : llvm::LLVMContext
{
};
//////////////////////////////////////////////////////////////////////////
/// JitCache
//////////////////////////////////////////////////////////////////////////
// On-disk cache of compiled objects, plugged into the execution engine through
// the llvm::ObjectCache interface.  Cache files are keyed by module identifier
// and validated against the module's IR CRC and the cpu name set via SetCpu().
class JitCache : public llvm::ObjectCache
{
public:
    /// constructor
    JitCache();
    virtual ~JitCache() {}

    /// Records the cpu name stored in, and checked against, cache file headers.
    void SetCpu(const llvm::StringRef& cpu) { mCpu = cpu.str(); }

    /// notifyObjectCompiled - Provides a pointer to compiled code for Module M.
    void notifyObjectCompiled(const llvm::Module *M, llvm::MemoryBufferRef Obj) override;

    /// Returns a pointer to a newly allocated MemoryBuffer that contains the
    /// object which corresponds with Module M, or 0 if an object is not
    /// available.
    std::unique_ptr<llvm::MemoryBuffer> getObject(const llvm::Module* M) override;

private:
    std::string mCpu;
    llvm::SmallString<MAX_PATH> mCacheDir;
    // Written by getObject(), read by notifyObjectCompiled().  Initialized so
    // it never holds an indeterminate value.
    uint32_t mCurrentModuleCRC = 0;
};
//////////////////////////////////////////////////////////////////////////
/// JitManager
......@@ -145,6 +173,7 @@ struct JitManager
JitLLVMContext mContext; ///< LLVM compiler
llvm::IRBuilder<> mBuilder; ///< LLVM IR Builder
llvm::ExecutionEngine* mpExec;
JitCache mCache;
// Need to be rebuilt after a JIT and before building new IR
llvm::Module* mpCurrentModule;
......
......@@ -514,10 +514,8 @@ struct BlendJit : public Builder
Function* Create(const BLEND_COMPILE_STATE& state)
{
static std::size_t jitNum = 0;
std::stringstream fnName("BlendShader", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
fnName << jitNum++;
std::stringstream fnName("BlendShader_", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
fnName << ComputeCRC(0, &state, sizeof(state));
// blend function signature
//typedef void(*PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*, simdvector&, simdvector&, uint32_t, BYTE*, simdvector&, simdscalari*, simdscalari*);
......@@ -536,6 +534,7 @@ struct BlendJit : public Builder
FunctionType* fTy = FunctionType::get(IRB()->getVoidTy(), args, false);
Function* blendFunc = Function::Create(fTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
blendFunc->getParent()->setModuleIdentifier(blendFunc->getName());
BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", blendFunc);
......
......@@ -91,12 +91,14 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
{
static std::size_t fetchNum = 0;
std::stringstream fnName("FetchShader", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
fnName << fetchNum++;
std::stringstream fnName("FetchShader_", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
fnName << ComputeCRC(0, &fetchState, sizeof(fetchState));
Function* fetch = Function::Create(JM()->mFetchShaderTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", fetch);
fetch->getParent()->setModuleIdentifier(fetch->getName());
IRB()->SetInsertPoint(entry);
auto argitr = fetch->arg_begin();
......
......@@ -57,6 +57,7 @@ struct ShaderInfo;
struct JIT_COMPILE_INPUT
{
SWR_SHADER_TYPE type;
uint32_t crc;
const void* pIR; ///< Pointer to LLVM IR text.
size_t irLength;
......
......@@ -265,8 +265,8 @@ struct StreamOutJit : public Builder
{
static std::size_t soNum = 0;
std::stringstream fnName("SOShader", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
fnName << soNum++;
std::stringstream fnName("SO_", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
fnName << ComputeCRC(0, &state, sizeof(state));
// SO function signature
// typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT*)
......@@ -278,6 +278,8 @@ struct StreamOutJit : public Builder
FunctionType* fTy = FunctionType::get(IRB()->getVoidTy(), args, false);
Function* soFunc = Function::Create(fTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
soFunc->getParent()->setModuleIdentifier(soFunc->getName());
// create return basic block
BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", soFunc);
BasicBlock* returnBB = BasicBlock::Create(JM()->mContext, "return", soFunc);
......
......@@ -495,6 +495,7 @@ swr_create_vertex_elements_state(struct pipe_context *pipe,
assert(num_elements <= PIPE_MAX_ATTRIBS);
velems = new swr_vertex_element_state;
if (velems) {
memset(&velems->fsState, 0, sizeof(velems->fsState));
velems->fsState.bVertexIDOffsetEnable = true;
velems->fsState.numAttribs = num_elements;
for (unsigned i = 0; i < num_elements; i++) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment