Commit c4578d22 authored by Francisco Jerez

clover: Migrate a bunch of pointers and references in the object tree to smart references.


Tested-by: Tom Stellard <thomas.stellard@amd.com>
parent d82b39ce
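
The diff that follows replaces raw pointer and reference members throughout the object tree (ev.ctx, q.dev, kern.prog, mem.ctx, ...) with reference-counted wrappers (intrusive_ref, intrusive_ptr) so that child objects keep their parents alive, and the old field accesses turn into call syntax on the wrapped members (ev.context(), q.device(), kern.program()). As orientation only, here is a minimal sketch of the kind of wrapper involved; the _sketch names and every implementation detail are assumptions for illustration and do not reproduce clover's actual classes.

#include <atomic>
#include <utility>

// Hypothetical reference-counted base (clover has its own ref_counter;
// this stand-in only illustrates the idea).
class ref_counter_sketch {
public:
   ref_counter_sketch() : _refs(0) {}
   virtual ~ref_counter_sketch() {}
   void retain() { ++_refs; }
   bool release() { return --_refs == 0; }
private:
   std::atomic<unsigned> _refs;
};

// Hypothetical non-null smart reference: behaves like a T& while holding a
// reference count on the pointee for as long as the wrapper is alive.
template<typename T>
class intrusive_ref_sketch {
public:
   intrusive_ref_sketch(T &o) : p(&o) { p->retain(); }
   intrusive_ref_sketch(const intrusive_ref_sketch &r) : p(r.p) { p->retain(); }
   ~intrusive_ref_sketch() { if (p->release()) delete p; }

   intrusive_ref_sketch &operator=(intrusive_ref_sketch r) {
      std::swap(p, r.p);   // copy-and-swap keeps the counts balanced
      return *this;
   }

   // Recover the underlying reference; a member declared as
   // "const intrusive_ref<clover::context> context;" is then read back
   // with the call syntax seen below, e.g. ev.context().
   T &operator()() const { return *p; }

   // Implicit conversion lets containers of wrappers bind to plain
   // references, as in "for (event &ev : deps)".
   operator T &() const { return *p; }

private:
   T *p;
};

The same call syntax explains idioms in the diff such as _chain.back()().trigger() and return &_queue();, where the extra pair of parentheses evaluates the wrapper before the underlying object is used.
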
@@ -63,7 +63,7 @@ clWaitForEvents(cl_uint num_evs, const cl_event *d_evs) try {
auto evs = objs(d_evs, num_evs);
for (auto &ev : evs) {
if (ev.ctx != evs.front().ctx)
if (ev.context() != evs.front().context())
throw error(CL_INVALID_CONTEXT);
if (ev.status() < 0)
@@ -73,7 +73,7 @@ clWaitForEvents(cl_uint num_evs, const cl_event *d_evs) try {
// Create a temporary soft event that depends on all the events in
// the wait list
intrusive_ptr<soft_event> sev =
transfer(new soft_event(evs.front().ctx, evs, true));
transfer(new soft_event(evs.front().context(), evs, true));
// ...and wait on it.
sev->wait();
@@ -96,7 +96,7 @@ clGetEventInfo(cl_event d_ev, cl_event_info param,
break;
case CL_EVENT_CONTEXT:
buf.as_scalar<cl_context>() = desc(ev.ctx);
buf.as_scalar<cl_context>() = desc(ev.context());
break;
case CL_EVENT_COMMAND_TYPE:
@@ -133,7 +133,7 @@ clSetEventCallback(cl_event d_ev, cl_int type,
// Create a temporary soft event that depends on ev, with
// pfn_notify as completion action.
intrusive_ptr<soft_event> sev = transfer(
new soft_event(ev.ctx, { ev }, true,
new soft_event(ev.context(), { ev }, true,
[=, &ev](event &) {
ev.wait();
pfn_notify(desc(ev), ev.status(), user_data);
@@ -199,8 +199,8 @@ clEnqueueWaitForEvents(cl_command_queue d_q, cl_uint num_evs,
auto evs = objs(d_evs, num_evs);
for (auto &ev : evs) {
if (ev.ctx != q.ctx)
throw error(CL_INVALID_CONTEXT);
if (ev.context() != q.context())
throw error(CL_INVALID_CONTEXT);
}
// Create a hard event that depends on the events in the wait list:
@@ -125,11 +125,11 @@ clGetKernelInfo(cl_kernel d_kern, cl_kernel_info param,
break;
case CL_KERNEL_CONTEXT:
buf.as_scalar<cl_context>() = desc(kern.prog.ctx);
buf.as_scalar<cl_context>() = desc(kern.program().context());
break;
case CL_KERNEL_PROGRAM:
buf.as_scalar<cl_program>() = desc(kern.prog);
buf.as_scalar<cl_program>() = desc(kern.program());
break;
default:
@@ -148,9 +148,9 @@ clGetKernelWorkGroupInfo(cl_kernel d_kern, cl_device_id d_dev,
size_t size, void *r_buf, size_t *r_size) try {
property_buffer buf { r_buf, size, r_size };
auto &kern = obj(d_kern);
auto &dev = (d_dev ? *pobj(d_dev) : unique(kern.prog.devices()));
auto &dev = (d_dev ? *pobj(d_dev) : unique(kern.program().devices()));
if (!count(dev, kern.prog.devices()))
if (!count(dev, kern.program().devices()))
throw error(CL_INVALID_DEVICE);
switch (param) {
@@ -194,9 +194,9 @@ namespace {
void
validate_common(const command_queue &q, kernel &kern,
const ref_vector<event> &deps) {
if (kern.prog.ctx != q.ctx ||
if (kern.program().context() != q.context() ||
any_of([&](const event &ev) {
return ev.ctx != q.ctx;
return ev.context() != q.context();
}, deps))
throw error(CL_INVALID_CONTEXT);
@@ -205,7 +205,7 @@ namespace {
}, kern.args()))
throw error(CL_INVALID_KERNEL_ARGS);
if (!count(q.dev, kern.prog.devices()))
if (!count(q.device(), kern.program().devices()))
throw error(CL_INVALID_PROGRAM_EXECUTABLE);
}
@@ -214,7 +214,7 @@ namespace {
const size_t *d_grid_size) {
auto grid_size = range(d_grid_size, dims);
if (dims < 1 || dims > q.dev.max_block_size().size())
if (dims < 1 || dims > q.device().max_block_size().size())
throw error(CL_INVALID_WORK_DIMENSION);
if (!d_grid_size || any_of(is_zero(), grid_size))
@@ -242,14 +242,14 @@ namespace {
auto block_size = range(d_block_size, dims);
if (any_of(is_zero(), block_size) ||
any_of(greater(), block_size, q.dev.max_block_size()))
any_of(greater(), block_size, q.device().max_block_size()))
throw error(CL_INVALID_WORK_ITEM_SIZE);
if (any_of(modulus(), grid_size, block_size))
throw error(CL_INVALID_WORK_GROUP_SIZE);
if (fold(multiplies(), 1u, block_size) >
q.dev.max_threads_per_block())
q.device().max_threads_per_block())
throw error(CL_INVALID_WORK_GROUP_SIZE);
return block_size;
@@ -230,12 +230,12 @@ clGetMemObjectInfo(cl_mem d_mem, cl_mem_info param,
break;
case CL_MEM_CONTEXT:
buf.as_scalar<cl_context>() = desc(mem.ctx);
buf.as_scalar<cl_context>() = desc(mem.context());
break;
case CL_MEM_ASSOCIATED_MEMOBJECT: {
sub_buffer *sub = dynamic_cast<sub_buffer *>(&mem);
buf.as_scalar<cl_mem>() = (sub ? desc(sub->parent) : NULL);
buf.as_scalar<cl_mem>() = (sub ? desc(sub->parent()) : NULL);
break;
}
case CL_MEM_OFFSET: {
@@ -133,7 +133,7 @@ clBuildProgram(cl_program d_prog, cl_uint num_devs,
void *user_data) try {
auto &prog = obj(d_prog);
auto devs = (d_devs ? objs(d_devs, num_devs) :
ref_vector<device>(prog.ctx.devs()));
ref_vector<device>(prog.context().devs()));
auto opts = (p_opts ? p_opts : "");
if (bool(num_devs) != bool(d_devs) ||
@@ -141,7 +141,7 @@ clBuildProgram(cl_program d_prog, cl_uint num_devs,
throw error(CL_INVALID_VALUE);
if (any_of([&](const device &dev) {
return !count(dev, prog.ctx.devs());
return !count(dev, prog.context().devs());
}, devs))
throw error(CL_INVALID_DEVICE);
@@ -169,19 +169,19 @@ clGetProgramInfo(cl_program d_prog, cl_program_info param,
break;
case CL_PROGRAM_CONTEXT:
buf.as_scalar<cl_context>() = desc(prog.ctx);
buf.as_scalar<cl_context>() = desc(prog.context());
break;
case CL_PROGRAM_NUM_DEVICES:
buf.as_scalar<cl_uint>() = prog.devices().size() ?
prog.devices().size() :
prog.ctx.devs().size();
buf.as_scalar<cl_uint>() = (prog.devices().size() ?
prog.devices().size() :
prog.context().devs().size());
break;
case CL_PROGRAM_DEVICES:
buf.as_vector<cl_device_id>() = prog.devices().size() ?
descs(prog.devices()) :
descs(prog.ctx.devs());
buf.as_vector<cl_device_id>() = (prog.devices().size() ?
descs(prog.devices()) :
descs(prog.context().devs()));
break;
case CL_PROGRAM_SOURCE:
@@ -226,7 +226,7 @@ clGetProgramBuildInfo(cl_program d_prog, cl_device_id d_dev,
auto &prog = obj(d_prog);
auto &dev = obj(d_dev);
if (!count(dev, prog.ctx.devs()))
if (!count(dev, prog.context().devs()))
return CL_INVALID_DEVICE;
switch (param) {
@@ -75,11 +75,11 @@ clGetCommandQueueInfo(cl_command_queue d_q, cl_command_queue_info param,
switch (param) {
case CL_QUEUE_CONTEXT:
buf.as_scalar<cl_context>() = desc(q.ctx);
buf.as_scalar<cl_context>() = desc(q.context());
break;
case CL_QUEUE_DEVICE:
buf.as_scalar<cl_device_id>() = desc(q.dev);
buf.as_scalar<cl_device_id>() = desc(q.device());
break;
case CL_QUEUE_REFERENCE_COUNT:
@@ -71,7 +71,7 @@ clGetSamplerInfo(cl_sampler d_s, cl_sampler_info param,
break;
case CL_SAMPLER_CONTEXT:
buf.as_scalar<cl_context>() = desc(s.ctx);
buf.as_scalar<cl_context>() = desc(s.context());
break;
case CL_SAMPLER_NORMALIZED_COORDS:
@@ -24,7 +24,7 @@
#include "api/util.hpp"
#include "core/event.hpp"
#include "core/resource.hpp"
#include "core/memory.hpp"
using namespace clover;
@@ -56,7 +56,7 @@ namespace {
validate_common(command_queue &q,
const ref_vector<event> &deps) {
if (any_of([&](const event &ev) {
return &ev.ctx != &q.ctx;
return ev.context() != q.context();
}, deps))
throw error(CL_INVALID_CONTEXT);
}
@@ -67,7 +67,7 @@ namespace {
void
validate_object(command_queue &q, buffer &mem, const vector_t &origin,
const vector_t &pitch, const vector_t &region) {
if (mem.ctx != q.ctx)
if (mem.context() != q.context())
throw error(CL_INVALID_CONTEXT);
// The region must fit within the specified pitch,
@@ -90,7 +90,7 @@ namespace {
const vector_t &orig, const vector_t &region) {
vector_t size = { img.width(), img.height(), img.depth() };
if (img.ctx != q.ctx)
if (img.context() != q.context())
throw error(CL_INVALID_CONTEXT);
if (any_of(greater(), orig + region, size))
@@ -26,7 +26,7 @@ using namespace clover;
context::context(const property_list &props,
const ref_vector<device> &devs) :
_props(props), _devs(map(addresses(), devs)) {
_props(props), _devs(devs) {
}
bool
@@ -46,5 +46,5 @@ context::props() const {
context::device_range
context::devs() const {
return map(derefs(), _devs);
return map(evals(), _devs);
}
@@ -30,7 +30,9 @@
namespace clover {
class context : public ref_counter, public _cl_context {
private:
typedef adaptor_range<derefs, const std::vector<device *> &> device_range;
typedef adaptor_range<
evals, const std::vector<intrusive_ref<device>> &
> device_range;
typedef clover::property_list<cl_context_properties> property_list;
public:
@@ -53,7 +55,7 @@ namespace clover {
private:
property_list _props;
const std::vector<clover::device *> _devs;
const std::vector<intrusive_ref<device>> _devs;
};
}
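
In the context changes above, the device list switches from std::vector<device *> to std::vector<intrusive_ref<device>>, and devs() correspondingly switches from map(derefs(), _devs) to map(evals(), _devs). A minimal sketch of the two adaptor roles, under assumed _sketch names and without clover's lazy adaptor_range machinery: derefs turns stored pointers back into references, while evals invokes a wrapper's operator() to do the same for intrusive_ref elements.

// Hypothetical dereferencing adaptor: device * -> device &
// (the role derefs played over the old std::vector<device *> storage).
struct derefs_sketch {
   template<typename T>
   T &operator()(T *p) const { return *p; }
};

// Hypothetical evaluating adaptor: intrusive_ref<device> -> device &
// (the role evals plays over the new std::vector<intrusive_ref<device>>).
struct evals_sketch {
   template<typename R>
   auto operator()(const R &r) const -> decltype(r()) { return r(); }
};

Either way, callers of context::devs() keep iterating over plain device references; only the storage and the adaptor passed to map() change.
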
@@ -21,6 +21,7 @@
//
#include "core/device.hpp"
#include "core/platform.hpp"
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
@@ -25,12 +25,12 @@
using namespace clover;
event::event(context &ctx, const ref_vector<event> &deps,
event::event(clover::context &ctx, const ref_vector<event> &deps,
action action_ok, action action_fail) :
ctx(ctx), _status(0), wait_count(1),
context(ctx), _status(0), wait_count(1),
action_ok(action_ok), action_fail(action_fail) {
for (auto &ev : deps)
ev.chain(this);
ev.chain(*this);
}
event::~event() {
@@ -42,7 +42,7 @@ event::trigger() {
action_ok(*this);
while (!_chain.empty()) {
_chain.back()->trigger();
_chain.back()().trigger();
_chain.pop_back();
}
}
@@ -54,7 +54,7 @@ event::abort(cl_int status) {
action_fail(*this);
while (!_chain.empty()) {
_chain.back()->abort(status);
_chain.back()().abort(status);
_chain.pop_back();
}
}
@@ -65,33 +65,33 @@ event::signalled() const {
}
void
event::chain(event *ev) {
event::chain(event &ev) {
if (wait_count) {
ev->wait_count++;
ev.wait_count++;
_chain.push_back(ev);
}
ev->deps.push_back(this);
ev.deps.push_back(*this);
}
hard_event::hard_event(command_queue &q, cl_command_type command,
const ref_vector<event> &deps, action action) :
event(q.ctx, deps, profile(q, action), [](event &ev){}),
event(q.context(), deps, profile(q, action), [](event &ev){}),
_queue(q), _command(command), _fence(NULL) {
if (q.profiling_enabled())
_time_queued = timestamp::current(q);
q.sequence(this);
q.sequence(*this);
trigger();
}
hard_event::~hard_event() {
pipe_screen *screen = queue()->dev.pipe;
pipe_screen *screen = queue()->device().pipe;
screen->fence_reference(screen, &_fence, NULL);
}
cl_int
hard_event::status() const {
pipe_screen *screen = queue()->dev.pipe;
pipe_screen *screen = queue()->device().pipe;
if (_status < 0)
return _status;
@@ -108,7 +108,7 @@ hard_event::status() const {
command_queue *
hard_event::queue() const {
return &_queue;
return &_queue();
}
cl_command_type
@@ -118,7 +118,7 @@ hard_event::command() const {
void
hard_event::wait() const {
pipe_screen *screen = queue()->dev.pipe;
pipe_screen *screen = queue()->device().pipe;
if (status() == CL_QUEUED)
queue()->flush();
@@ -150,7 +150,7 @@ hard_event::time_end() const {
void
hard_event::fence(pipe_fence_handle *fence) {
pipe_screen *screen = queue()->dev.pipe;
pipe_screen *screen = queue()->device().pipe;
screen->fence_reference(screen, &_fence, fence);
}
@@ -173,7 +173,7 @@ hard_event::profile(command_queue &q, const action &action) const {
}
}
soft_event::soft_event(context &ctx, const ref_vector<event> &deps,
soft_event::soft_event(clover::context &ctx, const ref_vector<event> &deps,
bool _trigger, action action) :
event(ctx, deps, action, action) {
if (_trigger)
@@ -186,8 +186,8 @@ soft_event::status() const {
return _status;
else if (!signalled() ||
any_of([](const intrusive_ptr<event> &ev) {
return ev->status() != CL_COMPLETE;
any_of([](const event &ev) {
return ev.status() != CL_COMPLETE;
}, deps))
return CL_SUBMITTED;
@@ -207,8 +207,8 @@ soft_event::command() const {
void
soft_event::wait() const {
for (auto ev : deps)
ev->wait();
for (event &ev : deps)
ev.wait();
if (status() != CL_COMPLETE)
throw error(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST);
@@ -53,7 +53,7 @@ namespace clover {
public:
typedef std::function<void (event &)> action;
event(context &ctx, const ref_vector<event> &deps,
event(clover::context &ctx, const ref_vector<event> &deps,
action action_ok, action action_fail);
virtual ~event();
@@ -70,19 +70,19 @@ namespace clover {
virtual cl_command_type command() const = 0;
virtual void wait() const = 0;
context &ctx;
const intrusive_ref<clover::context> context;
protected:
void chain(event *ev);
void chain(event &ev);
cl_int _status;
std::vector<intrusive_ptr<event>> deps;
std::vector<intrusive_ref<event>> deps;
private:
unsigned wait_count;
action action_ok;
action action_fail;
std::vector<intrusive_ptr<event>> _chain;
std::vector<intrusive_ref<event>> _chain;
};
///
@@ -120,7 +120,7 @@ namespace clover {
virtual void fence(pipe_fence_handle *fence);
action profile(command_queue &q, const action &action) const;
command_queue &_queue;
const intrusive_ref<command_queue> _queue;
cl_command_type _command;
pipe_fence_handle *_fence;
lazy<cl_ulong> _time_queued, _time_submit, _time_start, _time_end;
@@ -135,7 +135,7 @@ namespace clover {
///
class soft_event : public event {
public:
soft_event(context &ctx, const ref_vector<event> &deps,
soft_event(clover::context &ctx, const ref_vector<event> &deps,
bool trigger, action action = [](event &){});
virtual cl_int status() const;
@@ -28,9 +28,9 @@
using namespace clover;
kernel::kernel(program &prog, const std::string &name,
kernel::kernel(clover::program &prog, const std::string &name,
const std::vector<module::argument> &margs) :
prog(prog), _name(name), exec(*this) {
program(prog), _name(name), exec(*this) {
for (auto &marg : margs) {
if (marg.type == module::argument::scalar)
_args.emplace_back(new scalar_argument(marg.size));
@@ -57,7 +57,7 @@ template<typename V>
static inline std::vector<uint>
pad_vector(command_queue &q, const V &v, uint x) {
std::vector<uint> w { v.begin(), v.end() };
w.resize(q.dev.max_block_size().size(), x);
w.resize(q.device().max_block_size().size(), x);
return w;
}
@@ -66,7 +66,7 @@ kernel::launch(command_queue &q,
const std::vector<size_t> &grid_offset,
const std::vector<size_t> &grid_size,
const std::vector<size_t> &block_size) {
const auto m = prog.binary(q.dev);
const auto m = program().binary(q.device());
const auto reduced_grid_size =
map(divides(), grid_size, block_size);
void *st = exec.bind(&q);
@@ -130,7 +130,7 @@ std::vector<size_t>
kernel::optimal_block_size(const command_queue &q,
const std::vector<size_t> &grid_size) const {
return factor::find_grid_optimal_factor<size_t>(
q.dev.max_threads_per_block(), q.dev.max_block_size(),
q.device().max_threads_per_block(), q.device().max_block_size(),
grid_size);
}
@@ -151,7 +151,7 @@ kernel::args() const {
const module &
kernel::module(const command_queue &q) const {
return prog.binary(q.dev);
return program().binary(q.device());
}
kernel::exec_context::exec_context(kernel &kern) :
@@ -164,11 +164,11 @@ kernel::exec_context::~exec_context() {
}
void *
kernel::exec_context::bind(command_queue *_q) {
kernel::exec_context::bind(intrusive_ptr<command_queue> _q) {
std::swap(q, _q);
// Bind kernel arguments.
auto &m = kern.prog.binary(q->dev);
auto &m = kern.program().binary(q->device());
auto margs = find(name_equals(kern.name()), m.syms).args;
auto msec = find(type_equals(module::section::text), m.secs);
@@ -313,7 +313,7 @@ kernel::scalar_argument::bind(exec_context &ctx,
auto w = v;
extend(w, marg.ext_type, marg.target_size);
byteswap(w, ctx.q->dev.endianness());
byteswap(w, ctx.q->device().endianness());
align(ctx.input, marg.target_align);
insert(ctx.input, w);
}
@@ -369,7 +369,7 @@ kernel::local_argument::bind(exec_context &ctx,
auto v = bytes(ctx.mem_local);
extend(v, module::argument::zero_ext, marg.target_size);
byteswap(v, ctx.q->dev.endianness());
byteswap(v, ctx.q->device().endianness());
align(ctx.input, marg.target_align);
insert(ctx.input, v);
@@ -398,7 +398,7 @@ kernel::constant_argument::bind(exec_context &ctx,
auto v = bytes(ctx.resources.size() << 24);
extend(v, module::argument::zero_ext, marg.target_size);
byteswap(v, ctx.q->dev.endianness());
byteswap(v, ctx.q->device().endianness());
insert(ctx.input, v);
st = buf->resource(*ctx.q).bind_surface(*ctx.q, false);
@@ -430,7 +430,7 @@ kernel::image_rd_argument::bind(exec_context &ctx,
auto v = bytes(ctx.sviews.size());
extend(v, module::argument::zero_ext, marg.target_size);
byteswap(v, ctx.q->dev.endianness());
byteswap(v, ctx.q->device().endianness());
align(ctx.input, marg.target_align);
insert(ctx.input, v);
@@ -458,7 +458,7 @@ kernel::image_wr_argument::bind(exec_context &ctx,
auto v = bytes(ctx.resources.size());
extend(v, module::argument::zero_ext, marg.target_size);
byteswap(v, ctx.q->dev.endianness());
byteswap(v, ctx.q->device().endianness());
align(ctx.input, marg.target_align);
insert(ctx.input, v);
@@ -46,11 +46,11 @@ namespace clover {
exec_context &
operator=(const exec_context &) = delete;
void *bind(command_queue *q);
void *bind(intrusive_ptr<command_queue> _q);
void unbind();
kernel &kern;
command_queue *q;