Commit 11a49f28 authored by Connor Abbott
Browse files

lima/gp: Support exp2 and log2



log2 is tricky because there cannot be a move between complex1 and
postlog2. We can't guarantee that scheduling complex1 will succeed when
we schedule postlog2, so we try to schedule complex1 and if it fails we
back out by rewriting the postlog2 as a move and introducing a new
postlog2 so that we can try again later.
Signed-off-by: Connor Abbott <cwabbott0@gmail.com>
Acked-by: Qiang Yu <yuq825@gmail.com>
parent c2f48d8f
Pipeline #52361 passed with stages
in 10 minutes and 19 seconds
......@@ -376,6 +376,8 @@ static void gpir_codegen_complex_slot(gpir_codegen_instr *code, gpir_instr *inst
case gpir_op_mov:
case gpir_op_rcp_impl:
case gpir_op_rsqrt_impl:
case gpir_op_exp2_impl:
case gpir_op_log2_impl:
{
gpir_alu_node *alu = gpir_node_to_alu(node);
code->complex_src = gpir_get_alu_input(node, alu->children[0]);
......@@ -395,6 +397,12 @@ static void gpir_codegen_complex_slot(gpir_codegen_instr *code, gpir_instr *inst
case gpir_op_rsqrt_impl:
code->complex_op = gpir_codegen_complex_op_rsqrt;
break;
case gpir_op_exp2_impl:
code->complex_op = gpir_codegen_complex_op_exp2;
break;
case gpir_op_log2_impl:
code->complex_op = gpir_codegen_complex_op_log2;
break;
default:
assert(0);
}
......@@ -410,14 +418,19 @@ static void gpir_codegen_pass_slot(gpir_codegen_instr *code, gpir_instr *instr)
return;
}
gpir_alu_node *alu = gpir_node_to_alu(node);
code->pass_src = gpir_get_alu_input(node, alu->children[0]);
switch (node->op) {
case gpir_op_mov:
{
gpir_alu_node *alu = gpir_node_to_alu(node);
code->pass_src = gpir_get_alu_input(node, alu->children[0]);
code->pass_op = gpir_codegen_pass_op_pass;
break;
}
case gpir_op_preexp2:
code->pass_op = gpir_codegen_pass_op_preexp2;
break;
case gpir_op_postlog2:
code->pass_op = gpir_codegen_pass_op_postlog2;
break;
default:
assert(0);
}
......
......@@ -177,6 +177,19 @@ static bool gpir_lower_complex(gpir_block *block, gpir_node *node)
gpir_alu_node *alu = gpir_node_to_alu(node);
gpir_node *child = alu->children[0];
if (node->op == gpir_op_exp2) {
gpir_alu_node *preexp2 = gpir_node_create(block, gpir_op_preexp2);
if (unlikely(!preexp2))
return false;
preexp2->children[0] = child;
preexp2->num_child = 1;
gpir_node_add_dep(&preexp2->node, child, GPIR_DEP_INPUT);
list_addtail(&preexp2->node.list, &node->list);
child = &preexp2->node;
}
gpir_alu_node *complex2 = gpir_node_create(block, gpir_op_complex2);
if (unlikely(!complex2))
return false;
......@@ -194,6 +207,12 @@ static bool gpir_lower_complex(gpir_block *block, gpir_node *node)
case gpir_op_rsqrt:
impl_op = gpir_op_rsqrt_impl;
break;
case gpir_op_exp2:
impl_op = gpir_op_exp2_impl;
break;
case gpir_op_log2:
impl_op = gpir_op_log2_impl;
break;
default:
assert(0);
}
......@@ -207,14 +226,33 @@ static bool gpir_lower_complex(gpir_block *block, gpir_node *node)
gpir_node_add_dep(&impl->node, child, GPIR_DEP_INPUT);
list_addtail(&impl->node.list, &node->list);
/* change node to complex1 node */
node->op = gpir_op_complex1;
alu->children[0] = &impl->node;
alu->children[1] = &complex2->node;
alu->children[2] = child;
alu->num_child = 3;
gpir_node_add_dep(node, &impl->node, GPIR_DEP_INPUT);
gpir_node_add_dep(node, &complex2->node, GPIR_DEP_INPUT);
gpir_alu_node *complex1 = gpir_node_create(block, gpir_op_complex1);
complex1->children[0] = &impl->node;
complex1->children[1] = &complex2->node;
complex1->children[2] = child;
complex1->num_child = 3;
gpir_node_add_dep(&complex1->node, child, GPIR_DEP_INPUT);
gpir_node_add_dep(&complex1->node, &impl->node, GPIR_DEP_INPUT);
gpir_node_add_dep(&complex1->node, &complex2->node, GPIR_DEP_INPUT);
list_addtail(&complex1->node.list, &node->list);
gpir_node *result = &complex1->node;
if (node->op == gpir_op_log2) {
gpir_alu_node *postlog2 = gpir_node_create(block, gpir_op_postlog2);
if (unlikely(!postlog2))
return false;
postlog2->children[0] = result;
postlog2->num_child = 1;
gpir_node_add_dep(&postlog2->node, result, GPIR_DEP_INPUT);
list_addtail(&postlog2->node.list, &node->list);
result = &postlog2->node;
}
gpir_node_replace_succ(result, node);
gpir_node_delete(node);
return true;
}
......@@ -384,6 +422,8 @@ static bool (*gpir_post_rsched_lower_funcs[gpir_op_num])(gpir_block *, gpir_node
[gpir_op_neg] = gpir_lower_neg,
[gpir_op_rcp] = gpir_lower_complex,
[gpir_op_rsqrt] = gpir_lower_complex,
[gpir_op_exp2] = gpir_lower_complex,
[gpir_op_log2] = gpir_lower_complex,
[gpir_op_eq] = gpir_lower_eq_ne,
[gpir_op_ne] = gpir_lower_eq_ne,
[gpir_op_abs] = gpir_lower_abs,
......
......@@ -118,6 +118,8 @@ static int nir_to_gpir_opcodes[nir_num_opcodes] = {
[nir_op_fmax] = gpir_op_max,
[nir_op_frcp] = gpir_op_rcp,
[nir_op_frsq] = gpir_op_rsqrt,
[nir_op_fexp2] = gpir_op_exp2,
[nir_op_flog2] = gpir_op_log2,
[nir_op_slt] = gpir_op_lt,
[nir_op_sge] = gpir_op_ge,
[nir_op_fcsel] = gpir_op_select,
......
......@@ -141,15 +141,25 @@ const gpir_op_info gpir_op_infos[] = {
},
[gpir_op_preexp2] = {
.name = "preexp2",
.slots = (int []) { GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_END },
.spillless = true,
.schedule_first = true,
},
[gpir_op_postlog2] = {
.name = "postlog2",
.slots = (int []) { GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_END },
},
[gpir_op_exp2_impl] = {
.name = "exp2_impl",
.slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
.spillless = true,
.schedule_first = true,
},
[gpir_op_log2_impl] = {
.name = "log2_impl",
.slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
.spillless = true,
.schedule_first = true,
},
[gpir_op_rcp_impl] = {
.name = "rcp_impl",
......
......@@ -627,23 +627,26 @@ static bool schedule_try_place_node(sched_ctx *ctx, gpir_node *node,
return true;
}
static gpir_node *create_move(sched_ctx *ctx, gpir_node *node)
/* Create a new node with "node" as the child, replace all uses of "node" with
* this new node, and replace "node" with it in the ready list.
*/
static gpir_node *create_replacement(sched_ctx *ctx, gpir_node *node,
gpir_op op)
{
gpir_alu_node *move = gpir_node_create(node->block, gpir_op_mov);
if (unlikely(!move))
return NULL;
move->children[0] = node;
move->num_child = 1;
gpir_alu_node *new_node = gpir_node_create(node->block, op);
if (unlikely(!new_node))
return NULL;
move->node.sched.instr = NULL;
move->node.sched.pos = -1;
move->node.sched.dist = node->sched.dist;
move->node.sched.max_node = node->sched.max_node;
move->node.sched.next_max_node = node->sched.next_max_node;
move->node.sched.complex_allowed = node->sched.complex_allowed;
new_node->children[0] = node;
new_node->num_child = 1;
gpir_debug("create move %d for %d\n", move->node.index, node->index);
new_node->node.sched.instr = NULL;
new_node->node.sched.pos = -1;
new_node->node.sched.dist = node->sched.dist;
new_node->node.sched.max_node = node->sched.max_node;
new_node->node.sched.next_max_node = node->sched.next_max_node;
new_node->node.sched.complex_allowed = node->sched.complex_allowed;
ctx->ready_list_slots--;
list_del(&node->list);
......@@ -651,12 +654,26 @@ static gpir_node *create_move(sched_ctx *ctx, gpir_node *node)
node->sched.next_max_node = false;
node->sched.ready = false;
node->sched.inserted = false;
gpir_node_replace_succ(&move->node, node);
gpir_node_add_dep(&move->node, node, GPIR_DEP_INPUT);
schedule_insert_ready_list(ctx, &move->node);
return &move->node;
gpir_node_replace_succ(&new_node->node, node);
gpir_node_add_dep(&new_node->node, node, GPIR_DEP_INPUT);
schedule_insert_ready_list(ctx, &new_node->node);
return &new_node->node;
}
/* Wrap "node" in a newly created mov node by delegating to
 * create_replacement(), and log the index of the new node.
 *
 * Returns the new mov node, or NULL when create_replacement() fails to
 * create it. Nothing is logged on failure, since there is no new node whose
 * index could be reported.
 */
static gpir_node *create_move(sched_ctx *ctx, gpir_node *node)
{
   gpir_node *move = create_replacement(ctx, node, gpir_op_mov);

   /* create_replacement() returns NULL if node creation fails; do not
    * dereference the result for the debug message in that case.
    */
   if (!move)
      return NULL;

   gpir_debug("create move %d for %d\n", move->index, node->index);
   return move;
}
/* Create a new postlog2 node that takes "node" as its child, by delegating
 * to create_replacement(), and log the index of the new node.
 *
 * "node" must be a complex1 node (asserted).
 *
 * Returns the new postlog2 node, or NULL when create_replacement() fails to
 * create it. Nothing is logged on failure.
 */
static gpir_node *create_postlog2(sched_ctx *ctx, gpir_node *node)
{
   assert(node->op == gpir_op_complex1);

   gpir_node *postlog2 = create_replacement(ctx, node, gpir_op_postlog2);

   /* create_replacement() returns NULL if node creation fails; do not
    * dereference the result for the debug message in that case.
    */
   if (!postlog2)
      return NULL;

   gpir_debug("create postlog2 %d for %d\n", postlog2->index, node->index);
   return postlog2;
}
/* Once we schedule the successor, would the predecessor be fully ready? */
static bool pred_almost_ready(gpir_dep *dep)
......@@ -936,7 +953,22 @@ static bool used_by_store(gpir_node *node, gpir_instr *instr)
return false;
}
/* If "node" is a complex1 whose first input-type successor is a postlog2,
 * return that postlog2 node. Return NULL in every other case: "node" is not
 * a complex1, its first input-type successor is some other op, or it has no
 * input-type successor at all.
 */
static gpir_node *consuming_postlog2(gpir_node *node)
{
   if (node->op == gpir_op_complex1) {
      gpir_node_foreach_succ(node, dep) {
         /* Only the first GPIR_DEP_INPUT successor is examined; other
          * dependency types are skipped over.
          */
         if (dep->type == GPIR_DEP_INPUT)
            return dep->succ->op == gpir_op_postlog2 ? dep->succ : NULL;
      }
   }

   return NULL;
}
static bool try_spill_node(sched_ctx *ctx, gpir_node *node)
{
......@@ -961,6 +993,16 @@ static bool try_spill_node(sched_ctx *ctx, gpir_node *node)
if (available == 0)
return false;
/* Don't spill complex1 if it's used by a postlog2. Instead, turn the
 * postlog2 into a move, replace the complex1 with a new postlog2 and spill
 * that. The store needs a move anyways so the postlog2 is usually free.
*/
gpir_node *postlog2 = consuming_postlog2(node);
if (postlog2) {
postlog2->op = gpir_op_mov;
node = create_postlog2(ctx, node);
}
/* TODO: use a better heuristic for choosing an available register? */
int physreg = ffsll(available) - 1;
......@@ -1305,7 +1347,17 @@ static bool sched_move(sched_ctx *ctx)
{
list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
if (node->sched.max_node) {
place_move(ctx, node);
/* For complex1 that is consumed by a postlog2, we cannot allow any
* moves in between. Convert the postlog2 to a move and insert a new
* postlog2, and try to schedule it again in try_node().
*/
gpir_node *postlog2 = consuming_postlog2(node);
if (postlog2) {
postlog2->op = gpir_op_mov;
create_postlog2(ctx, node);
} else {
place_move(ctx, node);
}
return true;
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment