Commit 376b813e authored by Chris Wilson

igt/gem_exec_gttfill: Reduce overhead in setting up filler batches

Since all the batches start with the same content, we can reuse the same
buf to fill them.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
parent 3e765840
......@@ -16,6 +16,8 @@ libintel_tools_la_SOURCES = \
igt_gt.h \
igt_gvt.c \
igt_gvt.h \
igt_rand.c \
igt_rand.h \
igt_stats.c \
igt_stats.h \
igt_sysfs.c \
......
......@@ -57,6 +57,7 @@
#include "igt_aux.h"
#include "igt_debugfs.h"
#include "igt_gt.h"
#include "igt_rand.h"
#include "config.h"
#include "intel_reg.h"
#include "ioctl_wrappers.h"
......@@ -486,15 +487,6 @@ void igt_exchange_int(void *array, unsigned i, unsigned j)
int_arr[j] = tmp;
}
/*
 * Advance the file-local pseudo-random generator by one step and return
 * the new 32-bit value.
 *
 * The generator state lives in a function-local static that starts at
 * 0x12345678, so every caller in this translation unit draws from the
 * same sequence. No locking is performed around the state update.
 */
static uint32_t
hars_petruska_f54_1_random_unsafe(void)
{
	static uint32_t state = 0x12345678;

	const uint32_t prev = state;
	/* 32-bit left rotations of the previous state by 5 and by 24 bits. */
	const uint32_t rot5 = (prev << 5) | (prev >> (32 - 5));
	const uint32_t rot24 = (prev << 24) | (prev >> (32 - 24));

	state = (prev ^ rot5 ^ rot24) + 0x37798849;

	return state;
}
/**
* igt_permute_array:
* @array: pointer to array
......@@ -1206,19 +1198,9 @@ static struct igt_siglatency {
int sig;
} igt_siglatency;
/*
 * Produce the next value of a private 32-bit pseudo-random sequence.
 *
 * State is kept in a function-local static seeded with 0x12345678; each
 * call mixes the state with two rotated copies of itself, adds a fixed
 * constant, stores the result back and returns it.
 */
static uint32_t
__hars_petruska_f54_1_random (void)
{
	static uint32_t state = 0x12345678;
	uint32_t mixed = state;

	mixed ^= (state << 5) | (state >> 27);	/* rotate left by 5 */
	mixed ^= (state << 24) | (state >> 8);	/* rotate left by 24 */
	state = mixed + 0x37798849;

	return state;
}
/*
 * Return a pseudo-random delay: one draw from the generator reduced
 * modulo (NSEC_PER_SEC / 1000).
 *
 * NOTE(review): two return statements appear back to back below, which
 * looks like the "before" and "after" lines of a diff shown without its
 * +/- markers. As written only the first return can execute and the
 * second is unreachable. Confirm which generator call is intended.
 */
static long delay(void)
{
return __hars_petruska_f54_1_random() % (NSEC_PER_SEC / 1000);
return hars_petruska_f54_1_random_unsafe() % (NSEC_PER_SEC / 1000);
}
static double elapsed(const struct timespec *now, const struct timespec *last)
......
#include "igt_rand.h"
/*
 * Generator state shared by the functions in this file; it starts at
 * 0x12345678 and is replaced wholesale by hars_petruska_f54_1_random_seed().
 */
static uint32_t state = 0x12345678;

/**
 * hars_petruska_f54_1_random_seed:
 * @new_state: value to install as the generator state
 *
 * Overwrites the generator state with @new_state.
 *
 * Returns: the state that was in place before the call, so a caller can
 * save it and restore it later.
 */
uint32_t
hars_petruska_f54_1_random_seed(uint32_t new_state)
{
	const uint32_t previous = state;

	state = new_state;

	return previous;
}
/**
 * hars_petruska_f54_1_random_unsafe:
 *
 * Steps the generator once: the file-scope state is XORed with copies of
 * itself rotated left by 5 and by 24 bits, a fixed constant is added, and
 * the result becomes the new state.
 *
 * The update is a plain read-modify-write of the shared state with no
 * locking.
 *
 * Returns: the new state value.
 */
uint32_t
hars_petruska_f54_1_random_unsafe(void)
{
	const uint32_t prev = state;
	const uint32_t rot5 = (prev << 5) | (prev >> (32 - 5));
	const uint32_t rot24 = (prev << 24) | (prev >> (32 - 24));

	state = (prev ^ rot5 ^ rot24) + 0x37798849;

	return state;
}
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef IGT_RAND_H
#define IGT_RAND_H
#include <stdint.h>
uint32_t hars_petruska_f54_1_random_seed(uint32_t seed);
uint32_t hars_petruska_f54_1_random_unsafe(void);
/**
 * hars_petruska_f54_1_random_perturb:
 * @xor: value mixed into the current generator state
 *
 * Fetches the current state through hars_petruska_f54_1_random_seed(),
 * XORs it with @xor, installs the result as the new seed, then draws one
 * value from the generator and installs that value as the seed.
 */
static inline void hars_petruska_f54_1_random_perturb(uint32_t xor)
{
	/* Seeding with 0 is only a way to read back the state held so far. */
	const uint32_t current = hars_petruska_f54_1_random_seed(0);

	hars_petruska_f54_1_random_seed(current ^ xor);

	/* Step once so the seed is not simply "old state ^ xor". */
	const uint32_t stepped = hars_petruska_f54_1_random_unsafe();

	hars_petruska_f54_1_random_seed(stepped);
}
#endif /* IGT_RAND_H */
......@@ -22,6 +22,7 @@
*/
#include "igt.h"
#include "igt_rand.h"
IGT_TEST_DESCRIPTION("Fill the GTT with batches.");
......@@ -60,52 +61,59 @@ static void submit(int fd, int gen,
uint32_t *handles, unsigned count)
{
struct drm_i915_gem_exec_object2 obj;
uint32_t batch[16];
unsigned n;
memset(&obj, 0, sizeof(obj));
obj.relocs_ptr = (uintptr_t)reloc;
obj.relocation_count = 2;
memset(reloc, 0, 2*sizeof(*reloc));
reloc[0].offset = eb->batch_start_offset;
reloc[0].offset += sizeof(uint32_t);
reloc[0].delta = BATCH_SIZE - eb->batch_start_offset - 8;
reloc[0].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
reloc[1].offset = eb->batch_start_offset;
reloc[1].offset += 3*sizeof(uint32_t);
reloc[1].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
n = 0;
batch[n] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
if (gen >= 8) {
batch[n] |= 1 << 21;
batch[n]++;
batch[++n] = reloc[0].delta;/* lower_32_bits(address) */
batch[++n] = 0; /* upper_32_bits(address) */
} else if (gen >= 4) {
batch[++n] = 0;
batch[++n] = reloc[0].delta;/* lower_32_bits(address) */
reloc[0].offset += sizeof(uint32_t);
} else {
batch[n]--;
batch[++n] = reloc[0].delta;/* lower_32_bits(address) */
reloc[1].offset -= sizeof(uint32_t);
}
batch[++n] = 0; /* lower_32_bits(value) */
batch[++n] = 0; /* upper_32_bits(value) / nop */
batch[++n] = MI_BATCH_BUFFER_END;
eb->buffers_ptr = (uintptr_t)&obj;
for (unsigned i = 0; i < count; i++) {
uint32_t batch[16];
unsigned n;
memset(&obj, 0, sizeof(obj));
obj.handle = handles[i];
obj.relocs_ptr = (uintptr_t)reloc;
obj.relocation_count = 2;
memset(reloc, 0, 2*sizeof(*reloc));
reloc[0].target_handle = obj.handle;
reloc[0].offset = eb->batch_start_offset;
reloc[0].offset += sizeof(uint32_t);
reloc[0].delta = BATCH_SIZE - eb->batch_start_offset - 8;
reloc[0].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
reloc[1].target_handle = obj.handle;
reloc[1].offset = eb->batch_start_offset;
reloc[1].offset += 3*sizeof(uint32_t);
reloc[1].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
n = 0;
batch[n] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
if (gen >= 8) {
batch[n] |= 1 << 21;
batch[n]++;
batch[++n] = reloc[0].delta;/* lower_32_bits(address) */
batch[++n] = 0; /* upper_32_bits(address) */
} else if (gen >= 4) {
batch[++n] = 0;
batch[++n] = reloc[0].delta;/* lower_32_bits(address) */
reloc[0].offset += sizeof(uint32_t);
} else {
batch[n]--;
batch[++n] = reloc[0].delta;/* lower_32_bits(address) */
reloc[1].offset -= sizeof(uint32_t);
}
batch[++n] = 0; /* lower_32_bits(value) */
batch[++n] = 0; /* upper_32_bits(value) / nop */
batch[++n] = MI_BATCH_BUFFER_END;
obj.offset = 0;
reloc[0].presumed_offset = obj.offset;
reloc[1].presumed_offset = obj.offset;
gem_write(fd, obj.handle, eb->batch_start_offset,
batch, sizeof(batch));
gem_execbuf(fd, eb);
}
/* As we have been lying about the write_domain, we need to do a sync */
gem_sync(fd, obj.handle);
}
static void fillgtt(int fd, unsigned ring, int timeout)
......@@ -113,6 +121,7 @@ static void fillgtt(int fd, unsigned ring, int timeout)
const int gen = intel_gen(intel_get_drm_devid(fd));
struct drm_i915_gem_execbuffer2 execbuf;
struct drm_i915_gem_relocation_entry reloc[2];
volatile uint64_t *shared;
unsigned *handles;
unsigned engines[16];
unsigned nengine;
......@@ -120,6 +129,9 @@ static void fillgtt(int fd, unsigned ring, int timeout)
uint64_t size;
unsigned count;
shared = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
igt_assert(shared != MAP_FAILED);
nengine = 0;
if (ring == 0) {
for_each_engine(fd, engine) {
......@@ -164,6 +176,8 @@ static void fillgtt(int fd, unsigned ring, int timeout)
submit(fd, gen, &execbuf, reloc, handles, count);
igt_fork(child, nengine) {
uint64_t cycles = 0;
hars_petruska_f54_1_random_perturb(child);
igt_permute_array(handles, count, xchg_u32);
execbuf.batch_start_offset = child*64;
execbuf.flags |= engines[child];
......@@ -177,13 +191,21 @@ static void fillgtt(int fd, unsigned ring, int timeout)
gem_read(fd, handle, reloc[0].delta, &buf[1], sizeof(buf[1]));
igt_assert_eq_u64(buf[0], buf[1]);
}
cycles++;
}
shared[child] = cycles;
igt_info("engine[%d]: %llu cycles\n", child, (long long)cycles);
}
igt_waitchildren();
for (unsigned i = 0; i < count; i++)
gem_close(fd, handles[i]);
shared[nengine] = 0;
for (unsigned i = 0; i < nengine; i++)
shared[nengine] += shared[i];
igt_info("Total: %llu cycles\n", (long long)shared[nengine]);
igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
}
......
......@@ -26,6 +26,7 @@
#include <sched.h>
#include "igt.h"
#include "igt_rand.h"
#include "igt_stats.h"
#if defined(__x86_64__) || defined(__i386__)
......@@ -41,16 +42,6 @@ struct data {
drmModeRes *resources;
};
/*
 * Generator state for this file. It is file-scope rather than local to the
 * generator so that other code in the file can modify it directly.
 */
static uint32_t state = 0x12345678;

/*
 * Advance the file-local generator and return the new 32-bit state:
 * the old state XORed with its left-rotations by 5 and 24 bits, plus a
 * fixed constant.
 */
static uint32_t
hars_petruska_f54_1_random (void)
{
	const uint32_t prev = state;
	uint32_t mixed = prev;

	mixed ^= (prev << 5) | (prev >> 27);	/* rotate left by 5 */
	mixed ^= (prev << 24) | (prev >> 8);	/* rotate left by 24 */
	state = mixed + 0x37798849;

	return state;
}
static void stress(struct data *data,
uint32_t *crtc_id, unsigned num_crtcs,
int num_children, unsigned mode,
......@@ -94,9 +85,9 @@ static void stress(struct data *data,
CPU_SET(child, &allowed);
sched_setaffinity(getpid(), sizeof(cpu_set_t), &allowed);
state ^= child;
hars_petruska_f54_1_random_perturb(child);
igt_until_timeout(timeout) {
arg.crtc_id = crtc_id[hars_petruska_f54_1_random() % num_crtcs];
arg.crtc_id = crtc_id[hars_petruska_f54_1_random_unsafe() % num_crtcs];
do_ioctl(data->fd, DRM_IOCTL_MODE_CURSOR, &arg);
count++;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment