
Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Commits on Source (3582)
Showing with 2196 additions and 907 deletions
......@@ -164,8 +164,6 @@ build:tests-debian-meson-mips:
#################### TEST ##########################
test:ninja-test:
tags:
- gstreamer
retry: 2
dependencies:
- build:tests-fedora
......@@ -173,8 +171,6 @@ test:ninja-test:
script: meson test -C build --num-processes ${FDO_CI_CONCURRENT:-4}
test:ninja-test-clang:
tags:
- gstreamer
retry: 2
dependencies:
- build:tests-fedora-clang
......@@ -184,8 +180,6 @@ test:ninja-test-clang:
script: meson test -C build --num-processes ${FDO_CI_CONCURRENT:-4}
test:ninja-test-minimal:
tags:
- gstreamer
retry: 2
image: $CI_REGISTRY/$CI_PROJECT_PATH/build-debian-minimal:commit-$CI_COMMIT_SHA
dependencies:
......@@ -195,8 +189,6 @@ test:ninja-test-minimal:
# arm testing temporarily disabled until converted to run on native arm HW
# test:ninja-test-arm64:
# tags:
# - gstreamer
# retry: 2
# image: $CI_REGISTRY/$CI_PROJECT_PATH/build-debian-arm64:commit-$CI_COMMIT_SHA
# dependencies:
......@@ -212,8 +204,6 @@ test:ninja-test-minimal:
# when: on_failure
#
# test:ninja-test-armhf:
# tags:
# - gstreamer
# retry: 2
# image: $CI_REGISTRY/$CI_PROJECT_PATH/build-debian-armhf:commit-$CI_COMMIT_SHA
# dependencies:
......@@ -228,22 +218,21 @@ test:ninja-test-minimal:
# - build
# when: on_failure
test:ninja-test-mips:
tags:
- gstreamer
retry: 2
image: $CI_REGISTRY/$CI_PROJECT_PATH/build-debian-mips:commit-$CI_COMMIT_SHA
dependencies:
- build:tests-debian-meson-mips
stage: test
script:
- export PKG_CONFIG_PATH=/usr/lib/mips-linux-gnu/pkgconfig/
- env > build/envdump.txt
- meson test -C build --num-processes ${FDO_CI_CONCURRENT:-4}
artifacts:
paths:
- build
when: on_failure
# mips testing temporarily disabled
# test:ninja-test-mips:
# retry: 2
# image: $CI_REGISTRY/$CI_PROJECT_PATH/build-debian-mips:commit-$CI_COMMIT_SHA
# dependencies:
# - build:tests-debian-meson-mips
# stage: test
# script:
# - export PKG_CONFIG_PATH=/usr/lib/mips-linux-gnu/pkgconfig/
# - env > build/envdump.txt
# - meson test -C build --num-processes ${FDO_CI_CONCURRENT:-4}
# artifacts:
# paths:
# - build
# when: on_failure
test:list-undocumented-tests:
dependencies:
......
......@@ -30,6 +30,12 @@ The Code
provided by the igt library. The semantic patch lib/igt.cocci can help with
more automatic conversions.
- Tests that use kernel interfaces (uapi, sysfs, or even debugfs) that
become deprecated in favour of new interfaces should fall back to the
deprecated interfaces when the new ones are not present in the running
kernel. That way the same IGT version can be used to test both the tip of
development and stable kernel releases (see the sketch below).
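A minimal sketch of that fallback pattern, using plain POSIX calls; the
helper name and the sysfs attribute names ("frequency_mhz" as the new
interface, "freq" as the deprecated one) are hypothetical:

  #include <fcntl.h>   /* openat(), O_RDONLY */

  /* Hypothetical helper: prefer the new attribute, fall back to the old one. */
  static int open_freq_attr(int sysfs_dir)
  {
          int fd;

          fd = openat(sysfs_dir, "frequency_mhz", O_RDONLY); /* new interface */
          if (fd < 0)
                  fd = openat(sysfs_dir, "freq", O_RDONLY); /* deprecated fallback */

          return fd; /* negative if neither exists; the test can then skip */
  }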
[igt-describe]: https://drm.pages.freedesktop.org/igt-gpu-tools/igt-gpu-tools-Core.html#igt-describe
......@@ -57,6 +63,17 @@ Sending Patches
on its first invocation.
- Place a relevant prefix in the subject; for example, when your change is in
one test file, use its name without the '.c' or '.h' suffix, like:
tests/simple_test: short description
Consider sending a cover letter with your patch, so that if you later decide
to change the subject it can still be linked into the same patch series on
patchwork.
- Have a look at guides from the Linux and Open Source community:
https://kernelnewbies.org/PatchPhilosophy
https://www.kernel.org/doc/html/latest/process/submitting-patches.html
https://www.kernel.org/doc/html/latest/process/submit-checklist.html
- Patches need to be reviewed on the mailing list. Exceptions only apply for
testcases and tooling for drivers with just a single contributor (e.g. vc4).
In this case patches must still be submitted to the mailing list first.
......@@ -69,8 +86,17 @@ Sending Patches
contact one of the maintainers (listed in the MAINTAINERS file) and cc the
igt-dev mailing list.
- Before sending, use the Linux kernel script 'checkpatch.pl' to check your
patchset. You can ignore some of its complaints, like 'line too long' or
'typedef', but most of the time its log is accurate. Useful options include:
--emacs --strict --show-types --max-line-length=100 \
--ignore=BIT_MACRO,SPLIT_STRING,LONG_LINE_STRING,BOOL_MEMBER
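For instance (the kernel tree location and the patch file name below are
purely illustrative), a full invocation could look like:

  $ ~/linux/scripts/checkpatch.pl --emacs --strict --show-types \
        --max-line-length=100 \
        --ignore=BIT_MACRO,SPLIT_STRING,LONG_LINE_STRING,BOOL_MEMBER \
        0001-tests-simple_test-short-description.patch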
- Changes to the testcases are automatically tested. Take the results into
account before merging.
account before merging. Please also reply to CI failures if you think they
are unrelated, and add the CI e-mail address present in the message to Cc;
this helps our bug-filing team. When replying, you can trim the message after
'Known bugs' to keep it to a reasonable size.
Commit Rights
......
Petri Latvala <petri.latvala@intel.com>
Petri Latvala <adrinael@adrinael.net>
Arkadiusz Hiler <arek@hiler.eu>
Kamil Konieczny <kamil.konieczny@linux.intel.com>
Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
Bhanuprakash Modem <bhanuprakash.modem@gmail.com>
Ashutosh Dixit <ashutosh.dixit@intel.com>
Karthik B S <karthik.b.s@intel.com>
Release 2.00 (2025-03-13)
-------------------------
General changes:
- Added Karthik B S as a new maintainer.
Library changes:
- Added GFX1153 to GFX1150 amdgpu family. (Tim Huang)
- Improved kernel module un/loading. (Lucas De Marchi)
- Added ftrace logs at test boundaries. (Umesh Nerlige Ramappa)
- Improved device scanning used in device filters. (Zbigniew Kempczyński)
- Add support to check joiner mode limit. (Jeevan B)
- Updated PCI ids for DG2/MTL/BMG/PTL. (Matt Atwood)
- Added PTL opencl kernels. (Janga Rahul Kumar and Priyanka Dandamudi)
- Extended GPGPU compute square kernel for PTL. (Sai Gowtham Ch)
- Using Long Running mode for GPGPU compute with Xe. (Francois Dugast)
- Make running under Valgrind quiet. (Tvrtko Ursulin)
Runner changes:
- Added igt_runner cmdline to results. (Lucas De Marchi)
- Added printing GPU related facts. (Peter Senna Tschudin)
- Added kmemleak scans. (Peter Senna Tschudin)
- Parse results more persistently. (Kamil Konieczny)
Test changes:
- Improved Xe EUdebug tests. (Dominik Grzegorzek)
- Improved support for 3d testing on VMs for vmwgfx. (Maaz Mombasawala)
- Improved Xe debugfs testing. (Pravalika Gurram)
- Improved amdgpu PCI unplug test. (Vitaly Prosyak)
- Added support for page queues in amdgpu tests. (Jesse Zhang)
- New short hibernate subtest in Intel KMS ccs. (Juha-Pekka Heikkila)
- Renamed test i915_pipe_stress into kms_pipe_stress and added
support for Xe in it. (Swati Sharma)
- New test for devcoredump for Xe. (Zhanjun Dong)
- New DP link training validation of both UHBR and non-UHBR link
rates over SST and MST configurations. (Kunal Joshi)
- New test for Frame Buffer Compression (FBC) with dirty rectangles which
allows FBC to recompress a subsection of a frame. (Santhosh Reddy Guddati)
- New test for EU stall sampling for Xe. (Harish Chegondi)
- New test for eudebug/SR-IOV exclusion for Xe. (Christoph Manszewski)
- New test for PMU (Performance Monitoring Unit) for Xe. (Vinay Belgaumkar
and Riana Tauro)
- New test for SRIOV auto-provisioning for Xe. (Marcin Bernatowicz)
- New equal-throughput validation for VFs in SRIOV. (Marcin Bernatowicz)
- Improved test documentation. (Katarzyna Piecielska and Swati Sharma)
- Improved Xe OA test. (Umesh Nerlige Ramappa, Sai Teja Pottumuttu and
Ashutosh Dixit)
Tools and scripts changes:
- New tool for measuring display memory bandwidth utilization (Ville Syrjälä)
- New igt_facts tool for displaying GPU related info. (Peter Senna Tschudin)
- Power tool renamed to igt_power (Kamil Konieczny)
- New --pci-slot option in intel_reg for multi-GPU configs. (Łukasz Łaguna
and Kamil Konieczny)
- Added kmemleak option to run-tests.sh script. (Peter Senna Tschudin)
And many other bug fixes, improvements, cleanups and new tests.
Release 1.30 (2024-12-13)
-------------------------
General changes:
- New meson options xe_eudebug and vmtb.
Library changes:
- Added PantherLake (PTL) support, unified PCI IDs into one
common header pciids.h (Ngai-Mint Kwan)
- Added BMG support for OA (Observability Architecture) for Xe driver.
(José Roberto de Souza)
- Added support for Xe3 platforms in GPGPU shader. (Andrzej Hajda)
- Added 6k resolution support for a single CRTC. (Jeevan B)
- Added support for MTL platform in GPGPU compute. (Nishit Sharma)
Runner changes:
- Set option PRUNE_KEEP_ALL as default. (Kamil Konieczny)
- Allow dynamically ignoring dmesg warn messages when generating
results, useful when the driver is using kernel fault injection.
(Kamil Konieczny)
Test changes:
- Added sanity checks for KMS properties. (Dmitry Baryshkov, Maxime Ripard)
- Improved GPGPU tests for i915 and Xe. (Zbigniew Kempczyński)
- New SRIOV test for Functional Level Reset (FLR) for Xe. (Marcin Bernatowicz)
- Added test that draws triangle without using 3d commands for vmwgfx.
(Maaz Mombasawala)
- Added subtest for fallback for DP connector. (Kunal Joshi)
- Added async flips suspend resume subtest. (Santhosh Reddy Guddati)
- New test for error handling of Xe at probe time. (Francois Dugast)
- Added testing SIZE_HINTS property in KMS cursor test. (Ville Syrjälä)
- Added KMS testing for ultrajoiner. (Karthik B S)
- New test for TLB invalidation in Xe. (Sai Gowtham Ch)
- New test for timeslice duration in Xe. (Sai Gowtham Ch)
- Display brightness test during DPMS on and off. (Mohammed Thasleem)
- New tests for EU debugging for Xe. (Dominik Grzegorzek, Mika Kuoppala,
Christoph Manszewski, Karolina Stolarek, Maciej Patelczyk, Pawel Sikora,
Andrzej Hajda, Dominik Karol Piątkowski, Jonathan Cavitt et al)
Tools changes:
- New power tool for power/energy measurement. (Ville Syrjälä)
- New VM Testbench (VMTB) - SR-IOV Virtual Machine testing tool.
(Adam Miszczak)
- Fixes in amd_hdmi_compliance. (Stylon Wang and Wayne Lin)
- Fixes in intel_reg. (Lucas De Marchi)
And many other bug fixes, improvements, cleanups and new tests.
Release 1.29 (2024-09-04)
-------------------------
General changes:
- Added gcc warnings: dangling-pointer, int-conversion (Bhanuprakash Modem)
- More guidelines in CONTRIBUTING. (Louis Chauvet, Kamil Konieczny)
- Reorganized Tests: Moved all vendor specific tests to their own dir
(Bhanuprakash Modem)
- Fix musl/uclibc build (Bernd Kuhls, Stefano Ragni)
Benchmarks changes:
- New KMS framebuffer stress benchmark. (Arthur Grillo)
- Added basic Xe support in gem_wsim. (Marcin Bernatowicz)
Documentation changes:
- Add documentation about cross-builds. (Mauro Carvalho Chehab)
- Improve tests documentation. (Katarzyna Piecielska)
Library changes:
- Add Battlemage (BMG) support in xe_pciids.h (Andrzej Hajda)
- Add amdgpu GFX1152, GFX1150 and gfx12. (Jesse Zhang and Tim Huang)
- Added inline support for iga64 assembly in GPGPU shader. (Andrzej Hajda)
- Improved KUnit support. (Janusz Krzysztofik)
- Enable igt run on security enhanced distros (like Gentoo/Hardened)
(Matt Turner)
- Use blitter as a default for all KMS tests. (Juha-Pekka Heikkila and
Bhanuprakash Modem)
- Increased MAX supported pipes to 16 (Pipe-A to Pipe-P) (Vignesh Raman)
- Added generic way to reset sysfs/debugfs attrs to default values upon
exit. (Ville Syrjälä)
Runner changes:
- Added hook scripts to igt_runner. (Gustavo Sousa)
Test changes:
- Added support for Xe in KMS tests. (Swati Sharma, Bhanuprakash Modem et al)
- Added new subtests and improvements to VRR. (Bhanuprakash Modem, Jeevan B,
Manasi Navare, Sean Paul et al)
- Added new subtests to force joiner. (Kunal Joshi)
- Added fbdev tests to Xe. (Bhanuprakash Modem)
- Added amdgpu fuzzing tests. (Vitaly Prosyak)
- Added syncobj_eventfd test. (Simon Ser)
- Added basic Single-Root IO Virtualization (SRIOV) test. (Katarzyna Dec et al)
- Added prime test for vmwgfx. (Zack Rusin)
- Improved core_getversion. (Rob Clark, Helen Koike, Kamil Konieczny)
- Improved kms_atomic on non-mutable planes. (Melissa Wen)
- Added and improved Xe tests. (Rodrigo Vivi, Matthew Auld, Zbigniew Kempczyński,
Francois Dugast, Nirmoy Das, Lucas De Marchi, Janga Rahul Kumar et al)
Tools and scripts changes:
- New Xe perf/OA tools. (Ashutosh Dixit)
- New intel_tiling_detect tool. (Zbigniew Kempczyński)
- New option in lsgpu for printing GPUs on the PCI bus, which also works
when no GPU driver is loaded. (Zbigniew Kempczyński)
- Added sysfs profiling knob to gputop. (Adrián Larumbe)
- Support for Xe in gputop. (Lucas De Marchi)
- Improved generating test lists at compilation time. (Mauro Carvalho Chehab)
- Improved code coverage. (Mauro Carvalho Chehab)
- Improved intel_vbt_decode and other intel tools. (Ville Syrjälä,
Lucas De Marchi, Jani Nikula, Tvrtko Ursulin, Gustavo Sousa et al)
And many other bug fixes, improvements, cleanups and new tests.
Release 1.28 (2023-09-13)
-------------------------
General changes:
- New meson options testplan, sphinx and xe_driver. (Mauro Carvalho Chehab)
Library changes:
- Add amdgpu GFX1036, GFX1037 chips. (Jesse Zhang)
- Add xe_pciids.h with Lunar Lake (LNL) support. (Lucas De Marchi)
- Use the new procps library libproc2. (Craig Small)
- Add helper for srandom seed initialization. (Łukasz Łaguna)
- Support for vmwgfx driver. (Maaz Mombasawala, Roye Eshed, Zack Rusin)
- i915_pciids.h updated to include Pontevecchio (PVC) platform.
(Niranjana Vishwanathapura)
- Add OAM formats and support for media engines in perf tests.
(Umesh Nerlige Ramappa)
- Support for Xe driver. (Matthew Brost, Mauro Carvalho Chehab, Rodrigo Vivi,
Jason Ekstrand, Francois Dugast, Philippe Lecluse, Zbigniew Kempczyński,
Maarten Lankhorst, Juha-Pekka Heikkila, Bhanuprakash Modem et al)
Runner changes:
- igt_runner can now dump GPU state on timeout. (Chris Wilson)
- igt_runner will now use proper 'abort' as result instead of pseudoresult.
(Petri Latvala)
Tools changes:
- New vendor agnostic gputop tool. (Tvrtko Ursulin)
- New tool to dump Intel GuC/HuC CSS header. (Lucas De Marchi)
- Improve tools intel_watermark, intel_vbt_decode, intel_reg. (Ville Syrjälä)
Documentation changes:
- A new way of documenting tests allows generating documentation and
test lists during the build; see README.md and test_documentation.md. This
is mandatory for Intel (both i915 and xe) and KMS tests. (Mauro Carvalho
Chehab)
Test changes:
- Move intel specific tests to new directory. (Bhanuprakash Modem)
- Ported and refactored drmlib security tests in amdgpu. (Vitaly Prosyak)
- Switch DRM selftests to KUnit. (Isabella Basso, Dominik Karol Piątkowski,
Mauro Carvalho Chehab)
- Enabled MeteorLake aux ccs tests. (Juha-Pekka Heikkila)
- Exercise oversized object detection for Xe. (Priyanka Dandamudi)
- Enable validation for VDSC output formats. (Swati Sharma)
- Add support for Bigjoiner / 8K mode. (Bhanuprakash Modem)
- Use intel_cmds_info library. (Karolina Stolarek)
- Use Intel kernel gpu command definitions. (Zbigniew Kempczyński)
- Add a basic perf_pmu test. (Riana Tauro)
- Add test for V3D's Wait BO IOCTL. (Maíra Canal)
- Add i915_power test for power measurement. (Ashutosh Dixit)
- Remove sysfs_clients. (Lucas De Marchi)
And many other bug fixes, improvements, cleanups and new tests.
Release 1.27.1 (2023-01-18)
-------------------------
- Removed gcc option -fcommon from meson.build and fixed the broken
build on the Linux RedHat platform. (Zbigniew Kempczyński)
- Updated CONTRIBUTING guidelines for interface deprecation. (Petri Latvala)
- Fixed one additional test bug (Zbigniew Kempczyński)
Release 1.27 (2023-01-12)
-------------------------
- Support for Intel discrete graphics and other new platforms (Andrzej
Turko, Matt Roper, Clint Taylor, Tejas Upadhyay, et al)
- Support for MSM driver. (Mark Yacoub, Rob Clark)
- Support for SRIOV device selection. (Łukasz Łaguna)
- Tiled display emulation support with chamelium. (Kunal Joshi)
- Support for Chamelium v3. (Mark Yacoub)
- Initial FreeBSD support. (Jake Freeland)
- Structured communication from tests to igt_runner. (Petri Latvala)
- Removed last remaining uses of libdrm_intel in tests and
tools. (Zbigniew Kempczyński)
- Automatic kernel code coverage collection during testing. (Mauro
Carvalho Chehab)
And many other bug fixes, improvements, cleanups and new tests.
Release 1.26 (2021-04-23)
-------------------------
......
......@@ -49,6 +49,11 @@ Documentation is built using
$ ninja -C build igt-gpu-tools-doc
Please note that some drivers and test sets may require all tests to be
properly documented via testplan. By default, the build will fail if one
forgets to document a test or to update the documentation.
This is currently enabled for the Xe and i915 drivers and for KMS tests.
See docs/test_documentation.md for more details.
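As a sketch, assuming the testplan meson option mentioned in the release
notes behaves like a standard meson feature option (an assumption, not a
documented guarantee), the documentation checks could be enabled explicitly
at configure time with:

  $ meson configure build -Dtestplan=enabled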
Running Tests
-------------
......@@ -164,7 +169,7 @@ was used to generate them.
Imported i915_drm.h uapi headers from airlied's drm-next branch.
In some cases updating a single uapi file is needed as our history
shows. So in this case, it should be done by:
shows. In this case, it should be done by:
# From the kernel dir with a drm/drm-next commit checked out:
$ make INSTALL_HDR_PATH=<dest-dir> headers_install
......
......@@ -216,7 +216,7 @@ brw_program_add_instruction(struct brw_program *p,
{
struct brw_program_instruction *list_entry;
list_entry = calloc(sizeof(struct brw_program_instruction), 1);
list_entry = calloc(1, sizeof(struct brw_program_instruction));
list_entry->type = GEN4ASM_INSTRUCTION_GEN;
list_entry->insn.gen = instruction->insn.gen;
brw_program_append_entry(p, list_entry);
......@@ -228,7 +228,7 @@ brw_program_add_relocatable(struct brw_program *p,
{
struct brw_program_instruction *list_entry;
list_entry = calloc(sizeof(struct brw_program_instruction), 1);
list_entry = calloc(1, sizeof(struct brw_program_instruction));
list_entry->type = GEN4ASM_INSTRUCTION_GEN_RELOCATABLE;
list_entry->insn.gen = instruction->insn.gen;
list_entry->reloc = instruction->reloc;
......@@ -239,7 +239,7 @@ static void brw_program_add_label(struct brw_program *p, const char *label)
{
struct brw_program_instruction *list_entry;
list_entry = calloc(sizeof(struct brw_program_instruction), 1);
list_entry = calloc(1, sizeof(struct brw_program_instruction));
list_entry->type = GEN4ASM_INSTRUCTION_LABEL;
list_entry->insn.label.name = strdup(label);
brw_program_append_entry(p, list_entry);
......
......@@ -397,7 +397,7 @@ int main(int argc, char **argv)
if (entry1 && is_label(entry1) && is_entry_point(entry1)) {
// insert NOP instructions until (inst_offset+1) % 4 == 0
while (((inst_offset+1) % 4) != 0) {
tmp_entry = calloc(sizeof(*tmp_entry), 1);
tmp_entry = calloc(1, sizeof(*tmp_entry));
tmp_entry->insn.gen.header.opcode = BRW_OPCODE_NOP;
entry->next = tmp_entry;
tmp_entry->next = entry1;
......
......@@ -33,8 +33,8 @@
#include <fcntl.h>
#include <inttypes.h>
#include <errno.h>
#include <poll.h>
#include <sys/stat.h>
#include <sys/poll.h>
#include <sys/ioctl.h>
#include <sys/time.h>
#include <time.h>
......
......@@ -80,9 +80,9 @@ static int run(unsigned batch_size,
struct drm_i915_gem_relocation_entry *mem_reloc = NULL;
int *target;
gem_exec = calloc(sizeof(*gem_exec), num_objects + 1);
mem_reloc = calloc(sizeof(*mem_reloc), num_relocs);
target = calloc(sizeof(*target), num_relocs);
gem_exec = calloc(num_objects + 1, sizeof(*gem_exec));
mem_reloc = calloc(num_relocs, sizeof(*mem_reloc));
target = calloc(num_relocs, sizeof(*target));
fd = drm_open_driver(DRIVER_INTEL);
......
......@@ -271,7 +271,11 @@ static int is_i915(int fd)
}
int
#ifdef __GLIBC__
ioctl(int fd, unsigned long request, ...)
#else
ioctl(int fd, int request, ...)
#endif
{
struct trace *t, **p;
va_list args;
......
......@@ -36,10 +36,10 @@
#include <inttypes.h>
#include <limits.h>
#include <errno.h>
#include <poll.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/time.h>
#include <sys/poll.h>
#include <sys/resource.h>
#include "drm.h"
......@@ -193,7 +193,7 @@ static void setup_workload(struct producer *p, int gen,
struct drm_i915_gem_relocation_entry *reloc;
int offset;
reloc = calloc(sizeof(*reloc), 2*factor);
reloc = calloc(2*factor, sizeof(*reloc));
p->workload_dispatch.exec[0].handle = scratch;
p->workload_dispatch.exec[1].relocation_count = 2*factor;
......@@ -457,7 +457,7 @@ static int run(int seconds,
return IGT_EXIT_SKIP; /* Needs BCS timestamp */
intel_register_access_init(&mmio_data,
igt_device_get_pci_device(fd), false, fd);
igt_device_get_pci_device(fd), false);
if (gen == 6)
timestamp_reg = REG(RCS_TIMESTAMP);
......
This diff is collapsed.
......@@ -45,6 +45,7 @@
*/
#include "igt.h"
#include "i915/gem_create.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
......@@ -72,74 +73,99 @@ get_time_in_secs(void)
}
static void
do_render(drm_intel_bufmgr *bufmgr, struct intel_batchbuffer *batch,
drm_intel_bo *dst_bo, int width, int height)
do_render(int i915, uint32_t dst_handle)
{
uint32_t data[width * height];
drm_intel_bo *src_bo;
int i;
struct drm_i915_gem_execbuffer2 exec = {};
struct drm_i915_gem_exec_object2 obj[3] = {};
struct drm_i915_gem_relocation_entry reloc[2];
static uint32_t seed = 1;
uint32_t data[OBJECT_WIDTH * OBJECT_HEIGHT];
uint64_t size = OBJECT_WIDTH * OBJECT_HEIGHT * 4, bb_size = 4096;
uint32_t src_handle, bb_handle, *bb;
uint32_t gen = intel_gen(intel_get_drm_devid(i915));
const bool has_64b_reloc = gen >= 8;
int i;
bb_handle = gem_create_from_pool(i915, &bb_size, REGION_SMEM);
src_handle = gem_create_from_pool(i915, &size, REGION_SMEM);
/* Generate some junk. Real workloads would be doing a lot more
* work to generate the junk.
*/
for (i = 0; i < width * height; i++) {
for (i = 0; i < OBJECT_WIDTH * OBJECT_HEIGHT; i++)
data[i] = seed++;
}
/* Upload the junk. */
src_bo = drm_intel_bo_alloc(bufmgr, "src", sizeof(data), 4096);
drm_intel_bo_subdata(src_bo, 0, sizeof(data), data);
gem_write(i915, src_handle, 0, data, sizeof(data));
/* Render the junk to the dst. */
BLIT_COPY_BATCH_START(0);
OUT_BATCH((3 << 24) | /* 32 bits */
bb = gem_mmap__device_coherent(i915, bb_handle, 0, bb_size, PROT_WRITE);
i = 0;
bb[i++] = XY_SRC_COPY_BLT_CMD |
XY_SRC_COPY_BLT_WRITE_ALPHA |
XY_SRC_COPY_BLT_WRITE_RGB |
(6 + 2*(gen >= 8));
bb[i++] = (3 << 24) | /* 32 bits */
(0xcc << 16) | /* copy ROP */
(width * 4) /* dst pitch */);
OUT_BATCH(0); /* dst x1,y1 */
OUT_BATCH((height << 16) | width); /* dst x2,y2 */
OUT_RELOC(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
OUT_BATCH(0); /* src x1,y1 */
OUT_BATCH(width * 4); /* src pitch */
OUT_RELOC(src_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
ADVANCE_BATCH();
intel_batchbuffer_flush(batch);
drm_intel_bo_unreference(src_bo);
(OBJECT_WIDTH * 4) /* dst pitch */;
bb[i++] = 0; /* dst x1,y1 */
bb[i++] = (OBJECT_HEIGHT << 16) | OBJECT_WIDTH; /* dst x2,y2 */
obj[0].handle = dst_handle;
obj[0].offset = dst_handle * size;
reloc[0].target_handle = dst_handle;
reloc[0].presumed_offset = obj[0].offset;
reloc[0].offset = sizeof(uint32_t) * i;
reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
bb[i++] = obj[0].offset;
if (has_64b_reloc)
bb[i++] = obj[0].offset >> 32;
bb[i++] = 0; /* src x1,y1 */
bb[i++] = OBJECT_WIDTH * 4; /* src pitch */
obj[1].handle = src_handle;
obj[1].offset = src_handle * size;
reloc[1].target_handle = src_handle;
reloc[1].presumed_offset = obj[1].offset;
reloc[1].offset = sizeof(uint32_t) * i;
reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
reloc[1].write_domain = 0;
bb[i++] = obj[1].offset;
if (has_64b_reloc)
bb[i++] = obj[1].offset >> 32;
obj[2].handle = bb_handle;
obj[2].relocs_ptr = to_user_pointer(reloc);
obj[2].relocation_count = 2;
bb[i++] = MI_BATCH_BUFFER_END;
gem_munmap(bb, bb_size);
exec.buffers_ptr = to_user_pointer(obj);
exec.buffer_count = 3;
exec.flags = gen >= 6 ? I915_EXEC_BLT : 0 | I915_EXEC_NO_RELOC;
gem_execbuf(i915, &exec);
}
int main(int argc, char **argv)
{
int fd;
int object_size = OBJECT_WIDTH * OBJECT_HEIGHT * 4;
double start_time, end_time;
drm_intel_bo *dst_bo;
drm_intel_bufmgr *bufmgr;
struct intel_batchbuffer *batch;
int i;
fd = drm_open_driver(DRIVER_INTEL);
uint32_t dst_handle;
int i915, i;
bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
drm_intel_bufmgr_gem_enable_reuse(bufmgr);
batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
dst_bo = drm_intel_bo_alloc(bufmgr, "dst", object_size, 4096);
i915 = drm_open_driver(DRIVER_INTEL);
dst_handle = gem_create(i915, OBJECT_WIDTH * OBJECT_HEIGHT * 4);
/* Prep loop to get us warmed up. */
for (i = 0; i < 60; i++) {
do_render(bufmgr, batch, dst_bo, OBJECT_WIDTH, OBJECT_HEIGHT);
}
drm_intel_bo_wait_rendering(dst_bo);
for (i = 0; i < 60; i++)
do_render(i915, dst_handle);
gem_sync(i915, dst_handle);
/* Do the actual timing. */
start_time = get_time_in_secs();
for (i = 0; i < 200; i++) {
do_render(bufmgr, batch, dst_bo, OBJECT_WIDTH, OBJECT_HEIGHT);
}
drm_intel_bo_wait_rendering(dst_bo);
for (i = 0; i < 200; i++)
do_render(i915, dst_handle);
gem_sync(i915, dst_handle);
end_time = get_time_in_secs();
printf("%d iterations in %.03f secs: %.01f MB/sec\n", i,
......@@ -147,10 +173,5 @@ int main(int argc, char **argv)
(double)i * OBJECT_WIDTH * OBJECT_HEIGHT * 4 / 1024.0 / 1024.0 /
(end_time - start_time));
intel_batchbuffer_free(batch);
drm_intel_bufmgr_destroy(bufmgr);
close(fd);
return 0;
close(i915);
}
......@@ -45,6 +45,7 @@
*/
#include "igt.h"
#include "i915/gem_create.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
......@@ -69,74 +70,98 @@ get_time_in_secs(void)
}
static void
do_render(drm_intel_bufmgr *bufmgr, struct intel_batchbuffer *batch,
drm_intel_bo *dst_bo, int width, int height)
do_render(int i915, uint32_t dst_handle)
{
uint32_t *data;
drm_intel_bo *src_bo;
int i;
struct drm_i915_gem_execbuffer2 exec = {};
struct drm_i915_gem_exec_object2 obj[3] = {};
struct drm_i915_gem_relocation_entry reloc[2];
static uint32_t seed = 1;
uint64_t size = OBJECT_WIDTH * OBJECT_HEIGHT * 4, bb_size = 4096;
uint32_t *data, src_handle, bb_handle, *bb;
uint32_t gen = intel_gen(intel_get_drm_devid(i915));
const bool has_64b_reloc = gen >= 8;
int i;
src_bo = drm_intel_bo_alloc(bufmgr, "src", width * height * 4, 4096);
drm_intel_gem_bo_map_gtt(src_bo);
bb_handle = gem_create_from_pool(i915, &bb_size, REGION_SMEM);
src_handle = gem_create_from_pool(i915, &size, REGION_SMEM);
data = src_bo->virtual;
for (i = 0; i < width * height; i++) {
data = gem_mmap__gtt(i915, src_handle, size, PROT_WRITE);
for (i = 0; i < OBJECT_WIDTH * OBJECT_HEIGHT; i++)
data[i] = seed++;
}
drm_intel_gem_bo_unmap_gtt(src_bo);
gem_munmap(data, size);
/* Render the junk to the dst. */
BLIT_COPY_BATCH_START(0);
OUT_BATCH((3 << 24) | /* 32 bits */
bb = gem_mmap__device_coherent(i915, bb_handle, 0, bb_size, PROT_WRITE);
i = 0;
bb[i++] = XY_SRC_COPY_BLT_CMD |
XY_SRC_COPY_BLT_WRITE_ALPHA |
XY_SRC_COPY_BLT_WRITE_RGB |
(6 + 2*(gen >= 8));
bb[i++] = (3 << 24) | /* 32 bits */
(0xcc << 16) | /* copy ROP */
(width * 4) /* dst pitch */);
OUT_BATCH(0); /* dst x1,y1 */
OUT_BATCH((height << 16) | width); /* dst x2,y2 */
OUT_RELOC(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
OUT_BATCH(0); /* src x1,y1 */
OUT_BATCH(width * 4); /* src pitch */
OUT_RELOC(src_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
ADVANCE_BATCH();
intel_batchbuffer_flush(batch);
drm_intel_bo_unreference(src_bo);
(OBJECT_WIDTH * 4) /* dst pitch */;
bb[i++] = 0; /* dst x1,y1 */
bb[i++] = (OBJECT_HEIGHT << 16) | OBJECT_WIDTH; /* dst x2,y2 */
obj[0].handle = dst_handle;
obj[0].offset = dst_handle * size;
reloc[0].target_handle = dst_handle;
reloc[0].presumed_offset = obj[0].offset;
reloc[0].offset = sizeof(uint32_t) * i;
reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
bb[i++] = obj[0].offset;
if (has_64b_reloc)
bb[i++] = obj[0].offset >> 32;
bb[i++] = 0; /* src x1,y1 */
bb[i++] = OBJECT_WIDTH * 4; /* src pitch */
obj[1].handle = src_handle;
obj[1].offset = src_handle * size;
reloc[1].target_handle = src_handle;
reloc[1].presumed_offset = obj[1].offset;
reloc[1].offset = sizeof(uint32_t) * i;
reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
reloc[1].write_domain = 0;
bb[i++] = obj[1].offset;
if (has_64b_reloc)
bb[i++] = obj[1].offset >> 32;
obj[2].handle = bb_handle;
obj[2].relocs_ptr = to_user_pointer(reloc);
obj[2].relocation_count = 2;
bb[i++] = MI_BATCH_BUFFER_END;
gem_munmap(bb, bb_size);
exec.buffers_ptr = to_user_pointer(obj);
exec.buffer_count = 3;
exec.flags = gen >= 6 ? I915_EXEC_BLT : 0 | I915_EXEC_NO_RELOC;
gem_execbuf(i915, &exec);
}
int main(int argc, char **argv)
{
int fd;
int object_size = OBJECT_WIDTH * OBJECT_HEIGHT * 4;
double start_time, end_time;
drm_intel_bo *dst_bo;
drm_intel_bufmgr *bufmgr;
struct intel_batchbuffer *batch;
int i;
fd = drm_open_driver(DRIVER_INTEL);
uint32_t dst_handle;
int i915, i;
bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
drm_intel_bufmgr_gem_enable_reuse(bufmgr);
batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
dst_bo = drm_intel_bo_alloc(bufmgr, "dst", object_size, 4096);
i915 = drm_open_driver(DRIVER_INTEL);
dst_handle = gem_create(i915, OBJECT_WIDTH * OBJECT_HEIGHT * 4);
/* Prep loop to get us warmed up. */
for (i = 0; i < 60; i++) {
do_render(bufmgr, batch, dst_bo, OBJECT_WIDTH, OBJECT_HEIGHT);
}
drm_intel_bo_wait_rendering(dst_bo);
for (i = 0; i < 60; i++)
do_render(i915, dst_handle);
gem_sync(i915, dst_handle);
/* Do the actual timing. */
start_time = get_time_in_secs();
for (i = 0; i < 200; i++) {
do_render(bufmgr, batch, dst_bo, OBJECT_WIDTH, OBJECT_HEIGHT);
}
drm_intel_bo_wait_rendering(dst_bo);
for (i = 0; i < 200; i++)
do_render(i915, dst_handle);
gem_sync(i915, dst_handle);
end_time = get_time_in_secs();
printf("%d iterations in %.03f secs: %.01f MB/sec\n", i,
......@@ -144,10 +169,5 @@ int main(int argc, char **argv)
(double)i * OBJECT_WIDTH * OBJECT_HEIGHT * 4 / 1024.0 / 1024.0 /
(end_time - start_time));
intel_batchbuffer_free(batch);
drm_intel_bufmgr_destroy(bufmgr);
close(fd);
return 0;
close(i915);
}
......@@ -48,6 +48,7 @@
*/
#include "igt.h"
#include "i915/gem_create.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
......@@ -72,74 +73,99 @@ get_time_in_secs(void)
}
static void
do_render(drm_intel_bufmgr *bufmgr, struct intel_batchbuffer *batch,
drm_intel_bo *dst_bo, int width, int height)
do_render(int i915, uint32_t dst_handle)
{
uint32_t *data;
drm_intel_bo *src_bo;
int i;
struct drm_i915_gem_execbuffer2 exec = {};
struct drm_i915_gem_exec_object2 obj[3] = {};
struct drm_i915_gem_relocation_entry reloc[2];
static uint32_t seed = 1;
uint64_t size = OBJECT_WIDTH * OBJECT_HEIGHT * 4, bb_size = 4096;
uint32_t *data, src_handle, bb_handle, *bb;
uint32_t gen = intel_gen(intel_get_drm_devid(i915));
const bool has_64b_reloc = gen >= 8;
int i;
src_bo = drm_intel_bo_alloc(bufmgr, "src", width * height * 4, 4096);
drm_intel_bo_map(src_bo, 1);
bb_handle = gem_create_from_pool(i915, &bb_size, REGION_SMEM);
src_handle = gem_create_from_pool(i915, &size, REGION_SMEM);
data = src_bo->virtual;
for (i = 0; i < width * height; i++) {
data = gem_mmap__cpu(i915, src_handle, 0, size, PROT_WRITE);
for (i = 0; i < OBJECT_WIDTH * OBJECT_HEIGHT; i++)
data[i] = seed++;
}
drm_intel_bo_unmap(src_bo);
gem_set_domain(i915, src_handle, I915_GEM_DOMAIN_CPU, 0);
gem_munmap(data, size);
/* Render the junk to the dst. */
BLIT_COPY_BATCH_START(0);
OUT_BATCH((3 << 24) | /* 32 bits */
bb = gem_mmap__device_coherent(i915, bb_handle, 0, bb_size, PROT_WRITE);
i = 0;
bb[i++] = XY_SRC_COPY_BLT_CMD |
XY_SRC_COPY_BLT_WRITE_ALPHA |
XY_SRC_COPY_BLT_WRITE_RGB |
(6 + 2*(gen >= 8));
bb[i++] = (3 << 24) | /* 32 bits */
(0xcc << 16) | /* copy ROP */
(width * 4) /* dst pitch */);
OUT_BATCH(0); /* dst x1,y1 */
OUT_BATCH((height << 16) | width); /* dst x2,y2 */
OUT_RELOC(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
OUT_BATCH(0); /* src x1,y1 */
OUT_BATCH(width * 4); /* src pitch */
OUT_RELOC(src_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
ADVANCE_BATCH();
intel_batchbuffer_flush(batch);
drm_intel_bo_unreference(src_bo);
(OBJECT_WIDTH * 4) /* dst pitch */;
bb[i++] = 0; /* dst x1,y1 */
bb[i++] = (OBJECT_HEIGHT << 16) | OBJECT_WIDTH; /* dst x2,y2 */
obj[0].handle = dst_handle;
obj[0].offset = dst_handle * size;
reloc[0].target_handle = dst_handle;
reloc[0].presumed_offset = obj[0].offset;
reloc[0].offset = sizeof(uint32_t) * i;
reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
bb[i++] = obj[0].offset;
if (has_64b_reloc)
bb[i++] = obj[0].offset >> 32;
bb[i++] = 0; /* src x1,y1 */
bb[i++] = OBJECT_WIDTH * 4; /* src pitch */
obj[1].handle = src_handle;
obj[1].offset = src_handle * size;
reloc[1].target_handle = src_handle;
reloc[1].presumed_offset = obj[1].offset;
reloc[1].offset = sizeof(uint32_t) * i;
reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
reloc[1].write_domain = 0;
bb[i++] = obj[1].offset;
if (has_64b_reloc)
bb[i++] = obj[1].offset >> 32;
obj[2].handle = bb_handle;
obj[2].relocs_ptr = to_user_pointer(reloc);
obj[2].relocation_count = 2;
bb[i++] = MI_BATCH_BUFFER_END;
gem_munmap(bb, bb_size);
exec.buffers_ptr = to_user_pointer(obj);
exec.buffer_count = 3;
exec.flags = gen >= 6 ? I915_EXEC_BLT : 0 | I915_EXEC_NO_RELOC;
gem_execbuf(i915, &exec);
}
int main(int argc, char **argv)
{
int fd;
int object_size = OBJECT_WIDTH * OBJECT_HEIGHT * 4;
double start_time, end_time;
drm_intel_bo *dst_bo;
drm_intel_bufmgr *bufmgr;
struct intel_batchbuffer *batch;
int i;
fd = drm_open_driver(DRIVER_INTEL);
uint32_t dst_handle;
int i915, i;
bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
drm_intel_bufmgr_gem_enable_reuse(bufmgr);
batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
dst_bo = drm_intel_bo_alloc(bufmgr, "dst", object_size, 4096);
i915 = drm_open_driver(DRIVER_INTEL);
dst_handle = gem_create(i915, OBJECT_WIDTH * OBJECT_HEIGHT * 4);
/* Prep loop to get us warmed up. */
for (i = 0; i < 60; i++) {
do_render(bufmgr, batch, dst_bo, OBJECT_WIDTH, OBJECT_HEIGHT);
}
drm_intel_bo_wait_rendering(dst_bo);
for (i = 0; i < 60; i++)
do_render(i915, dst_handle);
gem_sync(i915, dst_handle);
/* Do the actual timing. */
start_time = get_time_in_secs();
for (i = 0; i < 200; i++) {
do_render(bufmgr, batch, dst_bo, OBJECT_WIDTH, OBJECT_HEIGHT);
}
drm_intel_bo_wait_rendering(dst_bo);
for (i = 0; i < 200; i++)
do_render(i915, dst_handle);
gem_sync(i915, dst_handle);
end_time = get_time_in_secs();
printf("%d iterations in %.03f secs: %.01f MB/sec\n", i,
......@@ -147,10 +173,6 @@ int main(int argc, char **argv)
(double)i * OBJECT_WIDTH * OBJECT_HEIGHT * 4 / 1024.0 / 1024.0 /
(end_time - start_time));
intel_batchbuffer_free(batch);
drm_intel_bufmgr_destroy(bufmgr);
close(fd);
return 0;
close(i915);
}
......@@ -41,6 +41,7 @@
*/
#include "igt.h"
#include "i915/gem_create.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
......@@ -66,21 +67,27 @@ get_time_in_secs(void)
}
static void
do_render(drm_intel_bufmgr *bufmgr, struct intel_batchbuffer *batch,
drm_intel_bo *dst_bo, int width, int height)
do_render(int i915, uint32_t dst_handle)
{
uint32_t data[64];
drm_intel_bo *src_bo;
int i;
struct drm_i915_gem_execbuffer2 exec = {};
struct drm_i915_gem_exec_object2 obj[3] = {};
struct drm_i915_gem_relocation_entry reloc[2];
static uint32_t seed = 1;
uint32_t data[OBJECT_WIDTH * OBJECT_HEIGHT];
uint64_t size = OBJECT_WIDTH * OBJECT_HEIGHT * 4, bb_size = 4096;
uint32_t src_handle, bb_handle, *bb;
uint32_t gen = intel_gen(intel_get_drm_devid(i915));
const bool has_64b_reloc = gen >= 8;
int i;
src_bo = drm_intel_bo_alloc(bufmgr, "src", width * height * 4, 4096);
bb_handle = gem_create_from_pool(i915, &bb_size, REGION_SMEM);
src_handle = gem_create_from_pool(i915, &size, REGION_SMEM);
/* Upload some junk. Real workloads would be doing a lot more
* work to generate the junk.
*/
for (i = 0; i < width * height;) {
int size, j;
for (i = 0; i < OBJECT_WIDTH * OBJECT_HEIGHT; i++) {
int subsize, j;
/* Choose a size from 1 to 64 dwords to upload.
* Normal workloads have a distribution of sizes with a
......@@ -88,68 +95,92 @@ do_render(drm_intel_bufmgr *bufmgr, struct intel_batchbuffer *batch,
* pile of vertices, most likely), but I'm trying to get at
* the cost of the small uploads here.
*/
size = random() % 64 + 1;
if (i + size > width * height)
size = width * height - i;
subsize = random() % 64 + 1;
if (i + subsize > OBJECT_WIDTH * OBJECT_HEIGHT)
subsize = OBJECT_WIDTH * OBJECT_HEIGHT - i;
for (j = 0; j < size; j++)
for (j = 0; j < subsize; j++)
data[j] = seed++;
/* Upload the junk. */
drm_intel_bo_subdata(src_bo, i * 4, size * 4, data);
//drm_intel_bo_subdata(src_bo, i * 4, size * 4, data);
gem_write(i915, src_handle, i * 4, data, subsize * 4);
i += size;
i += subsize;
}
/* Render the junk to the dst. */
BLIT_COPY_BATCH_START(0);
OUT_BATCH((3 << 24) | /* 32 bits */
bb = gem_mmap__device_coherent(i915, bb_handle, 0, bb_size, PROT_WRITE);
i = 0;
bb[i++] = XY_SRC_COPY_BLT_CMD |
XY_SRC_COPY_BLT_WRITE_ALPHA |
XY_SRC_COPY_BLT_WRITE_RGB |
(6 + 2*(gen >= 8));
bb[i++] = (3 << 24) | /* 32 bits */
(0xcc << 16) | /* copy ROP */
(width * 4) /* dst pitch */);
OUT_BATCH(0); /* dst x1,y1 */
OUT_BATCH((height << 16) | width); /* dst x2,y2 */
OUT_RELOC(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
OUT_BATCH(0); /* src x1,y1 */
OUT_BATCH(width * 4); /* src pitch */
OUT_RELOC(src_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
ADVANCE_BATCH();
intel_batchbuffer_flush(batch);
drm_intel_bo_unreference(src_bo);
(OBJECT_WIDTH * 4) /* dst pitch */;
bb[i++] = 0; /* dst x1,y1 */
bb[i++] = (OBJECT_HEIGHT << 16) | OBJECT_WIDTH; /* dst x2,y2 */
obj[0].handle = dst_handle;
obj[0].offset = dst_handle * size;
reloc[0].target_handle = dst_handle;
reloc[0].presumed_offset = obj[0].offset;
reloc[0].offset = sizeof(uint32_t) * i;
reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
bb[i++] = obj[0].offset;
if (has_64b_reloc)
bb[i++] = obj[0].offset >> 32;
bb[i++] = 0; /* src x1,y1 */
bb[i++] = OBJECT_WIDTH * 4; /* src pitch */
obj[1].handle = src_handle;
obj[1].offset = src_handle * size;
reloc[1].target_handle = src_handle;
reloc[1].presumed_offset = obj[1].offset;
reloc[1].offset = sizeof(uint32_t) * i;
reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
reloc[1].write_domain = 0;
bb[i++] = obj[1].offset;
if (has_64b_reloc)
bb[i++] = obj[1].offset >> 32;
obj[2].handle = bb_handle;
obj[2].relocs_ptr = to_user_pointer(reloc);
obj[2].relocation_count = 2;
bb[i++] = MI_BATCH_BUFFER_END;
gem_munmap(bb, bb_size);
exec.buffers_ptr = to_user_pointer(obj);
exec.buffer_count = 3;
exec.flags = gen >= 6 ? I915_EXEC_BLT : 0 | I915_EXEC_NO_RELOC;
gem_execbuf(i915, &exec);
}
int main(int argc, char **argv)
{
int fd;
int object_size = OBJECT_WIDTH * OBJECT_HEIGHT * 4;
double start_time, end_time;
drm_intel_bo *dst_bo;
drm_intel_bufmgr *bufmgr;
struct intel_batchbuffer *batch;
int i;
fd = drm_open_driver(DRIVER_INTEL);
bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
drm_intel_bufmgr_gem_enable_reuse(bufmgr);
batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
uint32_t dst_handle;
int i915, i;
dst_bo = drm_intel_bo_alloc(bufmgr, "dst", object_size, 4096);
i915 = drm_open_driver(DRIVER_INTEL);
dst_handle = gem_create(i915, OBJECT_WIDTH * OBJECT_HEIGHT * 4);
/* Prep loop to get us warmed up. */
for (i = 0; i < 20; i++) {
do_render(bufmgr, batch, dst_bo, OBJECT_WIDTH, OBJECT_HEIGHT);
}
drm_intel_bo_wait_rendering(dst_bo);
for (i = 0; i < 60; i++)
do_render(i915, dst_handle);
gem_sync(i915, dst_handle);
/* Do the actual timing. */
start_time = get_time_in_secs();
for (i = 0; i < 1000; i++) {
do_render(bufmgr, batch, dst_bo, OBJECT_WIDTH, OBJECT_HEIGHT);
}
drm_intel_bo_wait_rendering(dst_bo);
for (i = 0; i < 1000; i++)
do_render(i915, dst_handle);
gem_sync(i915, dst_handle);
end_time = get_time_in_secs();
printf("%d iterations in %.03f secs: %.01f MB/sec\n", i,
......@@ -157,10 +188,6 @@ int main(int argc, char **argv)
(double)i * OBJECT_WIDTH * OBJECT_HEIGHT * 4 / 1024.0 / 1024.0 /
(end_time - start_time));
intel_batchbuffer_free(batch);
drm_intel_bufmgr_destroy(bufmgr);
close(fd);
return 0;
close(i915);
}
// SPDX-License-Identifier: MIT
/*
* Copyright © 2024 Arthur Grillo
*/
#include "igt.h"
#define FRAME_COUNT 100
#define NUM_FBS 2
struct rect_t {
int x, y;
int width, height;
};
struct plane_t {
igt_plane_t *base;
struct rect_t rect;
uint32_t format;
struct igt_fb fbs[NUM_FBS];
};
struct kms_t {
struct rect_t crtc;
struct plane_t primary;
struct plane_t overlay_a;
struct plane_t overlay_b;
struct plane_t writeback;
};
struct data_t {
int fd;
igt_display_t display;
igt_output_t *wb_output;
drmModeModeInfo *mode;
struct kms_t kms;
};
static void plane_setup(struct plane_t *plane, int index)
{
igt_plane_set_size(plane->base, plane->rect.width, plane->rect.height);
igt_plane_set_position(plane->base, plane->rect.x, plane->rect.y);
igt_plane_set_fb(plane->base, &plane->fbs[index]);
}
static void gen_fbs(struct data_t *data)
{
struct kms_t *kms = &data->kms;
drmModeModeInfo *mode = igt_output_get_mode(data->wb_output);
for (int i = 0; i < NUM_FBS; i++) {
igt_create_color_fb(data->fd, kms->primary.rect.width, kms->primary.rect.height,
kms->primary.format, DRM_FORMAT_MOD_LINEAR,
!i, i, i,
&kms->primary.fbs[i]);
igt_create_color_fb(data->fd, kms->overlay_a.rect.width, kms->overlay_a.rect.height,
kms->overlay_a.format, DRM_FORMAT_MOD_LINEAR,
i, !i, i,
&kms->overlay_a.fbs[i]);
igt_create_color_fb(data->fd, kms->overlay_b.rect.width, kms->overlay_b.rect.height,
kms->overlay_b.format, DRM_FORMAT_MOD_LINEAR,
i, i, !i,
&kms->overlay_b.fbs[i]);
kms->writeback.rect.width = mode->hdisplay;
kms->writeback.rect.height = mode->vdisplay;
igt_create_fb(data->fd, kms->writeback.rect.width, kms->writeback.rect.height,
kms->writeback.format, DRM_FORMAT_MOD_LINEAR,
&kms->writeback.fbs[i]);
}
}
static igt_output_t *find_wb_output(struct data_t *data)
{
for (int i = 0; i < data->display.n_outputs; i++) {
igt_output_t *output = &data->display.outputs[i];
if (output->config.connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK)
continue;
return output;
}
return NULL;
}
static void set_crtc_size(struct data_t *data)
{
drmModeModeInfo *mode;
struct rect_t *crtc = &data->kms.crtc;
for_each_connector_mode(data->wb_output) {
mode = &data->wb_output->config.connector->modes[j__];
if (mode->hdisplay == crtc->width && mode->vdisplay == crtc->height) {
igt_output_override_mode(data->wb_output, mode);
return;
}
}
igt_assert_f(0, "CRTC size %dx%d not supported\n", crtc->width, crtc->height);
}
static struct kms_t default_kms = {
.crtc = {
.width = 4096, .height = 2160,
},
.primary = {
.rect = {
.x = 101, .y = 0,
.width = 3639, .height = 2160,
},
.format = DRM_FORMAT_XRGB8888,
},
.overlay_a = {
.rect = {
.x = 201, .y = 199,
.width = 3033, .height = 1777,
},
.format = DRM_FORMAT_XRGB16161616,
},
.overlay_b = {
.rect = {
.x = 1800, .y = 250,
.width = 1507, .height = 1400,
},
.format = DRM_FORMAT_ARGB8888,
},
.writeback = {
.rect = {
.x = 0, .y = 0,
// Size is to be determined at runtime
},
.format = DRM_FORMAT_XRGB8888,
},
};
igt_simple_main
{
struct data_t data = {0};
enum pipe pipe = PIPE_NONE;
struct timespec then, now;
double elapsed;
data.kms = default_kms;
data.fd = drm_open_driver_master(DRIVER_ANY);
kmstest_set_vt_graphics_mode();
igt_display_require(&data.display, data.fd);
igt_require(data.display.is_atomic);
igt_display_require_output(&data.display);
igt_display_reset(&data.display);
data.wb_output = find_wb_output(&data);
igt_require(data.wb_output);
for_each_pipe(&data.display, pipe) {
igt_debug("Selecting pipe %s to %s\n",
kmstest_pipe_name(pipe),
igt_output_name(data.wb_output));
igt_output_set_pipe(data.wb_output, pipe);
break;
}
set_crtc_size(&data);
gen_fbs(&data);
data.kms.primary.base = igt_output_get_plane_type(data.wb_output, DRM_PLANE_TYPE_PRIMARY);
data.kms.overlay_a.base = igt_output_get_plane_type_index(data.wb_output,
DRM_PLANE_TYPE_OVERLAY, 0);
data.kms.overlay_b.base = igt_output_get_plane_type_index(data.wb_output,
DRM_PLANE_TYPE_OVERLAY, 1);
igt_assert_eq(igt_gettime(&then), 0);
for (int i = 0; i < FRAME_COUNT; i++) {
int fb_index = i % NUM_FBS;
plane_setup(&data.kms.primary, fb_index);
plane_setup(&data.kms.overlay_a, fb_index);
plane_setup(&data.kms.overlay_b, fb_index);
igt_output_set_writeback_fb(data.wb_output, &data.kms.writeback.fbs[fb_index]);
igt_display_commit2(&data.display, COMMIT_ATOMIC);
}
igt_assert_eq(igt_gettime(&now), 0);
elapsed = igt_time_elapsed(&then, &now);
igt_info("Time spent in the loop with %d frames: %lfs.\n", FRAME_COUNT, elapsed);
igt_display_fini(&data.display);
drm_close_driver(data.fd);
}
......@@ -161,7 +161,7 @@ int main(int argc, char **argv)
}
}
fd = drm_open_driver(DRIVER_INTEL);
fd = drm_open_driver(DRIVER_INTEL | DRIVER_XE);
if (!crtc0_active(fd)) {
fprintf(stderr, "CRTC/pipe 0 not active\n");
return 77;
......
......@@ -13,20 +13,16 @@ benchmark_progs = [
'gem_syslatency',
'gem_userptr_benchmark',
'gem_wsim',
'intel_upload_blit_large',
'intel_upload_blit_large_gtt',
'intel_upload_blit_large_map',
'intel_upload_blit_small',
'kms_fb_stress',
'kms_vblank',
'prime_lookup',
'vgem_mmap',
]
if libdrm_intel.found()
benchmark_progs += [
'intel_upload_blit_large',
'intel_upload_blit_large_gtt',
'intel_upload_blit_large_map',
'intel_upload_blit_small',
]
endif
benchmarksdir = join_paths(libexecdir, 'benchmarks')
foreach prog : benchmark_progs
......
Workload descriptor format
==========================
Lines starting with '#' are treated as comments and will not create a work step.
ctx.engine.duration_us.dependency.wait,...
<uint>.<str>.<uint>[-<uint>]|*.<int <= 0>[/<int <= 0>][...].<0|1>,...
B.<uint>
......@@ -86,6 +88,19 @@ Batch durations can also be specified as infinite by using the '*' in the
duration field. Such batches must be ended by the terminate command ('T')
otherwise they will cause a GPU hang to be reported.
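Reading the descriptor grammar above, a small two-step workload could look
like the following (the context, engines and durations here are chosen purely
for illustration):

# ctx 1: 1000us batch on RCS, no dependency, do not wait for completion
1.RCS.1000.0.0
# ctx 1: 500-1500us batch on VCS1, data dependency on the previous step, wait
1.VCS1.500-1500.-1.1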
Xe and i915 differences
------------------------
There are differences between Xe and i915, such as Xe not allowing a BO list
to be passed to an exec (which on i915 is what creates implicit syncs). For
more details see:
https://gitlab.freedesktop.org/drm/xe/kernel/-/blob/drm-xe-next/drivers/gpu/drm/xe/xe_exec.c
Currently the following batch steps are equivalent on Xe:
1.1000.-2.0 <==> 1.1000.f-2.0
and both will create an explicit sync fence dependency (via syncobjects).
Data dependencies need to wait for the working sets implementation.
Sync (fd) fences
----------------
......@@ -129,7 +144,7 @@ runnable. When the second RCS batch completes the standalone fence is signaled
which allows the two VCS batches to be executed. Finally we wait until the both
VCS batches have completed before starting the (optional) next iteration.
Submit fences
Submit fences (i915 only)
-------------
Submit fences are a type of input fence which are signalled when the originating
......@@ -146,7 +161,7 @@ Submit fences have the identical syntax as the sync fences with the lower-case
Here VCS1 and VCS2 batches will only be submitted for executing once the RCS
batch enters the GPU.
Context priority
Context priority (i915 only)
----------------
P.1.-1
......@@ -211,7 +226,7 @@ Example:
This enables load balancing for context number one.
Engine bonds
Engine bonds (i915 only)
------------
Engine bonds are extensions on load balanced contexts. They allow expressing
......@@ -259,7 +274,7 @@ then look like:
2.DEFAULT.1000.s-1.0
a.-3
Context SSEU configuration
Context SSEU configuration (i915 only)
--------------------------
S.1.1
......@@ -279,7 +294,7 @@ Slice mask of -1 has a special meaning of "all slices". Otherwise any integer
can be specifying as the slice mask, but beware any apart from 1 and -1 can make
the workload not portable between different GPUs.
Working sets
Working sets (i915 only)
------------
When used plainly workload steps can create implicit data dependencies by
......